comparison mercurial/cmdutil.py @ 34682:7e3001b74ab3

tersestatus: re-implement the functionality to terse the status. The previous terse status implementation hacked around os.listdir() and was flaky. There have been many instances of Mercurial buildbots and Google's internal builds failing because of that hacky implementation. Even though I wrote the previous implementation, it was hard for me to find the cause of the flakiness. The new implementation can be slower than the old one, but it is clean and easy to understand. In it, we create a node object for each directory and build a tree-like structure starting from the root of the working copy. While building the tree, we store some information on the nodes that helps decide later whether a directory can be tersed. Once the whole tree is built, we traverse it and build the list of files for each status with the required tersing. There is no behaviour change, as the old test, test-status-terse.t, passes with the new implementation. Differential Revision: https://phab.mercurial-scm.org/D985
author Pulkit Goyal <7895pulkit@gmail.com>
date Fri, 06 Oct 2017 20:54:23 +0530
parents 2a360445afa0
children 3d6d4b12128e
comparison
equal deleted inserted replaced
34681:4dc8a2ee0f4f 34682:7e3001b74ab3
400 with repo.wlock(): 400 with repo.wlock():
401 return recordfunc(ui, repo, message, match, opts) 401 return recordfunc(ui, repo, message, match, opts)
402 402
403 return commit(ui, repo, recordinwlock, pats, opts) 403 return commit(ui, repo, recordinwlock, pats, opts)
404 404
405 def tersestatus(root, statlist, status, ignorefn, ignore): 405
406 # extracted at module level as it's required each time a file will be added
407 # to dirnode class object below
408 pathsep = pycompat.ossep
409
410 class dirnode(object):
406 """ 411 """
407 Returns a list of statuses with directory collapsed if all the files in the 412 represents a directory in user working copy
408 directory has the same status. 413
414 stores information which is required for purpose of tersing the status
415
416 path is the path to the directory
417
418 statuses is a set of statuses of all files in this directory (this includes
419 all the files in all the subdirectories too)
420
421 files is a list of files which are direct child of this directory
422
423 subdirs is a dictionary of sub-directory name as the key and it's own
424 dirnode object as the value
409 """ 425 """
410 426
411 def numfiles(dirname): 427 def __init__(self, dirpath):
428 self.path = dirpath
429 self.statuses = set([])
430 self.files = []
431 self.subdirs = {}
432
433 def _addfileindir(self, filename, status):
434 """ adds a file in this directory as the direct child """
435 self.files.append((filename, status))
436
437 def addfile(self, filename, status):
412 """ 438 """
413 Calculates the number of tracked files in a given directory which also 439 adds a file which is present in this directory to its direct parent
414 includes files which were removed or deleted. Considers ignored files 440 dirnode object
415 if ignore argument is True or 'i' is present in status argument. 441
442 if the file is not direct child of this directory, we traverse to the
443 directory of which this file is a direct child of and add the file there
416 """ 444 """
417 if lencache.get(dirname): 445
418 return lencache[dirname] 446 # the filename contains a path separator, it means it's not the direct
419 if 'i' in status or ignore: 447 # child of this directory
420 def match(localpath): 448 if pathsep in filename:
421 absolutepath = os.path.join(root, localpath) 449 subdir, filep = filename.split(pathsep, 1)
422 if os.path.isdir(absolutepath) and isemptydir(absolutepath): 450
423 return True 451 # does the dirnode object for subdir exists
424 return False 452 if subdir not in self.subdirs:
453 subdirpath = os.path.join(self.path, subdir)
454 self.subdirs[subdir] = dirnode(subdirpath)
455
456 # try adding the file in subdir
457 self.subdirs[subdir].addfile(filep, status)
458
425 else: 459 else:
426 def match(localpath): 460 self._addfileindir(filename, status)
427 # there can be directory whose all the files are ignored and 461
428 # hence the drectory should also be ignored while counting 462 if status not in self.statuses:
429 # number of files or subdirs in it's parent directory. This 463 self.statuses.add(status)
430 # checks the same. 464
431 # XXX: We need a better logic here. 465 def _addfilestotersed(path, files, tersedict):
432 if os.path.isdir(os.path.join(root, localpath)): 466 """ adds files to the their respective status list in the final tersed list
433 return isignoreddir(localpath) 467
434 else: 468 path is the path of parent directory of the file
435 # XXX: there can be files which have the ignored pattern but 469 files is a list of tuple where each tuple is (filename, status)
436 # are not ignored. That leads to bug in counting number of 470 tersedict is a dictonary which contains each status abbreviation as key and
437 # tracked files in the directory. 471 list of files and tersed dirs in that status as value
438 return ignorefn(localpath) 472 """
439 lendir = 0 473 for f, st in files:
440 abspath = os.path.join(root, dirname) 474 tersedict[st].append(os.path.join(path, f))
441 # There might be cases when a directory does not exists as the whole 475
442 # directory can be removed and/or deleted. 476 def _processtersestatus(subdir, tersedict, terseargs):
443 try: 477 """a recursive function which process status for a certain directory.
444 for f in os.listdir(abspath): 478
445 localpath = os.path.join(dirname, f) 479 subdir is an oject of dirnode class defined below. each object of dirnode
446 if not match(localpath): 480 class has a set of statuses which files in that directory has. This ease our
447 lendir += 1 481 check whether we can terse that directory or not.
448 except OSError: 482
449 pass 483 tersedict is a dictonary which contains each status abbreviation as key and
450 lendir += len(absentdir.get(dirname, [])) 484 list of files and tersed dirs in that status as value. In each function call
451 lencache[dirname] = lendir 485 we are passing the same dict and adding files and dirs to it.
452 return lendir 486
453 487 terseargs is the string of arguments passed by the user with `--terse` flag.
454 def isemptydir(abspath): 488
455 """ 489 Following are the cases which can happen:
456 Check whether a directory is empty or not, i.e. there is no files in the 490
457 directory and all its subdirectories. 491 1) All the files in the directory (including all the files in its
458 """ 492 subdirectories) share the same status and the user has asked us to terse
459 for f in os.listdir(abspath): 493 that status. -> we add the directory name to status list and return
460 fullpath = os.path.join(abspath, f) 494
461 if os.path.isdir(fullpath): 495 2) If '1)' does not happen, we do following:
462 # recursion here 496
463 ret = isemptydir(fullpath) 497 a) Add all the files which are in this directory (only the ones in
464 if not ret: 498 this directory, not the subdirs) to their respective status list
465 return False 499
466 else: 500 b) Recurse the function on all the subdirectories of this directory
467 return False 501 """
468 return True 502
469 503 if len(subdir.statuses) == 1:
470 def isignoreddir(localpath): 504 onlyst = subdir.statuses.pop()
471 """Return True if `localpath` directory is ignored or contains only 505
472 ignored files and should hence be considered ignored. 506 # Making sure we terse only when the status abbreviation is passed as
473 """ 507 # terse argument
474 dirpath = os.path.join(root, localpath) 508 if onlyst in terseargs:
475 if ignorefn(dirpath): 509 tersedict[onlyst].append(subdir.path + pycompat.ossep)
476 return True 510 return
477 for f in os.listdir(dirpath): 511
478 filepath = os.path.join(dirpath, f) 512 # add the files to status list
479 if os.path.isdir(filepath): 513 _addfilestotersed(subdir.path, subdir.files, tersedict)
480 # recursion here 514
481 ret = isignoreddir(os.path.join(localpath, f)) 515 #recurse on the subdirs
482 if not ret: 516 for dirobj in subdir.subdirs.values():
483 return False 517 _processtersestatus(dirobj, tersedict, terseargs)
484 else: 518
485 if not ignorefn(os.path.join(localpath, f)): 519 def tersedir(statuslist, terseargs):
486 return False 520 """
487 return True 521 terses the status if all the files in a directory shares the same status
488 522
489 def absentones(removedfiles, missingfiles): 523 statuslist is scmutil.status() object which contains a list of files for
490 """ 524 each status.
491 Returns a dictionary of directories with files in it which are either 525 terseargs is string which is passed by the user as the argument to `--terse`
492 removed or missing (deleted) in them. 526 flag.
493 """ 527
494 absentdir = {} 528 The function makes a tree of objects of dirnode class, and at each node it
495 absentfiles = removedfiles + missingfiles 529 stores the information required to know whether we can terse a certain
496 while absentfiles: 530 directory or not.
497 f = absentfiles.pop() 531
498 par = os.path.dirname(f) 532 tersedict (defined in the function) is a dictionary which has one word key
499 if par == '': 533 for each status and a list of files and dir in that status as the respective
500 continue 534 value. The dictionary is passed to other helper functions which builds it.
501 # we need to store files rather than number of files as some files 535 """
502 # or subdirectories in a directory can be counted twice. This is 536 # the order matters here as that is used to produce final list
503 # also we have used sets here. 537 allst = ('m', 'a', 'r', 'd', 'u', 'i', 'c')
504 try: 538
505 absentdir[par].add(f) 539 # checking the argument validity
506 except KeyError: 540 for s in terseargs:
507 absentdir[par] = set([f]) 541 if s not in allst:
508 absentfiles.append(par) 542 raise error.Abort(_("'%s' not recognized") % s)
509 return absentdir 543
510 544 # creating a dirnode object for the root of the repo
511 indexes = {'m': 0, 'a': 1, 'r': 2, 'd': 3, 'u': 4, 'i': 5, 'c': 6} 545 rootobj = dirnode('')
512 # get a dictonary of directories and files which are missing as os.listdir() 546 pstatus = ('modified', 'added', 'deleted', 'clean', 'unknown',
513 # won't be able to list them. 547 'ignored', 'removed')
514 absentdir = absentones(statlist[2], statlist[3]) 548
515 finalrs = [[]] * len(indexes) 549 tersedict = {}
516 didsomethingchanged = False 550 for attrname in pstatus:
517 # dictionary to store number of files and subdir in a directory so that we 551 for f in getattr(statuslist, attrname):
518 # don't compute that again. 552 rootobj.addfile(f, attrname[0])
519 lencache = {} 553 tersedict[attrname[0]] = []
520 554
521 for st in pycompat.bytestr(status): 555 # we won't be tersing the root dir, so add files in it
522 556 _addfilestotersed(rootobj.path, rootobj.files, tersedict)
523 try: 557
524 ind = indexes[st] 558 # process each sub-directory and build tersedict
525 except KeyError: 559 for subdir in rootobj.subdirs.values():
526 # TODO: Need a better error message here 560 _processtersestatus(subdir, tersedict, terseargs)
527 raise error.Abort("'%s' not recognized" % st) 561
528 562 tersedlist = []
529 sfiles = statlist[ind] 563 for st in allst:
530 if not sfiles: 564 tersedict[st].sort()
531 continue 565 tersedlist.append(tersedict[st])
532 pardict = {} 566
533 for a in sfiles: 567 return tersedlist
534 par = os.path.dirname(a)
535 pardict.setdefault(par, []).append(a)
536
537 rs = []
538 newls = []
539 for par, files in sorted(pardict.iteritems()):
540 lenpar = numfiles(par)
541 if lenpar == len(files):
542 newls.append(par)
543
544 if not newls:
545 continue
546
547 while newls:
548 newel = newls.pop()
549 if newel == '':
550 continue
551 parn = os.path.dirname(newel)
552 pardict[newel] = []
553 # Adding pycompat.ossep as newel is a directory.
554 pardict.setdefault(parn, []).append(newel + pycompat.ossep)
555 lenpar = numfiles(parn)
556 if lenpar == len(pardict[parn]):
557 newls.append(parn)
558
559 # dict.values() for Py3 compatibility
560 for files in pardict.values():
561 rs.extend(files)
562
563 rs.sort()
564 finalrs[ind] = rs
565 didsomethingchanged = True
566
567 # If nothing is changed, make sure the order of files is preserved.
568 if not didsomethingchanged:
569 return statlist
570
571 for x in xrange(len(indexes)):
572 if not finalrs[x]:
573 finalrs[x] = statlist[x]
574
575 return finalrs
576 568
577 def _commentlines(raw): 569 def _commentlines(raw):
578 '''Surround lineswith a comment char and a new line''' 570 '''Surround lineswith a comment char and a new line'''
579 lines = raw.splitlines() 571 lines = raw.splitlines()
580 commentedlines = ['# %s' % line for line in lines] 572 commentedlines = ['# %s' % line for line in lines]