Mercurial > public > mercurial-scm > hg-stable
diff hgext/largefiles/lfutil.py @ 15168:cfccd3bee7b3
hgext: add largefiles extension
This code has a number of contributors and a complicated history prior to its
introduction that can be seen by visiting:
https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles
http://hg.gerg.ca/hg-bfiles
and looking at the included copyright notices and contributors list.
author | various |
---|---|
date | Sat, 24 Sep 2011 17:35:45 +0200 |
parents | |
children | aa262fff87ac |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgext/largefiles/lfutil.py Sat Sep 24 17:35:45 2011 +0200 @@ -0,0 +1,502 @@ +# Copyright 2009-2010 Gregory P. Ward +# Copyright 2009-2010 Intelerad Medical Systems Incorporated +# Copyright 2010-2011 Fog Creek Software +# Copyright 2010-2011 Unity Technologies +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +'''largefiles utility code: must not import other modules in this package.''' + +import os +import errno +import inspect +import shutil +import stat +import hashlib + +from mercurial import cmdutil, dirstate, httpconnection, match as match_, \ + url as url_, util +from mercurial.i18n import _ + +try: + from mercurial import scmutil +except ImportError: + pass + +shortname = '.hglf' +longname = 'largefiles' + + +# -- Portability wrappers ---------------------------------------------- + +if 'subrepos' in inspect.getargspec(dirstate.dirstate.status)[0]: + # for Mercurial >= 1.5 + def dirstate_walk(dirstate, matcher, unknown=False, ignored=False): + return dirstate.walk(matcher, [], unknown, ignored) +else: + # for Mercurial <= 1.4 + def dirstate_walk(dirstate, matcher, unknown=False, ignored=False): + return dirstate.walk(matcher, unknown, ignored) + +def repo_add(repo, list): + try: + # Mercurial <= 1.5 + add = repo.add + except AttributeError: + # Mercurial >= 1.6 + add = repo[None].add + return add(list) + +def repo_remove(repo, list, unlink=False): + try: + # Mercurial <= 1.5 + remove = repo.remove + except AttributeError: + # Mercurial >= 1.6 + try: + # Mercurial <= 1.8 + remove = repo[None].remove + except AttributeError: + # Mercurial >= 1.9 + def remove(list, unlink): + wlock = repo.wlock() + try: + if unlink: + for f in list: + try: + util.unlinkpath(repo.wjoin(f)) + except OSError, inst: + if inst.errno != errno.ENOENT: + raise + repo[None].forget(list) + finally: + wlock.release() + + return remove(list, unlink=unlink) + +def repo_forget(repo, list): + try: + # Mercurial <= 1.5 + forget = repo.forget + except AttributeError: + # Mercurial >= 1.6 + forget = repo[None].forget + return forget(list) + +def findoutgoing(repo, remote, force): + # First attempt is for Mercurial <= 1.5 second is for >= 1.6 + try: + return repo.findoutgoing(remote) + except AttributeError: + from mercurial import discovery + try: + # Mercurial <= 1.8 + return discovery.findoutgoing(repo, remote, force=force) + except AttributeError: + # Mercurial >= 1.9 + common, _anyinc, _heads = discovery.findcommonincoming(repo, + remote, force=force) + return repo.changelog.findmissing(common) + +# -- Private worker functions ------------------------------------------ + +if os.name == 'nt': + from mercurial import win32 + try: + linkfn = win32.oslink + except: + linkfn = win32.os_link +else: + linkfn = os.link + +def link(src, dest): + try: + linkfn(src, dest) + except OSError: + # If hardlinks fail fall back on copy + shutil.copyfile(src, dest) + os.chmod(dest, os.stat(src).st_mode) + +def systemcachepath(ui, hash): + path = ui.config(longname, 'systemcache', None) + if path: + path = os.path.join(path, hash) + else: + if os.name == 'nt': + path = os.path.join(os.getenv('LOCALAPPDATA') or \ + os.getenv('APPDATA'), longname, hash) + elif os.name == 'posix': + path = os.path.join(os.getenv('HOME'), '.' + longname, hash) + else: + raise util.Abort(_('Unknown operating system: %s\n') % os.name) + return path + +def insystemcache(ui, hash): + return os.path.exists(systemcachepath(ui, hash)) + +def findfile(repo, hash): + if incache(repo, hash): + repo.ui.note(_('Found %s in cache\n') % hash) + return cachepath(repo, hash) + if insystemcache(repo.ui, hash): + repo.ui.note(_('Found %s in system cache\n') % hash) + return systemcachepath(repo.ui, hash) + return None + +class largefiles_dirstate(dirstate.dirstate): + def __getitem__(self, key): + return super(largefiles_dirstate, self).__getitem__(unixpath(key)) + def normal(self, f): + return super(largefiles_dirstate, self).normal(unixpath(f)) + def remove(self, f): + return super(largefiles_dirstate, self).remove(unixpath(f)) + def add(self, f): + return super(largefiles_dirstate, self).add(unixpath(f)) + def drop(self, f): + return super(largefiles_dirstate, self).drop(unixpath(f)) + def forget(self, f): + return super(largefiles_dirstate, self).forget(unixpath(f)) + +def openlfdirstate(ui, repo): + ''' + Return a dirstate object that tracks big files: i.e. its root is the + repo root, but it is saved in .hg/largefiles/dirstate. + ''' + admin = repo.join(longname) + try: + # Mercurial >= 1.9 + opener = scmutil.opener(admin) + except ImportError: + # Mercurial <= 1.8 + opener = util.opener(admin) + if hasattr(repo.dirstate, '_validate'): + lfdirstate = largefiles_dirstate(opener, ui, repo.root, + repo.dirstate._validate) + else: + lfdirstate = largefiles_dirstate(opener, ui, repo.root) + + # If the largefiles dirstate does not exist, populate and create it. This + # ensures that we create it on the first meaningful largefiles operation in + # a new clone. It also gives us an easy way to forcibly rebuild largefiles + # state: + # rm .hg/largefiles/dirstate && hg status + # Or even, if things are really messed up: + # rm -rf .hg/largefiles && hg status + if not os.path.exists(os.path.join(admin, 'dirstate')): + util.makedirs(admin) + matcher = getstandinmatcher(repo) + for standin in dirstate_walk(repo.dirstate, matcher): + lfile = splitstandin(standin) + hash = readstandin(repo, lfile) + lfdirstate.normallookup(lfile) + try: + if hash == hashfile(lfile): + lfdirstate.normal(lfile) + except IOError, err: + if err.errno != errno.ENOENT: + raise + + lfdirstate.write() + + return lfdirstate + +def lfdirstate_status(lfdirstate, repo, rev): + wlock = repo.wlock() + try: + match = match_.always(repo.root, repo.getcwd()) + s = lfdirstate.status(match, [], False, False, False) + unsure, modified, added, removed, missing, unknown, ignored, clean = s + for lfile in unsure: + if repo[rev][standin(lfile)].data().strip() != \ + hashfile(repo.wjoin(lfile)): + modified.append(lfile) + else: + clean.append(lfile) + lfdirstate.normal(lfile) + lfdirstate.write() + finally: + wlock.release() + return (modified, added, removed, missing, unknown, ignored, clean) + +def listlfiles(repo, rev=None, matcher=None): + '''list largefiles in the working copy or specified changeset''' + + if matcher is None: + matcher = getstandinmatcher(repo) + + # ignore unknown files in working directory + return [splitstandin(f) for f in repo[rev].walk(matcher) \ + if rev is not None or repo.dirstate[f] != '?'] + +def incache(repo, hash): + return os.path.exists(cachepath(repo, hash)) + +def createdir(dir): + if not os.path.exists(dir): + os.makedirs(dir) + +def cachepath(repo, hash): + return repo.join(os.path.join(longname, hash)) + +def copyfromcache(repo, hash, filename): + '''copyfromcache copies the specified largefile from the repo or system + cache to the specified location in the repository. It will not throw an + exception on failure, as it is meant to be called only after ensuring that + the needed largefile exists in the cache.''' + path = findfile(repo, hash) + if path is None: + return False + util.makedirs(os.path.dirname(repo.wjoin(filename))) + shutil.copy(path, repo.wjoin(filename)) + return True + +def copytocache(repo, rev, file, uploaded=False): + hash = readstandin(repo, file) + if incache(repo, hash): + return + copytocacheabsolute(repo, repo.wjoin(file), hash) + +def copytocacheabsolute(repo, file, hash): + createdir(os.path.dirname(cachepath(repo, hash))) + if insystemcache(repo.ui, hash): + link(systemcachepath(repo.ui, hash), cachepath(repo, hash)) + else: + shutil.copyfile(file, cachepath(repo, hash)) + os.chmod(cachepath(repo, hash), os.stat(file).st_mode) + linktosystemcache(repo, hash) + +def linktosystemcache(repo, hash): + createdir(os.path.dirname(systemcachepath(repo.ui, hash))) + link(cachepath(repo, hash), systemcachepath(repo.ui, hash)) + +def getstandinmatcher(repo, pats=[], opts={}): + '''Return a match object that applies pats to the standin directory''' + standindir = repo.pathto(shortname) + if pats: + # patterns supplied: search standin directory relative to current dir + cwd = repo.getcwd() + if os.path.isabs(cwd): + # cwd is an absolute path for hg -R <reponame> + # work relative to the repository root in this case + cwd = '' + pats = [os.path.join(standindir, cwd, pat) for pat in pats] + elif os.path.isdir(standindir): + # no patterns: relative to repo root + pats = [standindir] + else: + # no patterns and no standin dir: return matcher that matches nothing + match = match_.match(repo.root, None, [], exact=True) + match.matchfn = lambda f: False + return match + return getmatcher(repo, pats, opts, showbad=False) + +def getmatcher(repo, pats=[], opts={}, showbad=True): + '''Wrapper around scmutil.match() that adds showbad: if false, neuter + the match object\'s bad() method so it does not print any warnings + about missing files or directories.''' + try: + # Mercurial >= 1.9 + match = scmutil.match(repo[None], pats, opts) + except ImportError: + # Mercurial <= 1.8 + match = cmdutil.match(repo, pats, opts) + + if not showbad: + match.bad = lambda f, msg: None + return match + +def composestandinmatcher(repo, rmatcher): + '''Return a matcher that accepts standins corresponding to the files + accepted by rmatcher. Pass the list of files in the matcher as the + paths specified by the user.''' + smatcher = getstandinmatcher(repo, rmatcher.files()) + isstandin = smatcher.matchfn + def composed_matchfn(f): + return isstandin(f) and rmatcher.matchfn(splitstandin(f)) + smatcher.matchfn = composed_matchfn + + return smatcher + +def standin(filename): + '''Return the repo-relative path to the standin for the specified big + file.''' + # Notes: + # 1) Most callers want an absolute path, but _create_standin() needs + # it repo-relative so lfadd() can pass it to repo_add(). So leave + # it up to the caller to use repo.wjoin() to get an absolute path. + # 2) Join with '/' because that's what dirstate always uses, even on + # Windows. Change existing separator to '/' first in case we are + # passed filenames from an external source (like the command line). + return shortname + '/' + filename.replace(os.sep, '/') + +def isstandin(filename): + '''Return true if filename is a big file standin. filename must + be in Mercurial\'s internal form (slash-separated).''' + return filename.startswith(shortname + '/') + +def splitstandin(filename): + # Split on / because that's what dirstate always uses, even on Windows. + # Change local separator to / first just in case we are passed filenames + # from an external source (like the command line). + bits = filename.replace(os.sep, '/').split('/', 1) + if len(bits) == 2 and bits[0] == shortname: + return bits[1] + else: + return None + +def updatestandin(repo, standin): + file = repo.wjoin(splitstandin(standin)) + if os.path.exists(file): + hash = hashfile(file) + executable = getexecutable(file) + writestandin(repo, standin, hash, executable) + +def readstandin(repo, filename, node=None): + '''read hex hash from standin for filename at given node, or working + directory if no node is given''' + return repo[node][standin(filename)].data().strip() + +def writestandin(repo, standin, hash, executable): + '''write hhash to <repo.root>/<standin>''' + writehash(hash, repo.wjoin(standin), executable) + +def copyandhash(instream, outfile): + '''Read bytes from instream (iterable) and write them to outfile, + computing the SHA-1 hash of the data along the way. Close outfile + when done and return the binary hash.''' + hasher = util.sha1('') + for data in instream: + hasher.update(data) + outfile.write(data) + + # Blecch: closing a file that somebody else opened is rude and + # wrong. But it's so darn convenient and practical! After all, + # outfile was opened just to copy and hash. + outfile.close() + + return hasher.digest() + +def hashrepofile(repo, file): + return hashfile(repo.wjoin(file)) + +def hashfile(file): + if not os.path.exists(file): + return '' + hasher = util.sha1('') + fd = open(file, 'rb') + for data in blockstream(fd): + hasher.update(data) + fd.close() + return hasher.hexdigest() + +class limitreader(object): + def __init__(self, f, limit): + self.f = f + self.limit = limit + + def read(self, length): + if self.limit == 0: + return '' + length = length > self.limit and self.limit or length + self.limit -= length + return self.f.read(length) + + def close(self): + pass + +def blockstream(infile, blocksize=128 * 1024): + """Generator that yields blocks of data from infile and closes infile.""" + while True: + data = infile.read(blocksize) + if not data: + break + yield data + # Same blecch as above. + infile.close() + +def readhash(filename): + rfile = open(filename, 'rb') + hash = rfile.read(40) + rfile.close() + if len(hash) < 40: + raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)') + % (filename, len(hash))) + return hash + +def writehash(hash, filename, executable): + util.makedirs(os.path.dirname(filename)) + if os.path.exists(filename): + os.unlink(filename) + wfile = open(filename, 'wb') + + try: + wfile.write(hash) + wfile.write('\n') + finally: + wfile.close() + if os.path.exists(filename): + os.chmod(filename, getmode(executable)) + +def getexecutable(filename): + mode = os.stat(filename).st_mode + return (mode & stat.S_IXUSR) and (mode & stat.S_IXGRP) and (mode & \ + stat.S_IXOTH) + +def getmode(executable): + if executable: + return 0755 + else: + return 0644 + +def urljoin(first, second, *arg): + def join(left, right): + if not left.endswith('/'): + left += '/' + if right.startswith('/'): + right = right[1:] + return left + right + + url = join(first, second) + for a in arg: + url = join(url, a) + return url + +def hexsha1(data): + """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like + object data""" + h = hashlib.sha1() + for chunk in util.filechunkiter(data): + h.update(chunk) + return h.hexdigest() + +def httpsendfile(ui, filename): + try: + # Mercurial >= 1.9 + return httpconnection.httpsendfile(ui, filename, 'rb') + except ImportError: + if 'ui' in inspect.getargspec(url_.httpsendfile.__init__)[0]: + # Mercurial == 1.8 + return url_.httpsendfile(ui, filename, 'rb') + else: + # Mercurial <= 1.7 + return url_.httpsendfile(filename, 'rb') + +# Convert a path to a unix style path. This is used to give a +# canonical path to the lfdirstate. +def unixpath(path): + return os.path.normpath(path).replace(os.sep, '/') + +def islfilesrepo(repo): + return 'largefiles' in repo.requirements and any_(shortname+'/' in f[0] for f in + repo.store.datafiles()) + +def any_(gen): + for x in gen: + if x: + return True + return False + +class storeprotonotcapable(BaseException): + def __init__(self, storetypes): + self.storetypes = storetypes