mercurial/store.py
changeset 14288 00a0ab08f986
parent 14194 3a90fb3addc1
child 15057 774da7121fc9
equal deleted inserted replaced
14287:7c231754a621 14288:00a0ab08f986
    68     'the~07quick~adshot'
    68     'the~07quick~adshot'
    69     >>> dec('the~07quick~adshot')
    69     >>> dec('the~07quick~adshot')
    70     'the\\x07quick\\xadshot'
    70     'the\\x07quick\\xadshot'
    71     '''
    71     '''
    72     e = '_'
    72     e = '_'
    73     win_reserved = [ord(x) for x in '\\:*?"<>|']
    73     winreserved = [ord(x) for x in '\\:*?"<>|']
    74     cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
    74     cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
    75     for x in (range(32) + range(126, 256) + win_reserved):
    75     for x in (range(32) + range(126, 256) + winreserved):
    76         cmap[chr(x)] = "~%02x" % x
    76         cmap[chr(x)] = "~%02x" % x
    77     for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
    77     for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
    78         cmap[chr(x)] = e + chr(x).lower()
    78         cmap[chr(x)] = e + chr(x).lower()
    79     dmap = {}
    79     dmap = {}
    80     for k, v in cmap.iteritems():
    80     for k, v in cmap.iteritems():
    94     return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
    94     return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
    95             lambda s: decodedir("".join(list(decode(s)))))
    95             lambda s: decodedir("".join(list(decode(s)))))
    96 
    96 
    97 encodefilename, decodefilename = _buildencodefun()
    97 encodefilename, decodefilename = _buildencodefun()
    98 
    98 
    99 def _build_lower_encodefun():
    99 def _buildlowerencodefun():
   100     '''
   100     '''
   101     >>> f = _build_lower_encodefun()
   101     >>> f = _buildlowerencodefun()
   102     >>> f('nothing/special.txt')
   102     >>> f('nothing/special.txt')
   103     'nothing/special.txt'
   103     'nothing/special.txt'
   104     >>> f('HELLO')
   104     >>> f('HELLO')
   105     'hello'
   105     'hello'
   106     >>> f('hello:world?')
   106     >>> f('hello:world?')
   107     'hello~3aworld~3f'
   107     'hello~3aworld~3f'
   108     >>> f('the\x07quick\xADshot')
   108     >>> f('the\x07quick\xADshot')
   109     'the~07quick~adshot'
   109     'the~07quick~adshot'
   110     '''
   110     '''
   111     win_reserved = [ord(x) for x in '\\:*?"<>|']
   111     winreserved = [ord(x) for x in '\\:*?"<>|']
   112     cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
   112     cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
   113     for x in (range(32) + range(126, 256) + win_reserved):
   113     for x in (range(32) + range(126, 256) + winreserved):
   114         cmap[chr(x)] = "~%02x" % x
   114         cmap[chr(x)] = "~%02x" % x
   115     for x in range(ord("A"), ord("Z")+1):
   115     for x in range(ord("A"), ord("Z")+1):
   116         cmap[chr(x)] = chr(x).lower()
   116         cmap[chr(x)] = chr(x).lower()
   117     return lambda s: "".join([cmap[c] for c in s])
   117     return lambda s: "".join([cmap[c] for c in s])
   118 
   118 
   119 lowerencode = _build_lower_encodefun()
   119 lowerencode = _buildlowerencodefun()
   120 
   120 
   121 _windows_reserved_filenames = '''con prn aux nul
   121 _winreservednames = '''con prn aux nul
   122     com1 com2 com3 com4 com5 com6 com7 com8 com9
   122     com1 com2 com3 com4 com5 com6 com7 com8 com9
   123     lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
   123     lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
   124 def _auxencode(path, dotencode):
   124 def _auxencode(path, dotencode):
   125     '''
   125     '''
   126     Encodes filenames containing names reserved by Windows or which end in
   126     Encodes filenames containing names reserved by Windows or which end in
   141     '''
   141     '''
   142     res = []
   142     res = []
   143     for n in path.split('/'):
   143     for n in path.split('/'):
   144         if n:
   144         if n:
   145             base = n.split('.')[0]
   145             base = n.split('.')[0]
   146             if base and (base in _windows_reserved_filenames):
   146             if base and (base in _winreservednames):
   147                 # encode third letter ('aux' -> 'au~78')
   147                 # encode third letter ('aux' -> 'au~78')
   148                 ec = "~%02x" % ord(n[2])
   148                 ec = "~%02x" % ord(n[2])
   149                 n = n[0:2] + ec + n[3:]
   149                 n = n[0:2] + ec + n[3:]
   150             if n[-1] in '. ':
   150             if n[-1] in '. ':
   151                 # encode last period or space ('foo...' -> 'foo..~2e')
   151                 # encode last period or space ('foo...' -> 'foo..~2e')
   153             if dotencode and n[0] in '. ':
   153             if dotencode and n[0] in '. ':
   154                 n = "~%02x" % ord(n[0]) + n[1:]
   154                 n = "~%02x" % ord(n[0]) + n[1:]
   155         res.append(n)
   155         res.append(n)
   156     return '/'.join(res)
   156     return '/'.join(res)
   157 
   157 
   158 MAX_PATH_LEN_IN_HGSTORE = 120
   158 _maxstorepathlen = 120
   159 DIR_PREFIX_LEN = 8
   159 _dirprefixlen = 8
   160 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
   160 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
   161 def _hybridencode(path, auxencode):
   161 def _hybridencode(path, auxencode):
   162     '''encodes path with a length limit
   162     '''encodes path with a length limit
   163 
   163 
   164     Encodes all paths that begin with 'data/', according to the following.
   164     Encodes all paths that begin with 'data/', according to the following.
   165 
   165 
   171     Relevant path components consisting of Windows reserved filenames are
   171     Relevant path components consisting of Windows reserved filenames are
   172     masked by encoding the third character ('aux' -> 'au~78', see auxencode).
   172     masked by encoding the third character ('aux' -> 'au~78', see auxencode).
   173 
   173 
   174     Hashed encoding (not reversible):
   174     Hashed encoding (not reversible):
   175 
   175 
   176     If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
   176     If the default-encoded path is longer than _maxstorepathlen, a
   177     non-reversible hybrid hashing of the path is done instead.
   177     non-reversible hybrid hashing of the path is done instead.
   178     This encoding uses up to DIR_PREFIX_LEN characters of all directory
   178     This encoding uses up to _dirprefixlen characters of all directory
   179     levels of the lowerencoded path, but not more levels than can fit into
   179     levels of the lowerencoded path, but not more levels than can fit into
   180     _MAX_SHORTENED_DIRS_LEN.
   180     _maxshortdirslen.
   181     Then follows the filler followed by the sha digest of the full path.
   181     Then follows the filler followed by the sha digest of the full path.
   182     The filler is the beginning of the basename of the lowerencoded path
   182     The filler is the beginning of the basename of the lowerencoded path
   183     (the basename is everything after the last path separator). The filler
   183     (the basename is everything after the last path separator). The filler
   184     is as long as possible, filling in characters from the basename until
   184     is as long as possible, filling in characters from the basename until
   185     the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
   185     the encoded path has _maxstorepathlen characters (or all chars of the
   186     of the basename have been taken).
   186     basename have been taken).
   187     The extension (e.g. '.i' or '.d') is preserved.
   187     The extension (e.g. '.i' or '.d') is preserved.
   188 
   188 
   189     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
   189     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
   190     encoding was used.
   190     encoding was used.
   191     '''
   191     '''
   193         return path
   193         return path
   194     # escape directories ending with .i and .d
   194     # escape directories ending with .i and .d
   195     path = encodedir(path)
   195     path = encodedir(path)
   196     ndpath = path[len('data/'):]
   196     ndpath = path[len('data/'):]
   197     res = 'data/' + auxencode(encodefilename(ndpath))
   197     res = 'data/' + auxencode(encodefilename(ndpath))
   198     if len(res) > MAX_PATH_LEN_IN_HGSTORE:
   198     if len(res) > _maxstorepathlen:
   199         digest = _sha(path).hexdigest()
   199         digest = _sha(path).hexdigest()
   200         aep = auxencode(lowerencode(ndpath))
   200         aep = auxencode(lowerencode(ndpath))
   201         _root, ext = os.path.splitext(aep)
   201         _root, ext = os.path.splitext(aep)
   202         parts = aep.split('/')
   202         parts = aep.split('/')
   203         basename = parts[-1]
   203         basename = parts[-1]
   204         sdirs = []
   204         sdirs = []
   205         for p in parts[:-1]:
   205         for p in parts[:-1]:
   206             d = p[:DIR_PREFIX_LEN]
   206             d = p[:_dirprefixlen]
   207             if d[-1] in '. ':
   207             if d[-1] in '. ':
   208                 # Windows can't access dirs ending in period or space
   208                 # Windows can't access dirs ending in period or space
   209                 d = d[:-1] + '_'
   209                 d = d[:-1] + '_'
   210             t = '/'.join(sdirs) + '/' + d
   210             t = '/'.join(sdirs) + '/' + d
   211             if len(t) > _MAX_SHORTENED_DIRS_LEN:
   211             if len(t) > _maxshortdirslen:
   212                 break
   212                 break
   213             sdirs.append(d)
   213             sdirs.append(d)
   214         dirs = '/'.join(sdirs)
   214         dirs = '/'.join(sdirs)
   215         if len(dirs) > 0:
   215         if len(dirs) > 0:
   216             dirs += '/'
   216             dirs += '/'
   217         res = 'dh/' + dirs + digest + ext
   217         res = 'dh/' + dirs + digest + ext
   218         space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
   218         spaceleft = _maxstorepathlen - len(res)
   219         if space_left > 0:
   219         if spaceleft > 0:
   220             filler = basename[:space_left]
   220             filler = basename[:spaceleft]
   221             res = 'dh/' + dirs + filler + digest + ext
   221             res = 'dh/' + dirs + filler + digest + ext
   222     return res
   222     return res
   223 
   223 
   224 def _calcmode(path):
   224 def _calcmode(path):
   225     try:
   225     try: