mercurial/encoding.py
changeset 16274 5d75eb8568d1
parent 16133 84c58da3a1f8
child 16387 c481761033bd
equal deleted inserted replaced
16273:12e3f93b1cbc 16274:5d75eb8568d1
    90     'foo: ?'
    90     'foo: ?'
    91     >>> fromlocal(l) # magically in utf-8
    91     >>> fromlocal(l) # magically in utf-8
    92     'foo: \\xc3\\xa4'
    92     'foo: \\xc3\\xa4'
    93     """
    93     """
    94 
    94 
    95     for e in ('UTF-8', fallbackencoding):
    95     try:
    96         try:
    96         try:
    97             u = s.decode(e) # attempt strict decoding
    97             # make sure string is actually stored in UTF-8
       
    98             u = s.decode('UTF-8')
       
    99             if encoding == 'UTF-8':
       
   100                 # fast path
       
   101                 return s
    98             r = u.encode(encoding, "replace")
   102             r = u.encode(encoding, "replace")
    99             if u == r.decode(encoding):
   103             if u == r.decode(encoding):
   100                 # r is a safe, non-lossy encoding of s
   104                 # r is a safe, non-lossy encoding of s
   101                 return r
   105                 return r
   102             elif e == 'UTF-8':
   106             return localstr(s, r)
   103                 return localstr(s, r)
   107         except UnicodeDecodeError:
   104             else:
   108             # we should only get here if we're looking at an ancient changeset
       
   109             try:
       
   110                 u = s.decode(fallbackencoding)
       
   111                 r = u.encode(encoding, "replace")
       
   112                 if u == r.decode(encoding):
       
   113                     # r is a safe, non-lossy encoding of s
       
   114                     return r
   105                 return localstr(u.encode('UTF-8'), r)
   115                 return localstr(u.encode('UTF-8'), r)
   106 
   116             except UnicodeDecodeError:
   107         except LookupError, k:
   117                 u = s.decode("utf-8", "replace") # last ditch
   108             raise error.Abort(k, hint="please check your locale settings")
   118                 return u.encode(encoding, "replace") # can't round-trip
   109         except UnicodeDecodeError:
   119     except LookupError, k:
   110             pass
   120         raise error.Abort(k, hint="please check your locale settings")
   111     u = s.decode("utf-8", "replace") # last ditch
       
   112     return u.encode(encoding, "replace") # can't round-trip
       
   113 
   121 
   114 def fromlocal(s):
   122 def fromlocal(s):
   115     """
   123     """
   116     Convert a string from the local character encoding to UTF-8
   124     Convert a string from the local character encoding to UTF-8
   117 
   125