mercurial/encoding.py
changeset 30033 02dbfaa6df0b
parent 30030 0f6d6fdd3c2a
child 30034 e4a6b439acc5
equal deleted inserted replaced
30032:2219f4f82ede 30033:02dbfaa6df0b
    14 
    14 
    15 from . import (
    15 from . import (
    16     error,
    16     error,
    17     pycompat,
    17     pycompat,
    18 )
    18 )
       
    19 
       
    20 _sysstr = pycompat.sysstr
    19 
    21 
    20 if pycompat.ispy3:
    22 if pycompat.ispy3:
    21     unichr = chr
    23     unichr = chr
    22 
    24 
    23 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
    25 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
   134             # make sure string is actually stored in UTF-8
   136             # make sure string is actually stored in UTF-8
   135             u = s.decode('UTF-8')
   137             u = s.decode('UTF-8')
   136             if encoding == 'UTF-8':
   138             if encoding == 'UTF-8':
   137                 # fast path
   139                 # fast path
   138                 return s
   140                 return s
   139             r = u.encode(encoding, "replace")
   141             r = u.encode(_sysstr(encoding), u"replace")
   140             if u == r.decode(encoding):
   142             if u == r.decode(_sysstr(encoding)):
   141                 # r is a safe, non-lossy encoding of s
   143                 # r is a safe, non-lossy encoding of s
   142                 return r
   144                 return r
   143             return localstr(s, r)
   145             return localstr(s, r)
   144         except UnicodeDecodeError:
   146         except UnicodeDecodeError:
   145             # we should only get here if we're looking at an ancient changeset
   147             # we should only get here if we're looking at an ancient changeset
   146             try:
   148             try:
   147                 u = s.decode(fallbackencoding)
   149                 u = s.decode(_sysstr(fallbackencoding))
   148                 r = u.encode(encoding, "replace")
   150                 r = u.encode(_sysstr(encoding), u"replace")
   149                 if u == r.decode(encoding):
   151                 if u == r.decode(_sysstr(encoding)):
   150                     # r is a safe, non-lossy encoding of s
   152                     # r is a safe, non-lossy encoding of s
   151                     return r
   153                     return r
   152                 return localstr(u.encode('UTF-8'), r)
   154                 return localstr(u.encode('UTF-8'), r)
   153             except UnicodeDecodeError:
   155             except UnicodeDecodeError:
   154                 u = s.decode("utf-8", "replace") # last ditch
   156                 u = s.decode("utf-8", "replace") # last ditch
   155                 return u.encode(encoding, "replace") # can't round-trip
   157                 # can't round-trip
       
   158                 return u.encode(_sysstr(encoding), u"replace")
   156     except LookupError as k:
   159     except LookupError as k:
   157         raise error.Abort(k, hint="please check your locale settings")
   160         raise error.Abort(k, hint="please check your locale settings")
   158 
   161 
   159 def fromlocal(s):
   162 def fromlocal(s):
   160     """
   163     """
   170     # can we do a lossless round-trip?
   173     # can we do a lossless round-trip?
   171     if isinstance(s, localstr):
   174     if isinstance(s, localstr):
   172         return s._utf8
   175         return s._utf8
   173 
   176 
   174     try:
   177     try:
   175         return s.decode(encoding, encodingmode).encode("utf-8")
   178         u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
       
   179         return u.encode("utf-8")
   176     except UnicodeDecodeError as inst:
   180     except UnicodeDecodeError as inst:
   177         sub = s[max(0, inst.start - 10):inst.start + 10]
   181         sub = s[max(0, inst.start - 10):inst.start + 10]
   178         raise error.Abort("decoding near '%s': %s!" % (sub, inst))
   182         raise error.Abort("decoding near '%s': %s!" % (sub, inst))
   179     except LookupError as k:
   183     except LookupError as k:
   180         raise error.Abort(k, hint="please check your locale settings")
   184         raise error.Abort(k, hint="please check your locale settings")
   183 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
   187 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
   184         and "WFA" or "WF")
   188         and "WFA" or "WF")
   185 
   189 
   186 def colwidth(s):
   190 def colwidth(s):
   187     "Find the column width of a string for display in the local encoding"
   191     "Find the column width of a string for display in the local encoding"
   188     return ucolwidth(s.decode(encoding, 'replace'))
   192     return ucolwidth(s.decode(_sysstr(encoding), u'replace'))
   189 
   193 
   190 def ucolwidth(d):
   194 def ucolwidth(d):
   191     "Find the column width of a Unicode string for display"
   195     "Find the column width of a Unicode string for display"
   192     eaw = getattr(unicodedata, 'east_asian_width', None)
   196     eaw = getattr(unicodedata, 'east_asian_width', None)
   193     if eaw is not None:
   197     if eaw is not None:
   263     +++
   267     +++
   264     >>> print trim(t, 1, ellipsis=ellipsis)
   268     >>> print trim(t, 1, ellipsis=ellipsis)
   265     +
   269     +
   266     """
   270     """
   267     try:
   271     try:
   268         u = s.decode(encoding)
   272         u = s.decode(_sysstr(encoding))
   269     except UnicodeDecodeError:
   273     except UnicodeDecodeError:
   270         if len(s) <= width: # trimming is not needed
   274         if len(s) <= width: # trimming is not needed
   271             return s
   275             return s
   272         width -= len(ellipsis)
   276         width -= len(ellipsis)
   273         if width <= 0: # not enough room even for ellipsis
   277         if width <= 0: # not enough room even for ellipsis
   290         uslice = lambda i: u[:-i]
   294         uslice = lambda i: u[:-i]
   291         concat = lambda s: s + ellipsis
   295         concat = lambda s: s + ellipsis
   292     for i in xrange(1, len(u)):
   296     for i in xrange(1, len(u)):
   293         usub = uslice(i)
   297         usub = uslice(i)
   294         if ucolwidth(usub) <= width:
   298         if ucolwidth(usub) <= width:
   295             return concat(usub.encode(encoding))
   299             return concat(usub.encode(_sysstr(encoding)))
   296     return ellipsis # not enough room for multi-column characters
   300     return ellipsis # not enough room for multi-column characters
   297 
   301 
   298 def _asciilower(s):
   302 def _asciilower(s):
   299     '''convert a string to lowercase if ASCII
   303     '''convert a string to lowercase if ASCII
   300 
   304 
   335         pass
   339         pass
   336     try:
   340     try:
   337         if isinstance(s, localstr):
   341         if isinstance(s, localstr):
   338             u = s._utf8.decode("utf-8")
   342             u = s._utf8.decode("utf-8")
   339         else:
   343         else:
   340             u = s.decode(encoding, encodingmode)
   344             u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
   341 
   345 
   342         lu = u.lower()
   346         lu = u.lower()
   343         if u == lu:
   347         if u == lu:
   344             return s # preserve localstring
   348             return s # preserve localstring
   345         return lu.encode(encoding)
   349         return lu.encode(_sysstr(encoding))
   346     except UnicodeError:
   350     except UnicodeError:
   347         return s.lower() # we don't know how to fold this except in ASCII
   351         return s.lower() # we don't know how to fold this except in ASCII
   348     except LookupError as k:
   352     except LookupError as k:
   349         raise error.Abort(k, hint="please check your locale settings")
   353         raise error.Abort(k, hint="please check your locale settings")
   350 
   354 
   358 def upperfallback(s):
   362 def upperfallback(s):
   359     try:
   363     try:
   360         if isinstance(s, localstr):
   364         if isinstance(s, localstr):
   361             u = s._utf8.decode("utf-8")
   365             u = s._utf8.decode("utf-8")
   362         else:
   366         else:
   363             u = s.decode(encoding, encodingmode)
   367             u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
   364 
   368 
   365         uu = u.upper()
   369         uu = u.upper()
   366         if u == uu:
   370         if u == uu:
   367             return s # preserve localstring
   371             return s # preserve localstring
   368         return uu.encode(encoding)
   372         return uu.encode(_sysstr(encoding))
   369     except UnicodeError:
   373     except UnicodeError:
   370         return s.upper() # we don't know how to fold this except in ASCII
   374         return s.upper() # we don't know how to fold this except in ASCII
   371     except LookupError as k:
   375     except LookupError as k:
   372         raise error.Abort(k, hint="please check your locale settings")
   376         raise error.Abort(k, hint="please check your locale settings")
   373 
   377