diff mercurial/encoding.py @ 13940:b7b26e54e37a stable

encoding: avoid localstr when a string can be encoded losslessly (issue2763) localstr's hash method exists to prevent bogus matching on lossy local encodings. For instance, we don't want 'caf?' to match 'café' in an ASCII locale. But when café can be losslessly encoded in the local charset, we can simply use a normal string and avoid the hashing trick. This avoids using localstr's hash method, which would prevent a match between …
author Matt Mackall <mpm@selenic.com>
date Fri, 15 Apr 2011 23:45:41 -0500
parents 120eccaaa522
children e38846a79a23
line wrap: on
line diff
--- a/mercurial/encoding.py	Fri Apr 15 16:35:32 2011 +0300
+++ b/mercurial/encoding.py	Fri Apr 15 23:45:41 2011 -0500
@@ -95,11 +95,15 @@
     for e in ('UTF-8', fallbackencoding):
         try:
             u = s.decode(e) # attempt strict decoding
-            if e == 'UTF-8':
-                return localstr(s, u.encode(encoding, "replace"))
+            r = u.encode(encoding, "replace")
+            if u == r.decode(encoding):
+                # r is a safe, non-lossy encoding of s
+                return r
+            elif e == 'UTF-8':
+                return localstr(s, r)
             else:
-                return localstr(u.encode('UTF-8'),
-                                u.encode(encoding, "replace"))
+                return localstr(u.encode('UTF-8'), r)
+
         except LookupError, k:
             raise error.Abort("%s, please check your locale settings" % k)
         except UnicodeDecodeError: