comparison mercurial/encoding.py @ 15066:24efa83d81cb stable

i18n: calculate terminal columns by width information of each character. Neither the number of 'bytes' in any encoding nor the number of 'characters' is appropriate for calculating the terminal columns of a specified string. This patch modifies MBTextWrapper by: - overriding '_wrap_chunks()' to make it use 'encoding.colwidth()' instead of the built-in 'len()' for the columns of a string - fixing '_cutdown()' to make it use 'encoding.colwidth()' instead of a local, similar but incorrect implementation. This patch also modifies 'encoding.py' by: - dividing 'colwidth()' into 2 pieces: one for calculating the columns of a specified UNICODE string, and another for the rest of the original function. The former is used from MBTextWrapper in 'util.py'. - preventing 'colwidth()' from evaluating the HGENCODINGAMBIGUOUS configuration on each invocation: the 'unicodedata.east_asian_width' check is kept intact to reduce startup cost.
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
date Sat, 27 Aug 2011 04:56:12 +0900
parents 1f581a8b1948
children 176882876780
comparison
equal deleted inserted replaced
15065:24a6c3f903bb 15066:24efa83d81cb
133 raise error.Abort("decoding near '%s': %s!" % (sub, inst)) 133 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
134 except LookupError, k: 134 except LookupError, k:
135 raise error.Abort("%s, please check your locale settings" % k) 135 raise error.Abort("%s, please check your locale settings" % k)
136 136
137 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. 137 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
138 ambiguous = os.environ.get("HGENCODINGAMBIGUOUS", "narrow") 138 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
139 and "WFA" or "WF")
139 140
140 def colwidth(s): 141 def colwidth(s):
141 "Find the column width of a UTF-8 string for display" 142 "Find the column width of a UTF-8 string for display"
142 d = s.decode(encoding, 'replace') 143 return ucolwidth(s.decode(encoding, 'replace'))
144
145 def ucolwidth(d):
146 "Find the column width of a Unicode string for display"
143 eaw = getattr(unicodedata, 'east_asian_width', None) 147 eaw = getattr(unicodedata, 'east_asian_width', None)
144 if eaw is not None: 148 if eaw is not None:
145 wide = "WF"
146 if ambiguous == "wide":
147 wide = "WFA"
148 return sum([eaw(c) in wide and 2 or 1 for c in d]) 149 return sum([eaw(c) in wide and 2 or 1 for c in d])
149 return len(d) 150 return len(d)
150 151
151 def lower(s): 152 def lower(s):
152 "best-effort encoding-aware case-folding of local string s" 153 "best-effort encoding-aware case-folding of local string s"