25 Returns a generator of possibly rewritten tokens. |
25 Returns a generator of possibly rewritten tokens. |
26 |
26 |
27 The input token list may be mutated as part of processing. However, |
27 The input token list may be mutated as part of processing. However, |
28 its changes do not necessarily match the output token stream. |
28 its changes do not necessarily match the output token stream. |
29 """ |
29 """ |
|
30 sysstrtokens = set() |
|
31 |
30 # The following utility functions access the tokens list and i index of |
32 # The following utility functions access the tokens list and i index of |
31 # the ``for i, t in enumerate(tokens)`` loop below |
33 # the ``for i, t in enumerate(tokens)`` loop below |
32 def _isop(j, *o): |
34 def _isop(j, *o): |
33 """Assert that tokens[j] is an OP with one of the given values""" |
35 """Assert that tokens[j] is an OP with one of the given values""" |
34 try: |
36 try: |
60 elif _isop(j, ',') and nested == 0: |
62 elif _isop(j, ',') and nested == 0: |
61 n -= 1 |
63 n -= 1 |
62 |
64 |
63 return None |
65 return None |
64 |
66 |
65 def _ensureunicode(j): |
67 def _ensuresysstr(j): |
66 """Make sure the token at j is a unicode string |
68 """Make sure the token at j is a system string |
67 |
69 |
68 This rewrites a string token to include the unicode literal prefix |
70 Remember the given token so the string transformer won't add |
69 so the string transformer won't add the byte prefix. |
71 the byte prefix. |
70 |
72 |
71 Ignores tokens that are not strings. Assumes bounds checking has |
73 Ignores tokens that are not strings. Assumes bounds checking has |
72 already been done. |
74 already been done. |
73 |
75 |
74 """ |
76 """ |
75 st = tokens[j] |
77 st = tokens[j] |
76 if st.type == token.STRING and st.string.startswith(("'", '"')): |
78 if st.type == token.STRING and st.string.startswith(("'", '"')): |
77 tokens[j] = st._replace(string='u%s' % st.string) |
79 sysstrtokens.add(st) |
78 |
80 |
79 for i, t in enumerate(tokens): |
81 for i, t in enumerate(tokens): |
80 # Convert most string literals to byte literals. String literals |
82 # Convert most string literals to byte literals. String literals |
81 # in Python 2 are bytes. String literals in Python 3 are unicode. |
83 # in Python 2 are bytes. String literals in Python 3 are unicode. |
82 # Most strings in Mercurial are bytes and unicode strings are rare. |
84 # Most strings in Mercurial are bytes and unicode strings are rare. |
83 # Rather than rewrite all string literals to use ``b''`` to indicate |
85 # Rather than rewrite all string literals to use ``b''`` to indicate |
84 # byte strings, we apply this token transformer to insert the ``b`` |
86 # byte strings, we apply this token transformer to insert the ``b`` |
85 # prefix nearly everywhere. |
87 # prefix nearly everywhere. |
86 if t.type == token.STRING: |
88 if t.type == token.STRING and t not in sysstrtokens: |
87 s = t.string |
89 s = t.string |
88 |
90 |
89 # Preserve docstrings as string literals. This is inconsistent |
91 # Preserve docstrings as string literals. This is inconsistent |
90 # with regular unprefixed strings. However, the |
92 # with regular unprefixed strings. However, the |
91 # "from __future__" parsing (which allows a module docstring to |
93 # "from __future__" parsing (which allows a module docstring to |
115 # *attr() builtins don't accept byte strings to 2nd argument. |
117 # *attr() builtins don't accept byte strings to 2nd argument. |
116 if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and |
118 if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and |
117 not _isop(i - 1, '.')): |
119 not _isop(i - 1, '.')): |
118 arg1idx = _findargnofcall(1) |
120 arg1idx = _findargnofcall(1) |
119 if arg1idx is not None: |
121 if arg1idx is not None: |
120 _ensureunicode(arg1idx) |
122 _ensuresysstr(arg1idx) |
121 |
123 |
122 # .encode() and .decode() on str/bytes/unicode don't accept |
124 # .encode() and .decode() on str/bytes/unicode don't accept |
123 # byte strings on Python 3. |
125 # byte strings on Python 3. |
124 elif fn in ('encode', 'decode') and _isop(i - 1, '.'): |
126 elif fn in ('encode', 'decode') and _isop(i - 1, '.'): |
125 for argn in range(2): |
127 for argn in range(2): |
126 argidx = _findargnofcall(argn) |
128 argidx = _findargnofcall(argn) |
127 if argidx is not None: |
129 if argidx is not None: |
128 _ensureunicode(argidx) |
130 _ensuresysstr(argidx) |
129 |
131 |
130 # Rewrite iteritems/itervalues to items/values, as the iter* methods |
132 # Rewrite iteritems/itervalues to items/values, as the iter* methods |
131 # are not present in the Python 3 world. |
133 # are not present in the Python 3 world. |
132 elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): |
134 elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): |
133 yield t._replace(string=fn[4:]) |
135 yield t._replace(string=fn[4:]) |