comparison contrib/byteify-strings.py @ 38390:47dd23e6b116

byteify-strings: try to preserve column alignment
author Yuya Nishihara <yuya@tcha.org>
date Fri, 01 Jun 2018 00:13:55 +0900
parents 1d68fd5f614a
children f77bbd34a1df
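
Why the change, on a hypothetical snippet that is not part of the patch: inserting the "b" prefix shifts everything after it on the same line one column to the right, so a continuation line that used to line up under an opening paren can be left misaligned once that paren has moved. The bookkeeping added below shifts such continuation lines by the same amount.

    # hypothetical input
    f('foo', g('bar',
               'baz'))

    # before this change the transformed output left the continuation line
    # one column too far to the left:
    f(b'foo', g(b'bar',
               b'baz'))

    # with this change the continuation line moves along with g(...):
    f(b'foo', g(b'bar',
                b'baz'))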
--- a/contrib/byteify-strings.py (38389:1d68fd5f614a)
+++ b/contrib/byteify-strings.py (38390:47dd23e6b116)
@@ -15,10 +15,15 @@
 import os
 import sys
 import tempfile
 import token
 import tokenize
+
+def adjusttokenpos(t, ofs):
+    """Adjust start/end column of the given token"""
+    return t._replace(start=(t.start[0], t.start[1] + ofs),
+                      end=(t.end[0], t.end[1] + ofs))
 
 if True:
     def replacetokens(tokens, opts):
         """Transform a stream of tokens from raw to Python 3.
 
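
A standalone sketch, not part of the patch, of what the new helper does to a tokenize.TokenInfo: only the column halves of start and end move; the row numbers and the token string are untouched.

    import io
    import tokenize

    # Same helper as added above, repeated so the sketch is self-contained.
    def adjusttokenpos(t, ofs):
        """Adjust start/end column of the given token"""
        return t._replace(start=(t.start[0], t.start[1] + ofs),
                          end=(t.end[0], t.end[1] + ofs))

    toks = list(tokenize.tokenize(io.BytesIO(b"f('x')\n").readline))
    st = next(t for t in toks if t.type == tokenize.STRING)
    print(st.start, st.end)                 # (1, 2) (1, 5)
    print(adjusttokenpos(st, 1).start)      # (1, 3)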
76 """ 81 """
77 st = tokens[j] 82 st = tokens[j]
78 if st.type == token.STRING and st.string.startswith(("'", '"')): 83 if st.type == token.STRING and st.string.startswith(("'", '"')):
79 sysstrtokens.add(st) 84 sysstrtokens.add(st)
80 85
86 coldelta = 0 # column increment for new opening parens
87 coloffset = -1 # column offset for the current line (-1: TBD)
88 parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)
81 for i, t in enumerate(tokens): 89 for i, t in enumerate(tokens):
90 # Compute the column offset for the current line, such that
91 # the current line will be aligned to the last opening paren
92 # as before.
93 if coloffset < 0:
94 if t.start[1] == parens[-1][1]:
95 coloffset = parens[-1][2]
96 elif t.start[1] + 1 == parens[-1][1]:
97 # fix misaligned indent of s/util.Abort/error.Abort/
98 coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
99 else:
100 coloffset = 0
101
102 # Reset per-line attributes at EOL.
103 if t.type in (token.NEWLINE, tokenize.NL):
104 yield adjusttokenpos(t, coloffset)
105 coldelta = 0
106 coloffset = -1
107 continue
108
109 # Remember the last paren position.
110 if _isop(i, '(', '[', '{'):
111 parens.append(t.end + (coloffset + coldelta,))
112 elif _isop(i, ')', ']', '}'):
113 parens.pop()
114
82 # Convert most string literals to byte literals. String literals 115 # Convert most string literals to byte literals. String literals
83 # in Python 2 are bytes. String literals in Python 3 are unicode. 116 # in Python 2 are bytes. String literals in Python 3 are unicode.
84 # Most strings in Mercurial are bytes and unicode strings are rare. 117 # Most strings in Mercurial are bytes and unicode strings are rare.
85 # Rather than rewrite all string literals to use ``b''`` to indicate 118 # Rather than rewrite all string literals to use ``b''`` to indicate
86 # byte strings, we apply this token transformer to insert the ``b`` 119 # byte strings, we apply this token transformer to insert the ``b``
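
The elif branch above handles continuation lines that come up one column short of the last opening paren; the comment ties this to a mechanical s/util.Abort/error.Abort/ rename, which made the name one character longer than the column those lines were originally aligned to. A hypothetical illustration, not taken from the patch:

    # hypothetical input whose second line is still aligned to the old
    # util.Abort( column:
    raise error.Abort(_('cannot open %s') % path,
                     hint=_('check permissions'))

    # output: the strings are prefixed and the hint line is pulled one
    # column to the right so it lines up with error.Abort( again:
    raise error.Abort(_(b'cannot open %s') % path,
                      hint=_(b'check permissions'))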
@@ -95,21 +128,23 @@
                 # is b''' prefixed, leading to a SyntaxError. We leave all
                 # docstrings as unprefixed to avoid this. This means Mercurial
                 # components touching docstrings need to handle unicode,
                 # unfortunately.
                 if s[0:3] in ("'''", '"""'):
-                    yield t
+                    yield adjusttokenpos(t, coloffset)
                     continue
 
                 # If the first character isn't a quote, it is likely a string
                 # prefixing character (such as 'b', 'u', or 'r'. Ignore.
                 if s[0] not in ("'", '"'):
-                    yield t
+                    yield adjusttokenpos(t, coloffset)
                     continue
 
                 # String literal. Prefix to make a b'' string.
-                yield t._replace(string='b%s' % t.string)
+                yield adjusttokenpos(t._replace(string='b%s' % t.string),
+                                     coloffset)
+                coldelta += 1
                 continue
 
             # This looks like a function call.
             if t.type == token.NAME and _isop(i + 1, '('):
                 fn = t.string
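
For context, a hypothetical before/after pair (not from the patch) showing which literals the branches above leave alone: triple-quoted strings, treated as docstrings, and literals that already carry a prefix character pass through with only their columns adjusted, while plain quoted strings get the b prefix.

    # hypothetical input
    def greet(name):
        """Return a greeting."""
        return 'hello %s' % name, u'label', r'raw'

    # output of the transformer (sketch)
    def greet(name):
        """Return a greeting."""
        return b'hello %s' % name, u'label', r'raw'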
@@ -130,15 +165,15 @@
                             _ensuresysstr(argidx)
 
                 # It changes iteritems/values to items/values as they are not
                 # present in Python 3 world.
                 elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
-                    yield t._replace(string=fn[4:])
+                    yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                     continue
 
             # Emit unmodified token.
-            yield t
+            yield adjusttokenpos(t, coloffset)
 
     def process(fin, fout, opts):
         tokens = tokenize.tokenize(fin.readline)
         tokens = replacetokens(list(tokens), opts)
         fout.write(tokenize.untokenize(tokens))
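
A minimal driver sketch for process(), not part of the patch. It assumes an opts dict that only needs the 'dictiter' key used above; the real script would normally build opts from its command-line options.

    import io

    # Assumes process() from byteify-strings.py is in scope.
    src = (b"d = {}\n"
           b"for k, v in d.iteritems():\n"
           b"    print('%s=%s' % (k, v))\n")
    out = io.BytesIO()
    process(io.BytesIO(src), out, {'dictiter': True})
    print(out.getvalue().decode('ascii'))
    # Expected output (sketch):
    #   d = {}
    #   for k, v in d.items():
    #       print(b'%s=%s' % (k, v))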