comparison contrib/byteify-strings.py @ 38390:47dd23e6b116
byteify-strings: try to preserve column alignment
author:   Yuya Nishihara <yuya@tcha.org>
date:     Fri, 01 Jun 2018 00:13:55 +0900
parents:  1d68fd5f614a
children: f77bbd34a1df
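
Background (an illustrative note, not part of the original page): byteify-strings.py rewrites a Python token stream, inserting a b prefix before plain string literals so they become byte literals under Python 3. Each inserted prefix pushes the rest of its line one column to the right, so any later opening paren on that line moves, and continuation lines that were aligned to that paren come out one column short. With a hypothetical input such as

    check('a', ('b',
                'c'))

the desired output is

    check(b'a', (b'b',
                 b'c'))

where the continuation line gains an extra column because the inner paren itself moved. This changeset records each opening paren's shifted column on a stack (coldelta, parens) and realigns subsequent lines through the new adjusttokenpos helper.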
--- a/contrib/byteify-strings.py  38389:1d68fd5f614a
+++ b/contrib/byteify-strings.py  38390:47dd23e6b116
@@ -15,10 +15,15 @@
 import os
 import sys
 import tempfile
 import token
 import tokenize
+
+def adjusttokenpos(t, ofs):
+    """Adjust start/end column of the given token"""
+    return t._replace(start=(t.start[0], t.start[1] + ofs),
+                      end=(t.end[0], t.end[1] + ofs))
 
 if True:
     def replacetokens(tokens, opts):
         """Transform a stream of tokens from raw to Python 3.
 
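Illustrative aside, not part of the changeset: tokenize yields TokenInfo namedtuples, which is why a bare _replace in adjusttokenpos is enough to shift a token's recorded columns:

    t = tokenize.TokenInfo(token.STRING, "'x'", (1, 4), (1, 7), "foo('x')")
    adjusttokenpos(t, 1)   # start (1, 5), end (1, 8): one column right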
76 """ | 81 """ |
77 st = tokens[j] | 82 st = tokens[j] |
78 if st.type == token.STRING and st.string.startswith(("'", '"')): | 83 if st.type == token.STRING and st.string.startswith(("'", '"')): |
79 sysstrtokens.add(st) | 84 sysstrtokens.add(st) |
80 | 85 |
86 coldelta = 0 # column increment for new opening parens | |
87 coloffset = -1 # column offset for the current line (-1: TBD) | |
88 parens = [(0, 0, 0)] # stack of (line, end-column, column-offset) | |
81 for i, t in enumerate(tokens): | 89 for i, t in enumerate(tokens): |
90 # Compute the column offset for the current line, such that | |
91 # the current line will be aligned to the last opening paren | |
92 # as before. | |
93 if coloffset < 0: | |
94 if t.start[1] == parens[-1][1]: | |
95 coloffset = parens[-1][2] | |
96 elif t.start[1] + 1 == parens[-1][1]: | |
97 # fix misaligned indent of s/util.Abort/error.Abort/ | |
98 coloffset = parens[-1][2] + (parens[-1][1] - t.start[1]) | |
99 else: | |
100 coloffset = 0 | |
101 | |
102 # Reset per-line attributes at EOL. | |
103 if t.type in (token.NEWLINE, tokenize.NL): | |
104 yield adjusttokenpos(t, coloffset) | |
105 coldelta = 0 | |
106 coloffset = -1 | |
107 continue | |
108 | |
109 # Remember the last paren position. | |
110 if _isop(i, '(', '[', '{'): | |
111 parens.append(t.end + (coloffset + coldelta,)) | |
112 elif _isop(i, ')', ']', '}'): | |
113 parens.pop() | |
114 | |
82 # Convert most string literals to byte literals. String literals | 115 # Convert most string literals to byte literals. String literals |
83 # in Python 2 are bytes. String literals in Python 3 are unicode. | 116 # in Python 2 are bytes. String literals in Python 3 are unicode. |
84 # Most strings in Mercurial are bytes and unicode strings are rare. | 117 # Most strings in Mercurial are bytes and unicode strings are rare. |
85 # Rather than rewrite all string literals to use ``b''`` to indicate | 118 # Rather than rewrite all string literals to use ``b''`` to indicate |
86 # byte strings, we apply this token transformer to insert the ``b`` | 119 # byte strings, we apply this token transformer to insert the ``b`` |
@@ -95,21 +128,23 @@
                 # is b''' prefixed, leading to a SyntaxError. We leave all
                 # docstrings as unprefixed to avoid this. This means Mercurial
                 # components touching docstrings need to handle unicode,
                 # unfortunately.
                 if s[0:3] in ("'''", '"""'):
-                    yield t
+                    yield adjusttokenpos(t, coloffset)
                     continue
 
                 # If the first character isn't a quote, it is likely a string
                 # prefixing character (such as 'b', 'u', or 'r'. Ignore.
                 if s[0] not in ("'", '"'):
-                    yield t
+                    yield adjusttokenpos(t, coloffset)
                     continue
 
                 # String literal. Prefix to make a b'' string.
-                yield t._replace(string='b%s' % t.string)
+                yield adjusttokenpos(t._replace(string='b%s' % t.string),
+                                     coloffset)
+                coldelta += 1
                 continue
 
             # This looks like a function call.
             if t.type == token.NAME and _isop(i + 1, '('):
                 fn = t.string
@@ -130,15 +165,15 @@
                             _ensuresysstr(argidx)
 
             # It changes iteritems/values to items/values as they are not
             # present in Python 3 world.
             elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
-                yield t._replace(string=fn[4:])
+                yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                 continue
 
         # Emit unmodified token.
-        yield t
+        yield adjusttokenpos(t, coloffset)
 
 def process(fin, fout, opts):
     tokens = tokenize.tokenize(fin.readline)
     tokens = replacetokens(list(tokens), opts)
     fout.write(tokenize.untokenize(tokens))
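
For readers who want to experiment with the idea, here is a minimal runnable sketch of the same technique. It is simplified from the patch above: it keeps only the b'' prefixing and the plain alignment branch, dropping the function-call rewrites, the docstring special case, and the util.Abort/error.Abort workaround. Names follow the patch; byteify and _replacetokens are hypothetical wrappers for this sketch.

    import io
    import token
    import tokenize

    def adjusttokenpos(t, ofs):
        """Adjust start/end column of the given token"""
        return t._replace(start=(t.start[0], t.start[1] + ofs),
                          end=(t.end[0], t.end[1] + ofs))

    def _replacetokens(tokens):
        coldelta = 0          # column increment for new opening parens
        coloffset = -1        # column offset for the current line (-1: TBD)
        parens = [(0, 0, 0)]  # stack of (line, end-column, column-offset)
        for t in tokens:
            # Align the current line to the last opening paren, as before.
            if coloffset < 0:
                coloffset = parens[-1][2] if t.start[1] == parens[-1][1] else 0

            # Reset per-line state at EOL.
            if t.type in (token.NEWLINE, tokenize.NL):
                yield adjusttokenpos(t, coloffset)
                coldelta = 0
                coloffset = -1
                continue

            # Remember where opening parens ended up after the shifts so far.
            if t.type == token.OP and t.string in ('(', '[', '{'):
                parens.append(t.end + (coloffset + coldelta,))
            elif t.type == token.OP and t.string in (')', ']', '}'):
                parens.pop()

            # Turn a plain string literal into a b'' literal and record
            # that everything after it on this line moved one column right.
            if t.type == token.STRING and t.string[0] in ("'", '"'):
                yield adjusttokenpos(t._replace(string='b%s' % t.string),
                                     coloffset)
                coldelta += 1
                continue

            yield adjusttokenpos(t, coloffset)

    def byteify(source):
        readline = io.BytesIO(source.encode('utf-8')).readline
        tokens = list(tokenize.tokenize(readline))
        return tokenize.untokenize(_replacetokens(tokens)).decode('utf-8')

    print(byteify("check('a', ('b',\n            'c'))\n"))
    # check(b'a', (b'b',
    #              b'c'))

The design hinges on the fact that tokenize.untokenize pads with spaces when a token's recorded start column lies beyond the current output position: shifting the recorded columns of an entire continuation line by the stacked offset is enough to keep it aligned under the paren that moved.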