comparison contrib/byteify-strings.py @ 38397:f77bbd34a1df

byteify-strings: remove superfluous "if True" block
author Yuya Nishihara <yuya@tcha.org>
date Sun, 03 Jun 2018 18:19:54 +0900
parents 47dd23e6b116
children da130c5cef90
diff -r 47dd23e6b116 -r f77bbd34a1df contrib/byteify-strings.py
--- a/contrib/byteify-strings.py
+++ b/contrib/byteify-strings.py
@@ -21,159 +21,158 @@
 def adjusttokenpos(t, ofs):
     """Adjust start/end column of the given token"""
     return t._replace(start=(t.start[0], t.start[1] + ofs),
                       end=(t.end[0], t.end[1] + ofs))
 
-if True:
-    def replacetokens(tokens, opts):
-        """Transform a stream of tokens from raw to Python 3.
-
-        Returns a generator of possibly rewritten tokens.
-
-        The input token list may be mutated as part of processing. However,
-        its changes do not necessarily match the output token stream.
-        """
-        sysstrtokens = set()
-
-        # The following utility functions access the tokens list and i index of
-        # the for i, t enumerate(tokens) loop below
-        def _isop(j, *o):
-            """Assert that tokens[j] is an OP with one of the given values"""
-            try:
-                return tokens[j].type == token.OP and tokens[j].string in o
-            except IndexError:
-                return False
-
-        def _findargnofcall(n):
-            """Find arg n of a call expression (start at 0)
-
-            Returns index of the first token of that argument, or None if
-            there is not that many arguments.
-
-            Assumes that token[i + 1] is '('.
-
-            """
-            nested = 0
-            for j in range(i + 2, len(tokens)):
-                if _isop(j, ')', ']', '}'):
-                    # end of call, tuple, subscription or dict / set
-                    nested -= 1
-                    if nested < 0:
-                        return None
-                elif n == 0:
-                    # this is the starting position of arg
-                    return j
-                elif _isop(j, '(', '[', '{'):
-                    nested += 1
-                elif _isop(j, ',') and nested == 0:
-                    n -= 1
-
-            return None
-
-        def _ensuresysstr(j):
-            """Make sure the token at j is a system string
-
-            Remember the given token so the string transformer won't add
-            the byte prefix.
-
-            Ignores tokens that are not strings. Assumes bounds checking has
-            already been done.
-
-            """
-            st = tokens[j]
-            if st.type == token.STRING and st.string.startswith(("'", '"')):
-                sysstrtokens.add(st)
-
-        coldelta = 0  # column increment for new opening parens
-        coloffset = -1  # column offset for the current line (-1: TBD)
-        parens = [(0, 0, 0)]  # stack of (line, end-column, column-offset)
-        for i, t in enumerate(tokens):
-            # Compute the column offset for the current line, such that
-            # the current line will be aligned to the last opening paren
-            # as before.
-            if coloffset < 0:
-                if t.start[1] == parens[-1][1]:
-                    coloffset = parens[-1][2]
-                elif t.start[1] + 1 == parens[-1][1]:
-                    # fix misaligned indent of s/util.Abort/error.Abort/
-                    coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
-                else:
-                    coloffset = 0
-
-            # Reset per-line attributes at EOL.
-            if t.type in (token.NEWLINE, tokenize.NL):
-                yield adjusttokenpos(t, coloffset)
-                coldelta = 0
-                coloffset = -1
-                continue
-
-            # Remember the last paren position.
-            if _isop(i, '(', '[', '{'):
-                parens.append(t.end + (coloffset + coldelta,))
-            elif _isop(i, ')', ']', '}'):
-                parens.pop()
-
-            # Convert most string literals to byte literals. String literals
-            # in Python 2 are bytes. String literals in Python 3 are unicode.
-            # Most strings in Mercurial are bytes and unicode strings are rare.
-            # Rather than rewrite all string literals to use ``b''`` to indicate
-            # byte strings, we apply this token transformer to insert the ``b``
-            # prefix nearly everywhere.
-            if t.type == token.STRING and t not in sysstrtokens:
-                s = t.string
-
-                # Preserve docstrings as string literals. This is inconsistent
-                # with regular unprefixed strings. However, the
-                # "from __future__" parsing (which allows a module docstring to
-                # exist before it) doesn't properly handle the docstring if it
-                # is b''' prefixed, leading to a SyntaxError. We leave all
-                # docstrings as unprefixed to avoid this. This means Mercurial
-                # components touching docstrings need to handle unicode,
-                # unfortunately.
-                if s[0:3] in ("'''", '"""'):
-                    yield adjusttokenpos(t, coloffset)
-                    continue
-
-                # If the first character isn't a quote, it is likely a string
-                # prefixing character (such as 'b', 'u', or 'r'. Ignore.
-                if s[0] not in ("'", '"'):
-                    yield adjusttokenpos(t, coloffset)
-                    continue
-
-                # String literal. Prefix to make a b'' string.
-                yield adjusttokenpos(t._replace(string='b%s' % t.string),
-                                     coloffset)
-                coldelta += 1
-                continue
-
-            # This looks like a function call.
-            if t.type == token.NAME and _isop(i + 1, '('):
-                fn = t.string
-
-                # *attr() builtins don't accept byte strings to 2nd argument.
-                if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
-                        not _isop(i - 1, '.')):
-                    arg1idx = _findargnofcall(1)
-                    if arg1idx is not None:
-                        _ensuresysstr(arg1idx)
-
-                # .encode() and .decode() on str/bytes/unicode don't accept
-                # byte strings on Python 3.
-                elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
-                    for argn in range(2):
-                        argidx = _findargnofcall(argn)
-                        if argidx is not None:
-                            _ensuresysstr(argidx)
-
-                # It changes iteritems/values to items/values as they are not
-                # present in Python 3 world.
-                elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
-                    yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
-                    continue
-
-            # Emit unmodified token.
-            yield adjusttokenpos(t, coloffset)
+def replacetokens(tokens, opts):
+    """Transform a stream of tokens from raw to Python 3.
+
+    Returns a generator of possibly rewritten tokens.
+
+    The input token list may be mutated as part of processing. However,
+    its changes do not necessarily match the output token stream.
+    """
+    sysstrtokens = set()
+
+    # The following utility functions access the tokens list and i index of
+    # the for i, t enumerate(tokens) loop below
+    def _isop(j, *o):
+        """Assert that tokens[j] is an OP with one of the given values"""
+        try:
+            return tokens[j].type == token.OP and tokens[j].string in o
+        except IndexError:
+            return False
+
+    def _findargnofcall(n):
+        """Find arg n of a call expression (start at 0)
+
+        Returns index of the first token of that argument, or None if
+        there is not that many arguments.
+
+        Assumes that token[i + 1] is '('.
+
+        """
+        nested = 0
+        for j in range(i + 2, len(tokens)):
+            if _isop(j, ')', ']', '}'):
+                # end of call, tuple, subscription or dict / set
+                nested -= 1
+                if nested < 0:
+                    return None
+            elif n == 0:
+                # this is the starting position of arg
+                return j
+            elif _isop(j, '(', '[', '{'):
+                nested += 1
+            elif _isop(j, ',') and nested == 0:
+                n -= 1
+
+        return None
+
+    def _ensuresysstr(j):
+        """Make sure the token at j is a system string
+
+        Remember the given token so the string transformer won't add
+        the byte prefix.
+
+        Ignores tokens that are not strings. Assumes bounds checking has
+        already been done.
+
+        """
+        st = tokens[j]
+        if st.type == token.STRING and st.string.startswith(("'", '"')):
+            sysstrtokens.add(st)
+
+    coldelta = 0  # column increment for new opening parens
+    coloffset = -1  # column offset for the current line (-1: TBD)
+    parens = [(0, 0, 0)]  # stack of (line, end-column, column-offset)
+    for i, t in enumerate(tokens):
+        # Compute the column offset for the current line, such that
+        # the current line will be aligned to the last opening paren
+        # as before.
+        if coloffset < 0:
+            if t.start[1] == parens[-1][1]:
+                coloffset = parens[-1][2]
+            elif t.start[1] + 1 == parens[-1][1]:
+                # fix misaligned indent of s/util.Abort/error.Abort/
+                coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
+            else:
+                coloffset = 0
+
+        # Reset per-line attributes at EOL.
+        if t.type in (token.NEWLINE, tokenize.NL):
+            yield adjusttokenpos(t, coloffset)
+            coldelta = 0
+            coloffset = -1
+            continue
+
+        # Remember the last paren position.
+        if _isop(i, '(', '[', '{'):
+            parens.append(t.end + (coloffset + coldelta,))
+        elif _isop(i, ')', ']', '}'):
+            parens.pop()
+
+        # Convert most string literals to byte literals. String literals
+        # in Python 2 are bytes. String literals in Python 3 are unicode.
+        # Most strings in Mercurial are bytes and unicode strings are rare.
+        # Rather than rewrite all string literals to use ``b''`` to indicate
+        # byte strings, we apply this token transformer to insert the ``b``
+        # prefix nearly everywhere.
+        if t.type == token.STRING and t not in sysstrtokens:
+            s = t.string
+
+            # Preserve docstrings as string literals. This is inconsistent
+            # with regular unprefixed strings. However, the
+            # "from __future__" parsing (which allows a module docstring to
+            # exist before it) doesn't properly handle the docstring if it
+            # is b''' prefixed, leading to a SyntaxError. We leave all
+            # docstrings as unprefixed to avoid this. This means Mercurial
+            # components touching docstrings need to handle unicode,
+            # unfortunately.
+            if s[0:3] in ("'''", '"""'):
+                yield adjusttokenpos(t, coloffset)
+                continue
+
+            # If the first character isn't a quote, it is likely a string
+            # prefixing character (such as 'b', 'u', or 'r'. Ignore.
+            if s[0] not in ("'", '"'):
+                yield adjusttokenpos(t, coloffset)
+                continue
+
+            # String literal. Prefix to make a b'' string.
+            yield adjusttokenpos(t._replace(string='b%s' % t.string),
+                                 coloffset)
+            coldelta += 1
+            continue
+
+        # This looks like a function call.
+        if t.type == token.NAME and _isop(i + 1, '('):
+            fn = t.string
+
+            # *attr() builtins don't accept byte strings to 2nd argument.
+            if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
+                    not _isop(i - 1, '.')):
+                arg1idx = _findargnofcall(1)
+                if arg1idx is not None:
+                    _ensuresysstr(arg1idx)
+
+            # .encode() and .decode() on str/bytes/unicode don't accept
+            # byte strings on Python 3.
+            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
+                for argn in range(2):
+                    argidx = _findargnofcall(argn)
+                    if argidx is not None:
+                        _ensuresysstr(argidx)
+
+            # It changes iteritems/values to items/values as they are not
+            # present in Python 3 world.
+            elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
+                yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
+                continue
+
+        # Emit unmodified token.
+        yield adjusttokenpos(t, coloffset)
 
 def process(fin, fout, opts):
     tokens = tokenize.tokenize(fin.readline)
     tokens = replacetokens(list(tokens), opts)
     fout.write(tokenize.untokenize(tokens))
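
For illustration, the core transformation replacetokens() performs is easier to see in isolation. The following is a minimal, hypothetical sketch, not part of this changeset: it b-prefixes plain string literals at the token level and untokenizes the result, while omitting the coloffset/coldelta realignment, the docstring exemption, and the sysstr handling that the real transformer applies.

    import io
    import token
    import tokenize

    def byteify(source):
        """Prefix plain string literals in source with b (simplified sketch)."""
        result = []
        for t in tokenize.tokenize(io.BytesIO(source.encode('utf-8')).readline):
            # Only rewrite unprefixed literals; b'...', u'...', and r'...'
            # start with a prefix character, not a quote, and are left alone.
            if t.type == token.STRING and t.string[0] in ("'", '"'):
                t = t._replace(string='b%s' % t.string)
            result.append(t)
        # untokenize() returns bytes here because the stream carries the
        # ENCODING token emitted by tokenize().
        return tokenize.untokenize(result).decode('utf-8')

    print(byteify("name = 'foo'"))  # -> name = b'foo'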