contrib/byteify-strings.py
changeset 43076 2372284d9457
parent 42701 11498aa91c03
child 43379 bb509f39d387
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
    16 import sys
    16 import sys
    17 import tempfile
    17 import tempfile
    18 import token
    18 import token
    19 import tokenize
    19 import tokenize
    20 
    20 
       
    21 
    21 def adjusttokenpos(t, ofs):
    22 def adjusttokenpos(t, ofs):
    22     """Adjust start/end column of the given token"""
    23     """Adjust start/end column of the given token"""
    23     return t._replace(start=(t.start[0], t.start[1] + ofs),
    24     return t._replace(
    24                       end=(t.end[0], t.end[1] + ofs))
    25         start=(t.start[0], t.start[1] + ofs), end=(t.end[0], t.end[1] + ofs)
       
    26     )
       
    27 
    25 
    28 
    26 def replacetokens(tokens, opts):
    29 def replacetokens(tokens, opts):
    27     """Transform a stream of tokens from raw to Python 3.
    30     """Transform a stream of tokens from raw to Python 3.
    28 
    31 
    29     Returns a generator of possibly rewritten tokens.
    32     Returns a generator of possibly rewritten tokens.
    80         """
    83         """
    81         k = j
    84         k = j
    82         currtoken = tokens[k]
    85         currtoken = tokens[k]
    83         while currtoken.type in (token.STRING, token.NEWLINE, tokenize.NL):
    86         while currtoken.type in (token.STRING, token.NEWLINE, tokenize.NL):
    84             k += 1
    87             k += 1
    85             if (
    88             if currtoken.type == token.STRING and currtoken.string.startswith(
    86                 currtoken.type == token.STRING
    89                 ("'", '"')
    87                 and currtoken.string.startswith(("'", '"'))
       
    88             ):
    90             ):
    89                 sysstrtokens.add(currtoken)
    91                 sysstrtokens.add(currtoken)
    90             try:
    92             try:
    91                 currtoken = tokens[k]
    93                 currtoken = tokens[k]
    92             except IndexError:
    94             except IndexError:
   124 
   126 
   125     coldelta = 0  # column increment for new opening parens
   127     coldelta = 0  # column increment for new opening parens
   126     coloffset = -1  # column offset for the current line (-1: TBD)
   128     coloffset = -1  # column offset for the current line (-1: TBD)
   127     parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
   129     parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
   128     ignorenextline = False  # don't transform the next line
   130     ignorenextline = False  # don't transform the next line
   129     insideignoreblock = False # don't transform until turned off
   131     insideignoreblock = False  # don't transform until turned off
   130     for i, t in enumerate(tokens):
   132     for i, t in enumerate(tokens):
   131         # Compute the column offset for the current line, such that
   133         # Compute the column offset for the current line, such that
   132         # the current line will be aligned to the last opening paren
   134         # the current line will be aligned to the last opening paren
   133         # as before.
   135         # as before.
   134         if coloffset < 0:
   136         if coloffset < 0:
   135             lastparen = parens[-1]
   137             lastparen = parens[-1]
   136             if t.start[1] == lastparen[1]:
   138             if t.start[1] == lastparen[1]:
   137                 coloffset = lastparen[2]
   139                 coloffset = lastparen[2]
   138             elif (
   140             elif t.start[1] + 1 == lastparen[1] and lastparen[3] not in (
   139                 t.start[1] + 1 == lastparen[1]
   141                 token.NEWLINE,
   140                 and lastparen[3] not in (token.NEWLINE, tokenize.NL)
   142                 tokenize.NL,
   141             ):
   143             ):
   142                 # fix misaligned indent of s/util.Abort/error.Abort/
   144                 # fix misaligned indent of s/util.Abort/error.Abort/
   143                 coloffset = lastparen[2] + (lastparen[1] - t.start[1])
   145                 coloffset = lastparen[2] + (lastparen[1] - t.start[1])
   144             else:
   146             else:
   145                 coloffset = 0
   147                 coloffset = 0
   200             if s[0] not in ("'", '"'):
   202             if s[0] not in ("'", '"'):
   201                 yield adjusttokenpos(t, coloffset)
   203                 yield adjusttokenpos(t, coloffset)
   202                 continue
   204                 continue
   203 
   205 
   204             # String literal. Prefix to make a b'' string.
   206             # String literal. Prefix to make a b'' string.
   205             yield adjusttokenpos(t._replace(string='b%s' % t.string),
   207             yield adjusttokenpos(t._replace(string='b%s' % t.string), coloffset)
   206                                  coloffset)
       
   207             coldelta += 1
   208             coldelta += 1
   208             continue
   209             continue
   209 
   210 
   210         # This looks like a function call.
   211         # This looks like a function call.
   211         if t.type == token.NAME and _isop(i + 1, '('):
   212         if t.type == token.NAME and _isop(i + 1, '('):
   212             fn = t.string
   213             fn = t.string
   213 
   214 
   214             # *attr() builtins don't accept byte strings to 2nd argument.
   215             # *attr() builtins don't accept byte strings to 2nd argument.
   215             if fn in (
   216             if fn in (
   216                 'getattr', 'setattr', 'hasattr', 'safehasattr', 'wrapfunction',
   217                 'getattr',
   217                 'wrapclass', 'addattr'
   218                 'setattr',
       
   219                 'hasattr',
       
   220                 'safehasattr',
       
   221                 'wrapfunction',
       
   222                 'wrapclass',
       
   223                 'addattr',
   218             ) and (opts['allow-attr-methods'] or not _isop(i - 1, '.')):
   224             ) and (opts['allow-attr-methods'] or not _isop(i - 1, '.')):
   219                 arg1idx = _findargnofcall(1)
   225                 arg1idx = _findargnofcall(1)
   220                 if arg1idx is not None:
   226                 if arg1idx is not None:
   221                     _ensuresysstr(arg1idx)
   227                     _ensuresysstr(arg1idx)
   222 
   228 
   239                 _ensuresysstr(i + 2)
   245                 _ensuresysstr(i + 2)
   240             if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
   246             if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
   241                 _ensuresysstr(i + 4)
   247                 _ensuresysstr(i + 4)
   242 
   248 
   243         # Looks like "if __name__ == '__main__'".
   249         # Looks like "if __name__ == '__main__'".
   244         if (t.type == token.NAME and t.string == '__name__'
   250         if (
   245             and _isop(i + 1, '==')):
   251             t.type == token.NAME
       
   252             and t.string == '__name__'
       
   253             and _isop(i + 1, '==')
       
   254         ):
   246             _ensuresysstr(i + 2)
   255             _ensuresysstr(i + 2)
   247 
   256 
   248         # Emit unmodified token.
   257         # Emit unmodified token.
   249         yield adjusttokenpos(t, coloffset)
   258         yield adjusttokenpos(t, coloffset)
       
   259 
   250 
   260 
   251 def process(fin, fout, opts):
   261 def process(fin, fout, opts):
   252     tokens = tokenize.tokenize(fin.readline)
   262     tokens = tokenize.tokenize(fin.readline)
   253     tokens = replacetokens(list(tokens), opts)
   263     tokens = replacetokens(list(tokens), opts)
   254     fout.write(tokenize.untokenize(tokens))
   264     fout.write(tokenize.untokenize(tokens))
       
   265 
   255 
   266 
   256 def tryunlink(fname):
   267 def tryunlink(fname):
   257     try:
   268     try:
   258         os.unlink(fname)
   269         os.unlink(fname)
   259     except OSError as err:
   270     except OSError as err:
   260         if err.errno != errno.ENOENT:
   271         if err.errno != errno.ENOENT:
   261             raise
   272             raise
   262 
   273 
       
   274 
   263 @contextlib.contextmanager
   275 @contextlib.contextmanager
   264 def editinplace(fname):
   276 def editinplace(fname):
   265     n = os.path.basename(fname)
   277     n = os.path.basename(fname)
   266     d = os.path.dirname(fname)
   278     d = os.path.dirname(fname)
   267     fp = tempfile.NamedTemporaryFile(prefix='.%s-' % n, suffix='~', dir=d,
   279     fp = tempfile.NamedTemporaryFile(
   268                                      delete=False)
   280         prefix='.%s-' % n, suffix='~', dir=d, delete=False
       
   281     )
   269     try:
   282     try:
   270         yield fp
   283         yield fp
   271         fp.close()
   284         fp.close()
   272         if os.name == 'nt':
   285         if os.name == 'nt':
   273             tryunlink(fname)
   286             tryunlink(fname)
   274         os.rename(fp.name, fname)
   287         os.rename(fp.name, fname)
   275     finally:
   288     finally:
   276         fp.close()
   289         fp.close()
   277         tryunlink(fp.name)
   290         tryunlink(fp.name)
   278 
   291 
       
   292 
   279 def main():
   293 def main():
   280     ap = argparse.ArgumentParser()
   294     ap = argparse.ArgumentParser()
   281     ap.add_argument('--version', action='version',
   295     ap.add_argument(
   282                     version='Byteify strings 1.0')
   296         '--version', action='version', version='Byteify strings 1.0'
   283     ap.add_argument('-i', '--inplace', action='store_true', default=False,
   297     )
   284                     help='edit files in place')
   298     ap.add_argument(
   285     ap.add_argument('--dictiter', action='store_true', default=False,
   299         '-i',
   286                     help='rewrite iteritems() and itervalues()'),
   300         '--inplace',
   287     ap.add_argument('--allow-attr-methods', action='store_true',
   301         action='store_true',
   288                     default=False,
   302         default=False,
   289                     help='also handle attr*() when they are methods'),
   303         help='edit files in place',
   290     ap.add_argument('--treat-as-kwargs', nargs="+", default=[],
   304     )
   291                     help="ignore kwargs-like objects"),
   305     ap.add_argument(
       
   306         '--dictiter',
       
   307         action='store_true',
       
   308         default=False,
       
   309         help='rewrite iteritems() and itervalues()',
       
   310     ),
       
   311     ap.add_argument(
       
   312         '--allow-attr-methods',
       
   313         action='store_true',
       
   314         default=False,
       
   315         help='also handle attr*() when they are methods',
       
   316     ),
       
   317     ap.add_argument(
       
   318         '--treat-as-kwargs',
       
   319         nargs="+",
       
   320         default=[],
       
   321         help="ignore kwargs-like objects",
       
   322     ),
   292     ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
   323     ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
   293     args = ap.parse_args()
   324     args = ap.parse_args()
   294     opts = {
   325     opts = {
   295         'dictiter': args.dictiter,
   326         'dictiter': args.dictiter,
   296         'treat-as-kwargs': set(args.treat_as_kwargs),
   327         'treat-as-kwargs': set(args.treat_as_kwargs),
   304         else:
   335         else:
   305             with open(fname, 'rb') as fin:
   336             with open(fname, 'rb') as fin:
   306                 fout = sys.stdout.buffer
   337                 fout = sys.stdout.buffer
   307                 process(fin, fout, opts)
   338                 process(fin, fout, opts)
   308 
   339 
       
   340 
   309 if __name__ == '__main__':
   341 if __name__ == '__main__':
   310     if sys.version_info.major < 3:
   342     if sys.version_info.major < 3:
   311         print('This script must be run under Python 3.')
   343         print('This script must be run under Python 3.')
   312         sys.exit(3)
   344         sys.exit(3)
   313     main()
   345     main()