contrib/byteify-strings.py
changeset 38387 b704da9a9dda
parent 38386 9f42e4a83676
child 38388 f701bc936e7f
--- a/contrib/byteify-strings.py
+++ b/contrib/byteify-strings.py
@@ -10,28 +10,25 @@
 from __future__ import absolute_import
 
 import argparse
 import contextlib
 import errno
-import io
 import os
 import sys
 import tempfile
 import token
 import tokenize
 
 if True:
-    def replacetokens(tokens, fullname):
+    def replacetokens(tokens):
         """Transform a stream of tokens from raw to Python 3.
 
         Returns a generator of possibly rewritten tokens.
 
         The input token list may be mutated as part of processing. However,
         its changes do not necessarily match the output token stream.
         """
-        futureimpline = False
-
         # The following utility functions access the tokens list and i index of
         # the for i, t enumerate(tokens) loop below
         def _isop(j, *o):
             """Assert that tokens[j] is an OP with one of the given values"""
             try:
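The first hunk stops at the opening try: of _isop, so the body of replacetokens() is mostly elided here. For orientation, the following is a minimal, self-contained sketch of the pattern the function is built on; the helper body and the loop are illustrative stand-ins, not the script's exact code. Tokens from tokenize.tokenize() are tokenize.TokenInfo named tuples, so t._replace() returns a modified copy, and because each rewritten token keeps its original start/end positions, tokenize.untokenize() can reassemble the source without disturbing the layout.

import io
import token
import tokenize

def replace_strings(source):
    """Yield the tokens of `source` with plain str literals made b'' literals."""
    tokens = list(tokenize.tokenize(io.BytesIO(source).readline))

    def _isop(j, *o):
        # Peek at a neighbor: is tokens[j] an OP token with one of the
        # given values? (An illustrative guess at the truncated helper.)
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    for i, t in enumerate(tokens):
        # String literal. Prefix to make a b'' string; the original
        # start/end positions are kept so untokenize() preserves layout.
        if t.type == token.STRING:
            yield t._replace(string='b%s' % t.string)
            continue
        # A NAME directly followed by '(' looks like a function call.
        # The real script special-cases some calls here; this sketch
        # just passes them through.
        if t.type == token.NAME and _isop(i + 1, '('):
            yield t
            continue
        yield t

src = b"name = 'hg'\nprint(len(name))\n"
print(tokenize.untokenize(replace_strings(src)).decode('ascii'))

Running the sketch prints name = b'hg' on the first line: the rewritten token is one character wider than its recorded span, but untokenize() only uses positions to compute inter-token whitespace, so the longer text is emitted as-is.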
@@ -109,32 +106,10 @@
 
                 # String literal. Prefix to make a b'' string.
                 yield t._replace(string='b%s' % t.string)
                 continue
 
-            # Insert compatibility imports at "from __future__ import" line.
-            # No '\n' should be added to preserve line numbers.
-            if (t.type == token.NAME and t.string == 'import' and
-                all(u.type == token.NAME for u in tokens[i - 2:i]) and
-                [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
-                futureimpline = True
-            if t.type == token.NEWLINE and futureimpline:
-                futureimpline = False
-                if fullname == 'mercurial.pycompat':
-                    yield t
-                    continue
-                r, c = t.start
-                l = (b'; from mercurial.pycompat import '
-                     b'delattr, getattr, hasattr, setattr, xrange, '
-                     b'open, unicode\n')
-                for u in tokenize.tokenize(io.BytesIO(l).readline):
-                    if u.type in (tokenize.ENCODING, token.ENDMARKER):
-                        continue
-                    yield u._replace(
-                        start=(r, c + u.start[1]), end=(r, c + u.end[1]))
-                continue
-
             # This looks like a function call.
             if t.type == token.NAME and _isop(i + 1, '('):
                 fn = t.string
 
                 # *attr() builtins don't accept byte strings to 2nd argument.
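The deleted block is specific to Mercurial's old source transformer: right after a from __future__ import ... statement it spliced a mercurial.pycompat import onto the same physical line (no newline added, so rewritten modules kept their original line numbers), and it skipped mercurial.pycompat itself, which cannot import its own compatibility names. With the fullname parameter gone from replacetokens(), the standalone script has no use for any of this. For reference, a runnable sketch of the position-shifting trick the branch used, with the snippet copied from the deleted lines:

import io
import token
import tokenize

SNIPPET = (b'; from mercurial.pycompat import '
           b'delattr, getattr, hasattr, setattr, xrange, '
           b'open, unicode\n')

def inject_at(t):
    # Re-tokenize SNIPPET and shift every token onto the row/column of
    # the NEWLINE token `t` it replaces, so the injected import lands on
    # the same physical line and the file keeps its line count.
    r, c = t.start
    for u in tokenize.tokenize(io.BytesIO(SNIPPET).readline):
        if u.type in (tokenize.ENCODING, token.ENDMARKER):
            continue
        yield u._replace(start=(r, c + u.start[1]),
                         end=(r, c + u.end[1]))

Fed a module whose first line is from __future__ import absolute_import, the old transformer therefore emitted from __future__ import absolute_import; from mercurial.pycompat import ... as a single line.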
@@ -161,11 +136,11 @@
             # Emit unmodified token.
             yield t
 
 def process(fin, fout):
     tokens = tokenize.tokenize(fin.readline)
-    tokens = replacetokens(list(tokens), fullname='<dummy>')
+    tokens = replacetokens(list(tokens))
     fout.write(tokenize.untokenize(tokens))
 
 def tryunlink(fname):
     try:
         os.unlink(fname)
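process() is the whole pipeline: tokenize.tokenize() pulls tokens from a bytes-returning readline callable (detecting the source encoding itself), replacetokens() rewrites them, and tokenize.untokenize() serializes the stream back to bytes. A hypothetical round trip through in-memory buffers, assuming process() from this script is in scope and that plain string literals get the b prefix as in the hunks above:

import io

fin = io.BytesIO(b"greeting = 'hello'\n")
fout = io.BytesIO()
process(fin, fout)
print(fout.getvalue())  # expected: b"greeting = b'hello'\n"

(tryunlink() is cut off by the hunk; given the errno import at the top of the file, it presumably swallows the error from os.unlink() when the file does not exist.)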