contrib/hgfixes/fix_bytes.py
author Renato Cunha <renatoc@gmail.com>
Tue, 03 Aug 2010 13:41:47 -0300
changeset 11747 40d5633889bb
child 11748 37a70a784397
permissions -rw-r--r--
hgfixes: add a fixer to convert plain strings to bytestrings. This patch implements a 2to3 fixer that converts all plain strings in a Python source file to byte-string syntax. Example: foo = 'Normal string' would become foo = b'Normal string'. The motivation behind this fixer can be found in http://selenic.com/pipermail/mercurial-devel/2010-June/022363.html or, in other words: the current hg source assumes that _most_ strings are "meant" to be byte sequences, so it makes sense to make the conversion implemented by this patch. As mentioned above, not all mercurial modules want to use strings as bytes; examples include i18n (which uses unicode) and demandimport (in py3k, module names are normal strings, thus unicode, and there's no need for a conversion). Therefore, these modules are blacklisted in the fixer. There are also a few functions that can take only unicode arguments, so the conversion shouldn't be done for those.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11747
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     1
"""Fixer that changes plain strings to bytes strings."""
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     2
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     3
import re
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     4
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     5
from lib2to3 import fixer_base
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     6
from lib2to3.pgen2 import token
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     7
from lib2to3.fixer_util import Name
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     8
from lib2to3.pygram import python_symbols as syms
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
     9
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    10
# Pattern for the opening of a plain string literal: an optional raw prefix
# (r or R) followed immediately by a single or double quote. When used with
# .match(), literals that start with any other prefix letter do not match.
_re = re.compile(r'[rR]?[\'\"]')
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    11
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    12
# XXX: Implementing a blacklist in 2to3 turned out to be more troublesome than
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    13
# blacklisting some modules inside the fixers. So, this is what I came up with.
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    14
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    15
# Source files whose string literals must be left untouched by the fixer.
blacklist = [
    'mercurial/demandimport.py',
    'mercurial/i18n.py',
]
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    18
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    19
def isdocstring(node):
    """Return True if the string leaf ``node`` is documentation text.

    A string counts as documentation when any of the following holds for
    ``node.parent``:

    * it is a simple statement directly under ``file_input`` (a lone string
      at module level);
    * it, or one of its ancestors, is an expression statement that has the
      name ``__doc__`` among its direct children;
    * it is a simple statement with a function or class definition somewhere
      above it (a lone string inside a ``def`` or ``class``).

    Always returns a bool; a node without a parent yields False.
    """
    def lineage(start):
        # Yield ``start`` followed by each successive parent, stopping once
        # the top of the tree (a parent of None) is reached.
        current = start
        while current is not None:
            yield current
            current = current.parent

    def isclassorfunction(ancestor):
        # Only a lone string statement can be a docstring; it is one when a
        # function or class definition encloses it.
        if ancestor is None or ancestor.type != syms.simple_stmt:
            return False
        symbols = (syms.funcdef, syms.classdef)
        return any(step.type in symbols for step in lineage(ancestor))

    def ismodule(ancestor):
        # A lone string statement whose parent is file_input, i.e. a string
        # standing by itself at the top level of the source file.
        if ancestor is None or ancestor.type != syms.simple_stmt:
            return False
        container = ancestor.parent
        return container is not None and container.type == syms.file_input

    def isdocassignment(ancestor):
        # Assigning to __doc__, definitely a string
        docname = Name('__doc__')
        return any(step.type == syms.expr_stmt and docname in step.children
                   for step in lineage(ancestor))

    parent = node.parent
    return (ismodule(parent) or
            isdocassignment(parent) or
            isclassorfunction(parent))
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    61
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    62
def shouldtransform(node):
    """Return True if the string leaf ``node`` may become a bytes literal.

    The literal is left alone (False is returned) when:

    * its contents are one of the special names (currently ``__main__``),
      which must stay a native string;
    * the textual form of its great-grandparent node mentions ``getattr``,
      ``hasattr``, ``setattr``, ``encode`` or ``decode``. This is a plain
      substring check on the rendered source, not a check of which function
      is actually being called.
    """
    specialnames = ['__main__']

    # node.value holds the literal exactly as written, quotes and any prefix
    # letter included, so recover the contents before comparing them with
    # the special names.
    raw = node.value
    contents = raw
    unprefixed = raw.lstrip('rRuUbB')
    if unprefixed[:1] in ('"', "'"):
        # Try triple quotes first so '''x''' is not read as 'x' in quotes.
        for quote in ('"""', "'''", '"', "'"):
            if (len(unprefixed) >= 2 * len(quote) and
                unprefixed.startswith(quote) and
                unprefixed.endswith(quote)):
                contents = unprefixed[len(quote):-len(quote)]
                break

    # The raw comparison is kept so a bare value still matches as before.
    if raw in specialnames or contents in specialnames:
        return False

    ggparent = node.parent.parent.parent
    sggparent = str(ggparent)

    for marker in ('getattr', 'hasattr', 'setattr', 'encode', 'decode'):
        if marker in sggparent:
            return False

    return True
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    79
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    80
class FixBytes(fixer_base.BaseFix):
    """2to3 fixer that prefixes plain string literals with ``b``."""

    PATTERN = 'STRING'

    def transform(self, node, results):
        """Return a ``b``-prefixed clone of ``node``, or None to leave it as is.

        The node is left untouched when the file being fixed is blacklisted,
        when the literal does not start like a plain (optionally raw) string,
        when it is a docstring, or when ``shouldtransform`` rejects it.
        """
        # The path handed to the fixer may carry a leading directory or use
        # backslash separators, so match blacklist entries against the tail
        # of the normalized path rather than requiring an exact match.
        filename = (self.filename or '').replace('\\', '/')
        for entry in blacklist:
            if filename == entry or filename.endswith('/' + entry):
                return None

        if node.type != token.STRING:
            return None
        if not _re.match(node.value):
            return None
        if isdocstring(node) or not shouldtransform(node):
            return None

        # Work on a copy so the original leaf is not modified in place.
        new = node.clone()
        new.value = 'b' + new.value
        return new
40d5633889bb hgfixes: add a fixer to convert plain strings to bytestrings
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
    96