Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/pycompat.py @ 31448:b70407bd84d5
pycompat: add bytestr wrapper which mostly acts as a Python 2 str
This allows us to handle bytes in mostly the same manner as Python 2 str,
so we can get rid of ugly s[i:i + 1] hacks:
    s = bytestr(s)
    while i < len(s):
        c = s[i]
        ...
This is a simpler version of the previous RFC patch, which tried to preserve
the bytestr type where possible. The new version simply drops the bytestr
wrapping from the results of operations, so we aren't likely to pass a
bytestr to a function that expects Python 3 bytes.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Wed, 08 Mar 2017 22:48:26 +0900 |
parents | 63a39d647888 |
children | a1e40ceee640 |
comparison
equal
deleted
inserted
replaced
31447:82350f7fa56c | 31448:b70407bd84d5 |
---|---|
74 if getattr(sys, 'argv', None) is not None: | 74 if getattr(sys, 'argv', None) is not None: |
75 sysargv = list(map(os.fsencode, sys.argv)) | 75 sysargv = list(map(os.fsencode, sys.argv)) |
76 | 76 |
77 bytechr = struct.Struct('>B').pack | 77 bytechr = struct.Struct('>B').pack |
78 | 78 |
79 class bytestr(bytes): | |
80 """A bytes which mostly acts as a Python 2 str | |
81 | |
82 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1) | |
83 (b'', b'foo', b'ascii', b'1') | |
84 >>> s = bytestr(b'foo') | |
85 >>> assert s is bytestr(s) | |
86 | |
87 There's no implicit conversion from non-ascii str as its encoding is | |
88 unknown: | |
89 | |
90 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS | |
91 Traceback (most recent call last): | |
92 ... | |
93 UnicodeEncodeError: ... | |
94 | |
95 Comparison between bytestr and bytes should work: | |
96 | |
97 >>> assert bytestr(b'foo') == b'foo' | |
98 >>> assert b'foo' == bytestr(b'foo') | |
99 >>> assert b'f' in bytestr(b'foo') | |
100 >>> assert bytestr(b'f') in b'foo' | |
101 | |
102 Sliced elements should be bytes, not integer: | |
103 | |
104 >>> s[1], s[:2] | |
105 (b'o', b'fo') | |
106 >>> list(s), list(reversed(s)) | |
107 ([b'f', b'o', b'o'], [b'o', b'o', b'f']) | |
108 | |
109 As bytestr type isn't propagated across operations, you need to cast | |
110 bytes to bytestr explicitly: | |
111 | |
112 >>> s = bytestr(b'foo').upper() | |
113 >>> t = bytestr(s) | |
114 >>> s[0], t[0] | |
115 (70, b'F') | |
116 | |
117 Be careful to not pass a bytestr object to a function which expects | |
118 bytearray-like behavior. | |
119 | |
120 >>> t = bytes(t) # cast to bytes | |
121 >>> assert type(t) is bytes | |
122 """ | |
123 | |
124 def __new__(cls, s=b''): | |
125 if isinstance(s, bytestr): | |
126 return s | |
127 if not isinstance(s, (bytes, bytearray)): | |
128 s = str(s).encode(u'ascii') | |
129 return bytes.__new__(cls, s) | |
130 | |
131 def __getitem__(self, key): | |
132 s = bytes.__getitem__(self, key) | |
133 if not isinstance(s, bytes): | |
134 s = bytechr(s) | |
135 return s | |
136 | |
137 def __iter__(self): | |
138 return iterbytestr(bytes.__iter__(self)) | |
139 | |
79 def iterbytestr(s): | 140 def iterbytestr(s): |
80 """Iterate bytes as if it were a str object of Python 2""" | 141 """Iterate bytes as if it were a str object of Python 2""" |
81 return map(bytechr, s) | 142 return map(bytechr, s) |
82 | 143 |
83 def sysstr(s): | 144 def sysstr(s): |
144 | 205 |
145 else: | 206 else: |
146 import cStringIO | 207 import cStringIO |
147 | 208 |
148 bytechr = chr | 209 bytechr = chr |
210 bytestr = str | |
149 iterbytestr = iter | 211 iterbytestr = iter |
150 | 212 |
151 def sysstr(s): | 213 def sysstr(s): |
152 return s | 214 return s |
153 | 215 |