comparison mercurial/windows.py @ 38483:3efadf2317c7

windows: add a method to convert Unix style command lines to Windows style

This started as a copy/paste of `os.path.expandvars()`, but limited to a given dictionary of variables, converting `foo = foo + bar` to `foo += bar`, and adding 'b' string prefixes. Then code was added to make sure that a value being substituted in wouldn't itself be expanded by cmd.exe. But that left inconsistent results between `$var1` and `%var1%` when its value was '%foo%' - since neither was touched, `$var1` wouldn't expand but `%var1%` would. So instead, this just converts the Unix style to Windows style (if the variable exists, because Windows will leave `%missing%` as-is), and lets cmd.exe do its thing. I then dropped the %% -> % conversion (because Windows doesn't do this), and added the ability to escape the '$' with '\'. The escape character is dropped, for consistency with shell handling. After everything seemed stable and working, running the whole test suite flagged a problem near the end of test-bookmarks.t:1069. The problem is cmd.exe won't pass empty variables to its child, so defined but empty variables are now skipped. I can't think of anything better, and it seems like a pre-existing violation of the documentation, which calls out that HG_OLDNODE is empty on bookmark creation. Future additions could potentially be replacing strong quotes with double quotes (cmd.exe doesn't know what to do with the former), escaping a double quote, and some tilde expansion via os.path.expanduser(). I've got some doubts about replacing the strong quotes in case sh.exe is run, but it seems like the right thing to do the vast majority of the time. The original form of this was discussed about a year ago[1].

[1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-July/100735.html
author Matt Harbison <matt_harbison@yahoo.com>
date Sun, 24 Jun 2018 01:13:09 -0400
parents a6c6b7beb025
children af8d8513d7de
comparison
equal deleted inserted replaced
38482:5faaa31a6082 38483:3efadf2317c7
10 import errno 10 import errno
11 import msvcrt 11 import msvcrt
12 import os 12 import os
13 import re 13 import re
14 import stat 14 import stat
15 import string
15 import sys 16 import sys
16 17
17 from .i18n import _ 18 from .i18n import _
18 from . import ( 19 from . import (
19 encoding, 20 encoding,
250 normcasespec = encoding.normcasespecs.upper 251 normcasespec = encoding.normcasespecs.upper
251 normcasefallback = encoding.upperfallback 252 normcasefallback = encoding.upperfallback
252 253
def samestat(s1, s2):
    """Report whether two stat results refer to the same file.

    This implementation never inspects ``s1`` or ``s2`` and always
    returns False.
    """
    # NOTE(review): presumably stat results cannot be compared reliably on
    # this platform, hence the constant answer -- confirm against callers.
    return False
256
def shelltocmdexe(path, env):
    r"""Convert shell variables in the form $var and ${var} inside ``path``
    to %var% form.  Existing Windows style variables are left unchanged.

    The variables are limited to the given environment.  Unknown variables are
    left unchanged.

    ``path`` is the command line as bytes.  ``env`` is a mapping of bytes
    variable names to bytes values; only names with a non-empty value in
    ``env`` are converted.  The converted command line is returned as bytes.

    >>> e = {b'var1': b'v1', b'var2': b'v2', b'var3': b'v3'}
    >>> # Only valid values are expanded
    >>> shelltocmdexe(b'cmd $var1 ${var2} %var3% $missing ${missing} %missing%',
    ...               e)
    'cmd %var1% %var2% %var3% $missing ${missing} %missing%'
    >>> # Single quote prevents expansion, as does \$ escaping
    >>> shelltocmdexe(b"cmd '$var1 ${var2} %var3%' \$var1 \${var2} \\", e)
    "cmd '$var1 ${var2} %var3%' $var1 ${var2} \\"
    >>> # $$ -> $, %% is not special, but can be the end and start of variables
    >>> shelltocmdexe(b"cmd $$ %% %var1%%var2%", e)
    'cmd $ %% %var1%%var2%'
    >>> # No double substitution
    >>> shelltocmdexe(b"$var1 %var1%", {b'var1': b'%var2%', b'var2': b'boom'})
    '%var1% %var1%'
    """
    if b'$' not in path:
        return path

    # Characters accepted in an unbraced ``$var`` name.
    varchars = (string.ascii_letters + string.digits).encode('ascii') + b'_-'

    res = b''
    index = 0
    pathlen = len(path)
    while index < pathlen:
        # Slice rather than index: indexing bytes yields an int on Python 3,
        # which would never compare equal to the one-byte literals below.
        c = path[index:index + 1]
        if c == b'\'':   # no expansion within single quotes
            # Drop everything consumed so far; ``index`` is now relative to
            # the text following the opening quote.
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index(b'\'')
                res += b'\'' + path[:index + 1]
            except ValueError:
                # Unterminated quote: copy the remainder verbatim.
                res += c + path
                index = pathlen - 1
        elif c == b'%':  # variable
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index(b'%')
            except ValueError:
                # Lone '%': copy the remainder verbatim.
                res += b'%' + path
                index = pathlen - 1
            else:
                # Already in Windows form; pass through untouched.
                var = path[:index]
                res += b'%' + var + b'%'
        elif c == b'$':  # variable or '$$'
            if path[index + 1:index + 2] == b'$':
                # '$$' collapses to a literal '$'.
                res += c
                index += 1
            elif path[index + 1:index + 2] == b'{':
                path = path[index + 2:]
                pathlen = len(path)
                try:
                    index = path.index(b'}')
                    var = path[:index]

                    # See below for why empty variables are handled specially
                    if env.get(var, b'') != b'':
                        res += b'%' + var + b'%'
                    else:
                        res += b'${' + var + b'}'
                except ValueError:
                    # Unterminated '${': copy the remainder verbatim.
                    res += b'${' + path
                    index = pathlen - 1
            else:
                var = b''
                index += 1
                c = path[index:index + 1]
                while c != b'' and c in varchars:
                    var += c
                    index += 1
                    c = path[index:index + 1]
                # Some variables (like HG_OLDNODE) may be defined, but have an
                # empty value.  Those need to be skipped because when spawning
                # cmd.exe to run the hook, it doesn't replace %VAR% for an empty
                # VAR, and that really confuses things like revset expressions.
                # OTOH, if it's left in Unix format and the hook runs sh.exe, it
                # will substitute to an empty string, and everything is happy.
                if env.get(var, b'') != b'':
                    res += b'%' + var + b'%'
                else:
                    res += b'$' + var

                # The scan stopped on the first non-name character; step back
                # so the shared increment below lands on it again.
                if c != b'':
                    index -= 1
        elif (c == b'\\' and index + 1 < pathlen
              and path[index + 1:index + 2] == b'$'):
            # Skip '\', but only if it is escaping $
            res += b'$'
            index += 1
        else:
            res += c

        index += 1
    return res
255 358
256 # A sequence of backslashes is special iff it precedes a double quote: 359 # A sequence of backslashes is special iff it precedes a double quote:
257 # - if there's an even number of backslashes, the double quote is not 360 # - if there's an even number of backslashes, the double quote is not
258 # quoted (i.e. it ends the quoted region) 361 # quoted (i.e. it ends the quoted region)
259 # - if there's an odd number of backslashes, the double quote is quoted 362 # - if there's an odd number of backslashes, the double quote is quoted