Mercurial > public > mercurial-scm > hg
comparison mercurial/util.py @ 14076:924c82157d46
url: move URL parsing functions into util to improve startup time
The introduction of the new URL parsing code has created a startup
time regression. This is mainly due to the use of url.hasscheme() in
the ui class. It ends up importing many libraries that the url module
requires.
This fix helps marginally, but if we can get rid of the urllib import
in the URL parser altogether, startup time will go back to normal.
perfstartup time before the URL refactoring (8796fb6af67e):
! wall 0.050692 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
current startup time (139fb11210bb):
! wall 0.070685 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
after this change:
! wall 0.064667 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
author | Brodie Rao <brodie@bitheap.org> |
---|---|
date | Sat, 30 Apr 2011 09:43:20 -0700 |
parents | e4bfb9c337f3 |
children | c285bdb0572a |
comparison
equal
deleted
inserted
replaced
14075:bc101902a68d | 14076:924c82157d46 |
---|---|
15 | 15 |
16 from i18n import _ | 16 from i18n import _ |
17 import error, osutil, encoding | 17 import error, osutil, encoding |
18 import errno, re, shutil, sys, tempfile, traceback | 18 import errno, re, shutil, sys, tempfile, traceback |
19 import os, time, calendar, textwrap, unicodedata, signal | 19 import os, time, calendar, textwrap, unicodedata, signal |
20 import imp, socket | 20 import imp, socket, urllib |
21 | 21 |
22 # Python compatibility | 22 # Python compatibility |
23 | 23 |
24 def sha1(s): | 24 def sha1(s): |
25 return _fastsha1(s) | 25 return _fastsha1(s) |
1281 """Parse s into a boolean. | 1281 """Parse s into a boolean. |
1282 | 1282 |
1283 If s is not a valid boolean, returns None. | 1283 If s is not a valid boolean, returns None. |
1284 """ | 1284 """ |
1285 return _booleans.get(s.lower(), None) | 1285 return _booleans.get(s.lower(), None) |
1286 | |
1287 class url(object): | |
1288 """Reliable URL parser. | |
1289 | |
1290 This parses URLs and provides attributes for the following | |
1291 components: | |
1292 | |
1293 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment> | |
1294 | |
1295 Missing components are set to None. The only exception is | |
1296 fragment, which is set to '' if present but empty. | |
1297 | |
1298 If parsefragment is False, fragment is included in query. If | |
1299 parsequery is False, query is included in path. If both are | |
1300 False, both fragment and query are included in path. | |
1301 | |
1302 See http://www.ietf.org/rfc/rfc2396.txt for more information. | |
1303 | |
1304 Note that for backward compatibility reasons, bundle URLs do not | |
1305 take host names. That means 'bundle://../' has a path of '../'. | |
1306 | |
1307 Examples: | |
1308 | |
1309 >>> url('http://www.ietf.org/rfc/rfc2396.txt') | |
1310 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'> | |
1311 >>> url('ssh://[::1]:2200//home/joe/repo') | |
1312 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'> | |
1313 >>> url('file:///home/joe/repo') | |
1314 <url scheme: 'file', path: '/home/joe/repo'> | |
1315 >>> url('bundle:foo') | |
1316 <url scheme: 'bundle', path: 'foo'> | |
1317 >>> url('bundle://../foo') | |
1318 <url scheme: 'bundle', path: '../foo'> | |
1319 >>> url('c:\\\\foo\\\\bar') | |
1320 <url path: 'c:\\\\foo\\\\bar'> | |
1321 | |
1322 Authentication credentials: | |
1323 | |
1324 >>> url('ssh://joe:xyz@x/repo') | |
1325 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'> | |
1326 >>> url('ssh://joe@x/repo') | |
1327 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'> | |
1328 | |
1329 Query strings and fragments: | |
1330 | |
1331 >>> url('http://host/a?b#c') | |
1332 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'> | |
1333 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False) | |
1334 <url scheme: 'http', host: 'host', path: 'a?b#c'> | |
1335 """ | |
1336 | |
1337 _safechars = "!~*'()+" | |
1338 _safepchars = "/!~*'()+" | |
1339 _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match | |
1340 | |
1341 def __init__(self, path, parsequery=True, parsefragment=True): | |
1342 # We slowly chomp away at path until we have only the path left | |
1343 self.scheme = self.user = self.passwd = self.host = None | |
1344 self.port = self.path = self.query = self.fragment = None | |
1345 self._localpath = True | |
1346 self._hostport = '' | |
1347 self._origpath = path | |
1348 | |
1349 # special case for Windows drive letters | |
1350 if hasdriveletter(path): | |
1351 self.path = path | |
1352 return | |
1353 | |
1354 # For compatibility reasons, we can't handle bundle paths as | |
1355 # normal URLS | |
1356 if path.startswith('bundle:'): | |
1357 self.scheme = 'bundle' | |
1358 path = path[7:] | |
1359 if path.startswith('//'): | |
1360 path = path[2:] | |
1361 self.path = path | |
1362 return | |
1363 | |
1364 if self._matchscheme(path): | |
1365 parts = path.split(':', 1) | |
1366 if parts[0]: | |
1367 self.scheme, path = parts | |
1368 self._localpath = False | |
1369 | |
1370 if not path: | |
1371 path = None | |
1372 if self._localpath: | |
1373 self.path = '' | |
1374 return | |
1375 else: | |
1376 if parsefragment and '#' in path: | |
1377 path, self.fragment = path.split('#', 1) | |
1378 if not path: | |
1379 path = None | |
1380 if self._localpath: | |
1381 self.path = path | |
1382 return | |
1383 | |
1384 if parsequery and '?' in path: | |
1385 path, self.query = path.split('?', 1) | |
1386 if not path: | |
1387 path = None | |
1388 if not self.query: | |
1389 self.query = None | |
1390 | |
1391 # // is required to specify a host/authority | |
1392 if path and path.startswith('//'): | |
1393 parts = path[2:].split('/', 1) | |
1394 if len(parts) > 1: | |
1395 self.host, path = parts | |
1396 path = path | |
1397 else: | |
1398 self.host = parts[0] | |
1399 path = None | |
1400 if not self.host: | |
1401 self.host = None | |
1402 if path: | |
1403 path = '/' + path | |
1404 | |
1405 if self.host and '@' in self.host: | |
1406 self.user, self.host = self.host.rsplit('@', 1) | |
1407 if ':' in self.user: | |
1408 self.user, self.passwd = self.user.split(':', 1) | |
1409 if not self.host: | |
1410 self.host = None | |
1411 | |
1412 # Don't split on colons in IPv6 addresses without ports | |
1413 if (self.host and ':' in self.host and | |
1414 not (self.host.startswith('[') and self.host.endswith(']'))): | |
1415 self._hostport = self.host | |
1416 self.host, self.port = self.host.rsplit(':', 1) | |
1417 if not self.host: | |
1418 self.host = None | |
1419 | |
1420 if (self.host and self.scheme == 'file' and | |
1421 self.host not in ('localhost', '127.0.0.1', '[::1]')): | |
1422 raise Abort(_('file:// URLs can only refer to localhost')) | |
1423 | |
1424 self.path = path | |
1425 | |
1426 for a in ('user', 'passwd', 'host', 'port', | |
1427 'path', 'query', 'fragment'): | |
1428 v = getattr(self, a) | |
1429 if v is not None: | |
1430 setattr(self, a, urllib.unquote(v)) | |
1431 | |
1432 def __repr__(self): | |
1433 attrs = [] | |
1434 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path', | |
1435 'query', 'fragment'): | |
1436 v = getattr(self, a) | |
1437 if v is not None: | |
1438 attrs.append('%s: %r' % (a, v)) | |
1439 return '<url %s>' % ', '.join(attrs) | |
1440 | |
1441 def __str__(self): | |
1442 """Join the URL's components back into a URL string. | |
1443 | |
1444 Examples: | |
1445 | |
1446 >>> str(url('http://user:pw@host:80/?foo#bar')) | |
1447 'http://user:pw@host:80/?foo#bar' | |
1448 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#')) | |
1449 'ssh://user:pw@[::1]:2200//home/joe#' | |
1450 >>> str(url('http://localhost:80//')) | |
1451 'http://localhost:80//' | |
1452 >>> str(url('http://localhost:80/')) | |
1453 'http://localhost:80/' | |
1454 >>> str(url('http://localhost:80')) | |
1455 'http://localhost:80/' | |
1456 >>> str(url('bundle:foo')) | |
1457 'bundle:foo' | |
1458 >>> str(url('bundle://../foo')) | |
1459 'bundle:../foo' | |
1460 >>> str(url('path')) | |
1461 'path' | |
1462 """ | |
1463 if self._localpath: | |
1464 s = self.path | |
1465 if self.scheme == 'bundle': | |
1466 s = 'bundle:' + s | |
1467 if self.fragment: | |
1468 s += '#' + self.fragment | |
1469 return s | |
1470 | |
1471 s = self.scheme + ':' | |
1472 if (self.user or self.passwd or self.host or | |
1473 self.scheme and not self.path): | |
1474 s += '//' | |
1475 if self.user: | |
1476 s += urllib.quote(self.user, safe=self._safechars) | |
1477 if self.passwd: | |
1478 s += ':' + urllib.quote(self.passwd, safe=self._safechars) | |
1479 if self.user or self.passwd: | |
1480 s += '@' | |
1481 if self.host: | |
1482 if not (self.host.startswith('[') and self.host.endswith(']')): | |
1483 s += urllib.quote(self.host) | |
1484 else: | |
1485 s += self.host | |
1486 if self.port: | |
1487 s += ':' + urllib.quote(self.port) | |
1488 if self.host: | |
1489 s += '/' | |
1490 if self.path: | |
1491 s += urllib.quote(self.path, safe=self._safepchars) | |
1492 if self.query: | |
1493 s += '?' + urllib.quote(self.query, safe=self._safepchars) | |
1494 if self.fragment is not None: | |
1495 s += '#' + urllib.quote(self.fragment, safe=self._safepchars) | |
1496 return s | |
1497 | |
1498 def authinfo(self): | |
1499 user, passwd = self.user, self.passwd | |
1500 try: | |
1501 self.user, self.passwd = None, None | |
1502 s = str(self) | |
1503 finally: | |
1504 self.user, self.passwd = user, passwd | |
1505 if not self.user: | |
1506 return (s, None) | |
1507 return (s, (None, (str(self), self.host), | |
1508 self.user, self.passwd or '')) | |
1509 | |
1510 def localpath(self): | |
1511 if self.scheme == 'file' or self.scheme == 'bundle': | |
1512 path = self.path or '/' | |
1513 # For Windows, we need to promote hosts containing drive | |
1514 # letters to paths with drive letters. | |
1515 if hasdriveletter(self._hostport): | |
1516 path = self._hostport + '/' + self.path | |
1517 elif self.host is not None and self.path: | |
1518 path = '/' + path | |
1519 # We also need to handle the case of file:///C:/, which | |
1520 # should return C:/, not /C:/. | |
1521 elif hasdriveletter(path): | |
1522 # Strip leading slash from paths with drive names | |
1523 return path[1:] | |
1524 return path | |
1525 return self._origpath | |
1526 | |
1527 def hasscheme(path): | |
1528 return bool(url(path).scheme) | |
1529 | |
1530 def hasdriveletter(path): | |
1531 return path[1:2] == ':' and path[0:1].isalpha() | |
1532 | |
1533 def localpath(path): | |
1534 return url(path, parsequery=False, parsefragment=False).localpath() | |
1535 | |
1536 def hidepassword(u): | |
1537 '''hide user credential in a url string''' | |
1538 u = url(u) | |
1539 if u.passwd: | |
1540 u.passwd = '***' | |
1541 return str(u) | |
1542 | |
1543 def removeauth(u): | |
1544 '''remove all authentication information from a url string''' | |
1545 u = url(u) | |
1546 u.user = u.passwd = None | |
1547 return str(u) |