mercurial/hgweb/request.py
changeset 43076 2372284d9457
parent 40545 6107d4549fcc
child 43077 687b865b95ad
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
     6 # This software may be used and distributed according to the terms of the
     6 # This software may be used and distributed according to the terms of the
     7 # GNU General Public License version 2 or any later version.
     7 # GNU General Public License version 2 or any later version.
     8 
     8 
     9 from __future__ import absolute_import
     9 from __future__ import absolute_import
    10 
    10 
    11 #import wsgiref.validate
    11 # import wsgiref.validate
    12 
    12 
    13 from ..thirdparty import (
    13 from ..thirdparty import attr
    14     attr,
       
    15 )
       
    16 from .. import (
    14 from .. import (
    17     error,
    15     error,
    18     pycompat,
    16     pycompat,
    19     util,
    17     util,
    20 )
    18 )
    21 
    19 
       
    20 
    22 class multidict(object):
    21 class multidict(object):
    23     """A dict-like object that can store multiple values for a key.
    22     """A dict-like object that can store multiple values for a key.
    24 
    23 
    25     Used to store parsed request parameters.
    24     Used to store parsed request parameters.
    26 
    25 
    27     This is inspired by WebOb's class of the same name.
    26     This is inspired by WebOb's class of the same name.
    28     """
    27     """
       
    28 
    29     def __init__(self):
    29     def __init__(self):
    30         self._items = {}
    30         self._items = {}
    31 
    31 
    32     def __getitem__(self, key):
    32     def __getitem__(self, key):
    33         """Returns the last set value for a key."""
    33         """Returns the last set value for a key."""
    73 
    73 
    74         return vals[0]
    74         return vals[0]
    75 
    75 
    76     def asdictoflists(self):
    76     def asdictoflists(self):
    77         return {k: list(v) for k, v in self._items.iteritems()}
    77         return {k: list(v) for k, v in self._items.iteritems()}
       
    78 
    78 
    79 
    79 @attr.s(frozen=True)
    80 @attr.s(frozen=True)
    80 class parsedrequest(object):
    81 class parsedrequest(object):
    81     """Represents a parsed WSGI request.
    82     """Represents a parsed WSGI request.
    82 
    83 
   122     # Request body input stream.
   123     # Request body input stream.
   123     bodyfh = attr.ib()
   124     bodyfh = attr.ib()
   124     # WSGI environment dict, unmodified.
   125     # WSGI environment dict, unmodified.
   125     rawenv = attr.ib()
   126     rawenv = attr.ib()
   126 
   127 
       
   128 
   127 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
   129 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
   128     """Parse URL components from environment variables.
   130     """Parse URL components from environment variables.
   129 
   131 
   130     WSGI defines request attributes via environment variables. This function
   132     WSGI defines request attributes via environment variables. This function
   131     parses the environment variables into a data structure.
   133     parses the environment variables into a data structure.
   151     # PEP 3333 defines the WSGI spec and is a useful reference for this code.
   153     # PEP 3333 defines the WSGI spec and is a useful reference for this code.
   152 
   154 
   153     # We first validate that the incoming object conforms with the WSGI spec.
   155     # We first validate that the incoming object conforms with the WSGI spec.
   154     # We only want to be dealing with spec-conforming WSGI implementations.
   156     # We only want to be dealing with spec-conforming WSGI implementations.
   155     # TODO enable this once we fix internal violations.
   157     # TODO enable this once we fix internal violations.
   156     #wsgiref.validate.check_environ(env)
   158     # wsgiref.validate.check_environ(env)
   157 
   159 
   158     # PEP 3333 states that environment keys and values are native strings
   160     # PEP 3333 states that environment keys and values are native strings
   159     # (bytes on Python 2 and str on Python 3). The code points for the Unicode
   161     # (bytes on Python 2 and str on Python 3). The code points for the Unicode
   160     # strings on Python 3 must be between \u0000-\u00FF. We deal with bytes
   162     # strings on Python 3 must be between \u0000-\u00FF. We deal with bytes
   161     # in Mercurial, so mass convert string keys and values to bytes.
   163     # in Mercurial, so mass convert string keys and values to bytes.
   162     if pycompat.ispy3:
   164     if pycompat.ispy3:
   163         env = {k.encode('latin-1'): v for k, v in env.iteritems()}
   165         env = {k.encode('latin-1'): v for k, v in env.iteritems()}
   164         env = {k: v.encode('latin-1') if isinstance(v, str) else v
   166         env = {
   165                for k, v in env.iteritems()}
   167             k: v.encode('latin-1') if isinstance(v, str) else v
       
   168             for k, v in env.iteritems()
       
   169         }
   166 
   170 
   167     # Some hosting solutions are emulating hgwebdir, and dispatching directly
   171     # Some hosting solutions are emulating hgwebdir, and dispatching directly
   168     # to an hgweb instance using this environment variable.  This was always
   172     # to an hgweb instance using this environment variable.  This was always
   169     # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
   173     # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
   170     if not reponame:
   174     if not reponame:
   253 
   257 
   254         if not env.get('PATH_INFO'):
   258         if not env.get('PATH_INFO'):
   255             raise error.ProgrammingError('reponame requires PATH_INFO')
   259             raise error.ProgrammingError('reponame requires PATH_INFO')
   256 
   260 
   257         if not env['PATH_INFO'].startswith(repoprefix):
   261         if not env['PATH_INFO'].startswith(repoprefix):
   258             raise error.ProgrammingError('PATH_INFO does not begin with repo '
   262             raise error.ProgrammingError(
   259                                          'name: %s (%s)' % (env['PATH_INFO'],
   263                 'PATH_INFO does not begin with repo '
   260                                                             reponame))
   264                 'name: %s (%s)' % (env['PATH_INFO'], reponame)
   261 
   265             )
   262         dispatchpath = env['PATH_INFO'][len(repoprefix):]
   266 
       
   267         dispatchpath = env['PATH_INFO'][len(repoprefix) :]
   263 
   268 
   264         if dispatchpath and not dispatchpath.startswith('/'):
   269         if dispatchpath and not dispatchpath.startswith('/'):
   265             raise error.ProgrammingError('reponame prefix of PATH_INFO does '
   270             raise error.ProgrammingError(
   266                                          'not end at path delimiter: %s (%s)' %
   271                 'reponame prefix of PATH_INFO does '
   267                                          (env['PATH_INFO'], reponame))
   272                 'not end at path delimiter: %s (%s)'
       
   273                 % (env['PATH_INFO'], reponame)
       
   274             )
   268 
   275 
   269         apppath = apppath.rstrip('/') + repoprefix
   276         apppath = apppath.rstrip('/') + repoprefix
   270         dispatchparts = dispatchpath.strip('/').split('/')
   277         dispatchparts = dispatchpath.strip('/').split('/')
   271         dispatchpath = '/'.join(dispatchparts)
   278         dispatchpath = '/'.join(dispatchparts)
   272 
   279 
   293     # perform case normalization for us. We just rewrite underscore to dash
   300     # perform case normalization for us. We just rewrite underscore to dash
   294     # so keys match what likely went over the wire.
   301     # so keys match what likely went over the wire.
   295     headers = []
   302     headers = []
   296     for k, v in env.iteritems():
   303     for k, v in env.iteritems():
   297         if k.startswith('HTTP_'):
   304         if k.startswith('HTTP_'):
   298             headers.append((k[len('HTTP_'):].replace('_', '-'), v))
   305             headers.append((k[len('HTTP_') :].replace('_', '-'), v))
   299 
   306 
   300     from . import wsgiheaders # avoid cycle
   307     from . import wsgiheaders  # avoid cycle
       
   308 
   301     headers = wsgiheaders.Headers(headers)
   309     headers = wsgiheaders.Headers(headers)
   302 
   310 
   303     # This is kind of a lie because the HTTP header wasn't explicitly
   311     # This is kind of a lie because the HTTP header wasn't explicitly
   304     # sent. But for all intents and purposes it should be OK to lie about
   312     # sent. But for all intents and purposes it should be OK to lie about
   305     # this, since a consumer will use either value to determine how many
   313     # this, since a consumer will use either value to determine how many
   311         headers['Content-Type'] = env['CONTENT_TYPE']
   319         headers['Content-Type'] = env['CONTENT_TYPE']
   312 
   320 
   313     if bodyfh is None:
   321     if bodyfh is None:
   314         bodyfh = env['wsgi.input']
   322         bodyfh = env['wsgi.input']
   315         if 'Content-Length' in headers:
   323         if 'Content-Length' in headers:
   316             bodyfh = util.cappedreader(bodyfh,
   324             bodyfh = util.cappedreader(
   317                                        int(headers['Content-Length'] or '0'))
   325                 bodyfh, int(headers['Content-Length'] or '0')
   318 
   326             )
   319     return parsedrequest(method=env['REQUEST_METHOD'],
   327 
   320                          url=fullurl, baseurl=baseurl,
   328     return parsedrequest(
   321                          advertisedurl=advertisedfullurl,
   329         method=env['REQUEST_METHOD'],
   322                          advertisedbaseurl=advertisedbaseurl,
   330         url=fullurl,
   323                          urlscheme=env['wsgi.url_scheme'],
   331         baseurl=baseurl,
   324                          remoteuser=env.get('REMOTE_USER'),
   332         advertisedurl=advertisedfullurl,
   325                          remotehost=env.get('REMOTE_HOST'),
   333         advertisedbaseurl=advertisedbaseurl,
   326                          apppath=apppath,
   334         urlscheme=env['wsgi.url_scheme'],
   327                          dispatchparts=dispatchparts, dispatchpath=dispatchpath,
   335         remoteuser=env.get('REMOTE_USER'),
   328                          reponame=reponame,
   336         remotehost=env.get('REMOTE_HOST'),
   329                          querystring=querystring,
   337         apppath=apppath,
   330                          qsparams=qsparams,
   338         dispatchparts=dispatchparts,
   331                          headers=headers,
   339         dispatchpath=dispatchpath,
   332                          bodyfh=bodyfh,
   340         reponame=reponame,
   333                          rawenv=env)
   341         querystring=querystring,
       
   342         qsparams=qsparams,
       
   343         headers=headers,
       
   344         bodyfh=bodyfh,
       
   345         rawenv=env,
       
   346     )
       
   347 
   334 
   348 
   335 class offsettrackingwriter(object):
   349 class offsettrackingwriter(object):
   336     """A file-like object that is append-only and tracks write count.
   350     """A file-like object that is append-only and tracks write count.
   337 
   351 
   338     Instances are bound to a callable. This callable is called with data
   352     Instances are bound to a callable. This callable is called with data
   343 
   357 
   344     The intent of this class is to wrap the ``write()`` function returned by
   358     The intent of this class is to wrap the ``write()`` function returned by
   345     a WSGI ``start_response()`` function. Since ``write()`` is a callable and
   359     a WSGI ``start_response()`` function. Since ``write()`` is a callable and
   346     not a file object, it doesn't implement other file object methods.
   360     not a file object, it doesn't implement other file object methods.
   347     """
   361     """
       
   362 
   348     def __init__(self, writefn):
   363     def __init__(self, writefn):
   349         self._write = writefn
   364         self._write = writefn
   350         self._offset = 0
   365         self._offset = 0
   351 
   366 
   352     def write(self, s):
   367     def write(self, s):
   361         pass
   376         pass
   362 
   377 
   363     def tell(self):
   378     def tell(self):
   364         return self._offset
   379         return self._offset
   365 
   380 
       
   381 
   366 class wsgiresponse(object):
   382 class wsgiresponse(object):
   367     """Represents a response to a WSGI request.
   383     """Represents a response to a WSGI request.
   368 
   384 
   369     A response consists of a status line, headers, and a body.
   385     A response consists of a status line, headers, and a body.
   370 
   386 
   387         """
   403         """
   388         self._req = req
   404         self._req = req
   389         self._startresponse = startresponse
   405         self._startresponse = startresponse
   390 
   406 
   391         self.status = None
   407         self.status = None
   392         from . import wsgiheaders # avoid cycle
   408         from . import wsgiheaders  # avoid cycle
       
   409 
   393         self.headers = wsgiheaders.Headers([])
   410         self.headers = wsgiheaders.Headers([])
   394 
   411 
   395         self._bodybytes = None
   412         self._bodybytes = None
   396         self._bodygen = None
   413         self._bodygen = None
   397         self._bodywillwrite = False
   414         self._bodywillwrite = False
   398         self._started = False
   415         self._started = False
   399         self._bodywritefn = None
   416         self._bodywritefn = None
   400 
   417 
   401     def _verifybody(self):
   418     def _verifybody(self):
   402         if (self._bodybytes is not None or self._bodygen is not None
   419         if (
   403             or self._bodywillwrite):
   420             self._bodybytes is not None
       
   421             or self._bodygen is not None
       
   422             or self._bodywillwrite
       
   423         ):
   404             raise error.ProgrammingError('cannot define body multiple times')
   424             raise error.ProgrammingError('cannot define body multiple times')
   405 
   425 
   406     def setbodybytes(self, b):
   426     def setbodybytes(self, b):
   407         """Define the response body as static bytes.
   427         """Define the response body as static bytes.
   408 
   428 
   448         self._started = True
   468         self._started = True
   449 
   469 
   450         if not self.status:
   470         if not self.status:
   451             raise error.ProgrammingError('status line not defined')
   471             raise error.ProgrammingError('status line not defined')
   452 
   472 
   453         if (self._bodybytes is None and self._bodygen is None
   473         if (
   454             and not self._bodywillwrite):
   474             self._bodybytes is None
       
   475             and self._bodygen is None
       
   476             and not self._bodywillwrite
       
   477         ):
   455             raise error.ProgrammingError('response body not defined')
   478             raise error.ProgrammingError('response body not defined')
   456 
   479 
   457         # RFC 7232 Section 4.1 states that a 304 MUST generate one of
   480         # RFC 7232 Section 4.1 states that a 304 MUST generate one of
   458         # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
   481         # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
   459         # and SHOULD NOT generate other headers unless they could be used
   482         # and SHOULD NOT generate other headers unless they could be used
   467             if self.headers.get('Content-Length') == '0':
   490             if self.headers.get('Content-Length') == '0':
   468                 del self.headers['Content-Length']
   491                 del self.headers['Content-Length']
   469 
   492 
   470             # Strictly speaking, this is too strict. But until it causes
   493             # Strictly speaking, this is too strict. But until it causes
   471             # problems, let's be strict.
   494             # problems, let's be strict.
   472             badheaders = {k for k in self.headers.keys()
   495             badheaders = {
   473                           if k.lower() not in ('date', 'etag', 'expires',
   496                 k
   474                                                'cache-control',
   497                 for k in self.headers.keys()
   475                                                'content-location',
   498                 if k.lower()
   476                                                'content-security-policy',
   499                 not in (
   477                                                'vary')}
   500                     'date',
       
   501                     'etag',
       
   502                     'expires',
       
   503                     'cache-control',
       
   504                     'content-location',
       
   505                     'content-security-policy',
       
   506                     'vary',
       
   507                 )
       
   508             }
   478             if badheaders:
   509             if badheaders:
   479                 raise error.ProgrammingError(
   510                 raise error.ProgrammingError(
   480                     'illegal header on 304 response: %s' %
   511                     'illegal header on 304 response: %s'
   481                     ', '.join(sorted(badheaders)))
   512                     % ', '.join(sorted(badheaders))
       
   513                 )
   482 
   514 
   483             if self._bodygen is not None or self._bodywillwrite:
   515             if self._bodygen is not None or self._bodywillwrite:
   484                 raise error.ProgrammingError("must use setbodybytes('') with "
   516                 raise error.ProgrammingError(
   485                                              "304 responses")
   517                     "must use setbodybytes('') with " "304 responses"
       
   518                 )
   486 
   519 
   487         # Various HTTP clients (notably httplib) won't read the HTTP response
   520         # Various HTTP clients (notably httplib) won't read the HTTP response
   488         # until the HTTP request has been sent in full. If servers (us) send a
   521         # until the HTTP request has been sent in full. If servers (us) send a
   489         # response before the HTTP request has been fully sent, the connection
   522         # response before the HTTP request has been fully sent, the connection
   490         # may deadlock because neither end is reading.
   523         # may deadlock because neither end is reading.
   529             while True:
   562             while True:
   530                 chunk = self._req.bodyfh.read(32768)
   563                 chunk = self._req.bodyfh.read(32768)
   531                 if not chunk:
   564                 if not chunk:
   532                     break
   565                     break
   533 
   566 
   534         strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
   567         strheaders = [
   535                       k, v in self.headers.items()]
   568             (pycompat.strurl(k), pycompat.strurl(v))
   536         write = self._startresponse(pycompat.sysstr(self.status),
   569             for k, v in self.headers.items()
   537                                     strheaders)
   570         ]
       
   571         write = self._startresponse(pycompat.sysstr(self.status), strheaders)
   538 
   572 
   539         if self._bodybytes:
   573         if self._bodybytes:
   540             yield self._bodybytes
   574             yield self._bodybytes
   541         elif self._bodygen:
   575         elif self._bodygen:
   542             for chunk in self._bodygen:
   576             for chunk in self._bodygen:
   564         """
   598         """
   565         if not self._bodywillwrite:
   599         if not self._bodywillwrite:
   566             raise error.ProgrammingError('must call setbodywillwrite() first')
   600             raise error.ProgrammingError('must call setbodywillwrite() first')
   567 
   601 
   568         if not self._started:
   602         if not self._started:
   569             raise error.ProgrammingError('must call sendresponse() first; did '
   603             raise error.ProgrammingError(
   570                                          'you remember to consume it since it '
   604                 'must call sendresponse() first; did '
   571                                          'is a generator?')
   605                 'you remember to consume it since it '
       
   606                 'is a generator?'
       
   607             )
   572 
   608 
   573         assert self._bodywritefn
   609         assert self._bodywritefn
   574         return offsettrackingwriter(self._bodywritefn)
   610         return offsettrackingwriter(self._bodywritefn)
       
   611 
   575 
   612 
   576 def wsgiapplication(app_maker):
   613 def wsgiapplication(app_maker):
   577     '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
   614     '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
   578     can and should now be used as a WSGI application.'''
   615     can and should now be used as a WSGI application.'''
   579     application = app_maker()
   616     application = app_maker()
       
   617 
   580     def run_wsgi(env, respond):
   618     def run_wsgi(env, respond):
   581         return application(env, respond)
   619         return application(env, respond)
       
   620 
   582     return run_wsgi
   621     return run_wsgi