mercurial/hgweb/request.py
changeset 36806 69b2d0900cd7
parent 36713 2442927cdd96
child 36808 0031e972ded2
equal deleted inserted replaced
36805:ec46415ed826 36806:69b2d0900cd7
     9 from __future__ import absolute_import
     9 from __future__ import absolute_import
    10 
    10 
    11 import cgi
    11 import cgi
    12 import errno
    12 import errno
    13 import socket
    13 import socket
       
    14 #import wsgiref.validate
    14 
    15 
    15 from .common import (
    16 from .common import (
    16     ErrorResponse,
    17     ErrorResponse,
    17     HTTP_NOT_MODIFIED,
    18     HTTP_NOT_MODIFIED,
    18     statusmessage,
    19     statusmessage,
    19 )
    20 )
    20 
    21 
       
    22 from ..thirdparty import (
       
    23     attr,
       
    24 )
    21 from .. import (
    25 from .. import (
    22     pycompat,
    26     pycompat,
    23     util,
    27     util,
    24 )
    28 )
    25 
    29 
    51     bytesform = {}
    55     bytesform = {}
    52     for k, v in form.iteritems():
    56     for k, v in form.iteritems():
    53         bytesform[pycompat.bytesurl(k)] = [
    57         bytesform[pycompat.bytesurl(k)] = [
    54             pycompat.bytesurl(i.strip()) for i in v]
    58             pycompat.bytesurl(i.strip()) for i in v]
    55     return bytesform
    59     return bytesform
       
    60 
       
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request / static HTTP request parameters."""

    # NOTE: the order of the attributes below defines the positional order of
    # the generated constructor; do not reorder them.

    # Full URL for this request: scheme, host (and port), URL-quoted path
    # and, when present, the query string.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # WSGI application path.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. This is the
    # entries of ``dispatchparts`` joined with "/".
    dispatchpath = attr.ib()
    # Raw query string (part after "?" in URL). Empty when the request has no
    # query string.
    querystring = attr.ib()
       
    81 
       
    82 def parserequestfromenv(env):
       
    83     """Parse URL components from environment variables.
       
    84 
       
    85     WSGI defines request attributes via environment variables. This function
       
    86     parses the environment variables into a data structure.
       
    87     """
       
    88     # PEP-0333 defines the WSGI spec and is a useful reference for this code.
       
    89 
       
    90     # We first validate that the incoming object conforms with the WSGI spec.
       
    91     # We only want to be dealing with spec-conforming WSGI implementations.
       
    92     # TODO enable this once we fix internal violations.
       
    93     #wsgiref.validate.check_environ(env)
       
    94 
       
    95     # PEP-0333 states that environment keys and values are native strings
       
    96     # (bytes on Python 2 and str on Python 3). The code points for the Unicode
       
    97     # strings on Python 3 must be between \00000-\000FF. We deal with bytes
       
    98     # in Mercurial, so mass convert string keys and values to bytes.
       
    99     if pycompat.ispy3:
       
   100         env = {k.encode('latin-1'): v for k, v in env.iteritems()}
       
   101         env = {k: v.encode('latin-1') if isinstance(v, str) else v
       
   102                for k, v in env.iteritems()}
       
   103 
       
   104     # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
       
   105     # the environment variables.
       
   106     # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
       
   107     # how URLs are reconstructed.
       
   108     fullurl = env['wsgi.url_scheme'] + '://'
       
   109     advertisedfullurl = fullurl
       
   110 
       
   111     def addport(s):
       
   112         if env['wsgi.url_scheme'] == 'https':
       
   113             if env['SERVER_PORT'] != '443':
       
   114                 s += ':' + env['SERVER_PORT']
       
   115         else:
       
   116             if env['SERVER_PORT'] != '80':
       
   117                 s += ':' + env['SERVER_PORT']
       
   118 
       
   119         return s
       
   120 
       
   121     if env.get('HTTP_HOST'):
       
   122         fullurl += env['HTTP_HOST']
       
   123     else:
       
   124         fullurl += env['SERVER_NAME']
       
   125         fullurl = addport(fullurl)
       
   126 
       
   127     advertisedfullurl += env['SERVER_NAME']
       
   128     advertisedfullurl = addport(advertisedfullurl)
       
   129 
       
   130     baseurl = fullurl
       
   131     advertisedbaseurl = advertisedfullurl
       
   132 
       
   133     fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
       
   134     advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
       
   135     fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
       
   136     advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
       
   137 
       
   138     if env.get('QUERY_STRING'):
       
   139         fullurl += '?' + env['QUERY_STRING']
       
   140         advertisedfullurl += '?' + env['QUERY_STRING']
       
   141 
       
   142     # When dispatching requests, we look at the URL components (PATH_INFO
       
   143     # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
       
   144     # has the concept of "virtual" repositories. This is defined via REPO_NAME.
       
   145     # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
       
   146     # root. We also exclude its path components from PATH_INFO when resolving
       
   147     # the dispatch path.
       
   148 
       
   149     # TODO the use of trailing slashes in apppath is arguably wrong. We need it
       
   150     # to appease low-level parts of hgweb_mod for now.
       
   151     apppath = env['SCRIPT_NAME']
       
   152     if not apppath.endswith('/'):
       
   153         apppath += '/'
       
   154 
       
   155     if env.get('REPO_NAME'):
       
   156         apppath += env.get('REPO_NAME') + '/'
       
   157 
       
   158     if 'PATH_INFO' in env:
       
   159         dispatchparts = env['PATH_INFO'].strip('/').split('/')
       
   160 
       
   161         # Strip out repo parts.
       
   162         repoparts = env.get('REPO_NAME', '').split('/')
       
   163         if dispatchparts[:len(repoparts)] == repoparts:
       
   164             dispatchparts = dispatchparts[len(repoparts):]
       
   165     else:
       
   166         dispatchparts = []
       
   167 
       
   168     dispatchpath = '/'.join(dispatchparts)
       
   169 
       
   170     querystring = env.get('QUERY_STRING', '')
       
   171 
       
   172     return parsedrequest(url=fullurl, baseurl=baseurl,
       
   173                          advertisedurl=advertisedfullurl,
       
   174                          advertisedbaseurl=advertisedbaseurl,
       
   175                          apppath=apppath,
       
   176                          dispatchparts=dispatchparts, dispatchpath=dispatchpath,
       
   177                          querystring=querystring)
    56 
   178 
    57 class wsgirequest(object):
   179 class wsgirequest(object):
    58     """Higher-level API for a WSGI request.
   180     """Higher-level API for a WSGI request.
    59 
   181 
    60     WSGI applications are invoked with 2 arguments. They are used to
   182     WSGI applications are invoked with 2 arguments. They are used to