51 bytesform = {} |
55 bytesform = {} |
52 for k, v in form.iteritems(): |
56 for k, v in form.iteritems(): |
53 bytesform[pycompat.bytesurl(k)] = [ |
57 bytesform[pycompat.bytesurl(k)] = [ |
54 pycompat.bytesurl(i.strip()) for i in v] |
58 pycompat.bytesurl(i.strip()) for i in v] |
55 return bytesform |
59 return bytesform |
|
60 |
|
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record of the URL components of a parsed request.

    Instances describe a WSGI request (or equivalent static HTTP request
    parameters). The class is declared frozen, so attributes cannot be
    reassigned after construction.
    """

    # Complete URL of the request.
    url = attr.ib()

    # ``url`` with every path component removed, i.e. only
    # <proto>://<host><port>.
    baseurl = attr.ib()

    # Advertised counterparts of ``url`` and ``baseurl``. The hostname comes
    # from SERVER_NAME rather than the HTTP ``Host`` header, which is likely
    # what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()

    # Path of the WSGI application.
    apppath = attr.ib()

    # Path segments, as a list, consulted when dispatching the request.
    dispatchparts = attr.ib()

    # Path portion of the URL (query string excluded) used for dispatch.
    dispatchpath = attr.ib()

    # Unparsed query string: whatever follows "?" in the URL.
    querystring = attr.ib()
|
81 |
|
def parserequestfromenv(env):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a ``parsedrequest`` instance.

    ``env`` is a WSGI environment mapping. ``wsgi.url_scheme``,
    ``SERVER_NAME`` and ``SERVER_PORT`` are always read and must be present.
    ``HTTP_HOST``, ``SCRIPT_NAME``, ``PATH_INFO``, ``QUERY_STRING`` and
    ``REPO_NAME`` are optional; a missing ``SCRIPT_NAME`` is treated as an
    empty string, as PEP-0333 permits servers to omit empty values.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    scheme = env['wsgi.url_scheme']
    fullurl = scheme + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only spell out the port when it differs from the scheme's default.
        defaultport = '443' if scheme == 'https' else '80'
        if env['SERVER_PORT'] != defaultport:
            s += ':' + env['SERVER_PORT']

        return s

    # A non-empty Host header is used verbatim (no port is appended to it).
    # Otherwise fall back to SERVER_NAME plus the port.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    # The advertised URL always derives from SERVER_NAME.
    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # SCRIPT_NAME and PATH_INFO may be omitted by the server when empty.
    scriptname = env.get('SCRIPT_NAME', '')
    quotedpath = (util.urlreq.quote(scriptname) +
                  util.urlreq.quote(env.get('PATH_INFO', '')))
    fullurl += quotedpath
    advertisedfullurl += quotedpath

    querystring = env.get('QUERY_STRING', '')
    if querystring:
        fullurl += '?' + querystring
        advertisedfullurl += '?' + querystring

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    # TODO the use of trailing slashes in apppath is arguably wrong. We need it
    # to appease low-level parts of hgweb_mod for now.
    apppath = scriptname
    if not apppath.endswith('/'):
        apppath += '/'

    reponame = env.get('REPO_NAME')
    if reponame:
        apppath += reponame + '/'

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts. When REPO_NAME is unset, repoparts is ['']:
        # that only matches an empty PATH_INFO (whose split is also ['']),
        # in which case the slice below leaves an empty list.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    return parsedrequest(url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts,
                         dispatchpath=dispatchpath,
                         querystring=querystring)
56 |
178 |
57 class wsgirequest(object): |
179 class wsgirequest(object): |
58 """Higher-level API for a WSGI request. |
180 """Higher-level API for a WSGI request. |
59 |
181 |
60 WSGI applications are invoked with 2 arguments. They are used to |
182 WSGI applications are invoked with 2 arguments. They are used to |