diff --git a/src/hypercorn/app_wrappers.py b/src/hypercorn/app_wrappers.py index 2f856e9..13d2d8c 100644 --- a/src/hypercorn/app_wrappers.py +++ b/src/hypercorn/app_wrappers.py @@ -84,8 +84,9 @@ async def handle_http( await send({"type": "http.response.body", "body": b"", "more_body": False}) def run_app(self, environ: dict, send: Callable) -> None: - headers: list[tuple[bytes, bytes]] + headers: list[tuple[bytes, bytes]] = [] response_started = False + headers_sent = False status_code: int | None = None def start_response( @@ -93,7 +94,21 @@ def start_response( response_headers: list[tuple[str, str]], exc_info: Exception | None = None, ) -> None: - nonlocal headers, response_started, status_code + nonlocal headers, response_started, status_code, headers_sent + + if response_started and exc_info is None: + raise RuntimeError( + "start_response cannot be called again without the exc_info parameter" + ) + elif exc_info is not None: + try: + if headers_sent: + # The headers have already been sent and we can no longer change + # the status_code and headers. reraise this exception in accordance + # with the WSGI specification. + raise exc_info[1].with_traceback(exc_info[2]) + finally: + exc_info = None # Delete reference to exc_info to avoid circular references raw, _ = status.split(" ", 1) status_code = int(raw) @@ -106,16 +121,35 @@ def start_response( response_body = self.app(environ, start_response) try: - first_chunk = True for output in response_body: - if first_chunk: + # Per the WSGI specification in PEP-3333, the start_response callable + # must not actually transmit the response headers. Instead, it must + # store them for the server to transmit only after the first iteration + # of the application return value that yields a non-empty bytestring. + # + # We therefore delay sending the http.response.start event until after + # we receive a non-empty byte string from the application return value. + if output and not headers_sent: if not response_started: raise RuntimeError("WSGI app did not call start_response") + # Send the http.response.start event with the status and headers, flagging + # that this was completed so they aren't sent twice. send({"type": "http.response.start", "status": status_code, "headers": headers}) - first_chunk = False + headers_sent = True send({"type": "http.response.body", "body": output, "more_body": True}) + + # If we still haven't sent the headers by this point, then we received no + # non-empty byte strings from the application return value. This can happen when + # handling certain HTTP methods that don't include a response body like HEAD. + # In those cases we still need to send the http.response.start event with the + # status code and headers, but we need to ensure they haven't been sent previously. + if not headers_sent: + if not response_started: + raise RuntimeError("WSGI app did not call start_response") + + send({"type": "http.response.start", "status": status_code, "headers": headers}) finally: if hasattr(response_body, "close"): response_body.close() diff --git a/tests/test_app_wrappers.py b/tests/test_app_wrappers.py index ca96a7d..a0ffd33 100644 --- a/tests/test_app_wrappers.py +++ b/tests/test_app_wrappers.py @@ -10,22 +10,22 @@ from hypercorn.app_wrappers import _build_environ, InvalidPathError, WSGIWrapper from hypercorn.typing import ASGIReceiveEvent, ASGISendEvent, ConnectionState, HTTPScope - - -def echo_body(environ: dict, start_response: Callable) -> list[bytes]: - status = "200 OK" - output = environ["wsgi.input"].read() - headers = [ - ("Content-Type", "text/plain; charset=utf-8"), - ("Content-Length", str(len(output))), - ] - start_response(status, headers) - return [output] +from .wsgi_applications import ( + wsgi_app_echo_body, + wsgi_app_generator, + wsgi_app_generator_delayed_start_response, + wsgi_app_generator_multiple_start_response_after_body, + wsgi_app_generator_no_body, + wsgi_app_multiple_start_response_no_exc_info, + wsgi_app_no_body, + wsgi_app_no_start_response, + wsgi_app_simple, +) @pytest.mark.trio async def test_wsgi_trio() -> None: - app = WSGIWrapper(echo_body, 2**16) + app = WSGIWrapper(wsgi_app_echo_body, 2**16) scope: HTTPScope = { "http_version": "1.1", "asgi": {}, @@ -52,12 +52,12 @@ async def _send(message: ASGISendEvent) -> None: await app(scope, receive_channel.receive, _send, trio.to_thread.run_sync, trio.from_thread.run) assert messages == [ + {"body": bytearray(b""), "type": "http.response.body", "more_body": True}, { "headers": [(b"content-type", b"text/plain; charset=utf-8"), (b"content-length", b"0")], "status": 200, "type": "http.response.start", }, - {"body": bytearray(b""), "type": "http.response.body", "more_body": True}, {"body": bytearray(b""), "type": "http.response.body", "more_body": False}, ] @@ -83,7 +83,7 @@ def _call_soon(func: Callable, *args: Any) -> Any: @pytest.mark.asyncio async def test_wsgi_asyncio() -> None: - app = WSGIWrapper(echo_body, 2**16) + app = WSGIWrapper(wsgi_app_echo_body, 2**16) scope: HTTPScope = { "http_version": "1.1", "asgi": {}, @@ -100,21 +100,24 @@ async def test_wsgi_asyncio() -> None: "extensions": {}, "state": ConnectionState({}), } - messages = await _run_app(app, scope) + messages = await _run_app(app, scope, b"Hello, world!") assert messages == [ { - "headers": [(b"content-type", b"text/plain; charset=utf-8"), (b"content-length", b"0")], + "headers": [ + (b"content-type", b"text/plain; charset=utf-8"), + (b"content-length", b"13"), + ], "status": 200, "type": "http.response.start", }, - {"body": bytearray(b""), "type": "http.response.body", "more_body": True}, - {"body": bytearray(b""), "type": "http.response.body", "more_body": False}, + {"body": b"Hello, world!", "type": "http.response.body", "more_body": True}, + {"body": b"", "type": "http.response.body", "more_body": False}, ] @pytest.mark.asyncio async def test_max_body_size() -> None: - app = WSGIWrapper(echo_body, 4) + app = WSGIWrapper(wsgi_app_echo_body, 4) scope: HTTPScope = { "http_version": "1.1", "asgi": {}, @@ -138,13 +141,9 @@ async def test_max_body_size() -> None: ] -def no_start_response(environ: dict, start_response: Callable) -> list[bytes]: - return [b"result"] - - @pytest.mark.asyncio async def test_no_start_response() -> None: - app = WSGIWrapper(no_start_response, 2**16) + app = WSGIWrapper(wsgi_app_no_start_response, 2**16) scope: HTTPScope = { "http_version": "1.1", "asgi": {}, @@ -206,3 +205,151 @@ def test_build_environ_root_path() -> None: } with pytest.raises(InvalidPathError): _build_environ(scope, b"") + + +@pytest.mark.asyncio +@pytest.mark.parametrize("wsgi_app", [wsgi_app_simple, wsgi_app_generator]) +async def test_wsgi_protocol(wsgi_app: Callable) -> None: + app = WSGIWrapper(wsgi_app, 2**16) + scope: HTTPScope = { + "http_version": "1.1", + "asgi": {}, + "method": "GET", + "headers": [], + "path": "/", + "root_path": "/", + "query_string": b"a=b", + "raw_path": b"/", + "scheme": "http", + "type": "http", + "client": ("localhost", 80), + "server": None, + "extensions": {}, + "state": ConnectionState({}), + } + + messages = await _run_app(app, scope) + assert messages == [ + { + "headers": [(b"x-test-header", b"Test-Value")], + "status": 200, + "type": "http.response.start", + }, + {"body": b"Hello, ", "type": "http.response.body", "more_body": True}, + {"body": b"world!", "type": "http.response.body", "more_body": True}, + {"body": b"", "type": "http.response.body", "more_body": False}, + ] + + +@pytest.mark.asyncio +@pytest.mark.parametrize("wsgi_app", [wsgi_app_no_body, wsgi_app_generator_no_body]) +async def test_wsgi_protocol_no_body(wsgi_app: Callable) -> None: + app = WSGIWrapper(wsgi_app, 2**16) + scope: HTTPScope = { + "http_version": "1.1", + "asgi": {}, + "method": "GET", + "headers": [], + "path": "/", + "root_path": "/", + "query_string": b"a=b", + "raw_path": b"/", + "scheme": "http", + "type": "http", + "client": ("localhost", 80), + "server": None, + "extensions": {}, + "state": ConnectionState({}), + } + + messages = await _run_app(app, scope) + assert messages == [ + { + "headers": [(b"x-test-header", b"Test-Value")], + "status": 200, + "type": "http.response.start", + }, + {"body": b"", "type": "http.response.body", "more_body": False}, + ] + + +@pytest.mark.asyncio +async def test_wsgi_protocol_overwrite_start_response() -> None: + app = WSGIWrapper(wsgi_app_generator_delayed_start_response, 2**16) + scope: HTTPScope = { + "http_version": "1.1", + "asgi": {}, + "method": "GET", + "headers": [], + "path": "/", + "root_path": "/", + "query_string": b"a=b", + "raw_path": b"/", + "scheme": "http", + "type": "http", + "client": ("localhost", 80), + "server": None, + "extensions": {}, + "state": ConnectionState({}), + } + + messages = await _run_app(app, scope) + assert messages == [ + {"body": b"", "type": "http.response.body", "more_body": True}, + { + "headers": [(b"x-test-header", b"New-Value")], + "status": 500, + "type": "http.response.start", + }, + {"body": b"Hello, ", "type": "http.response.body", "more_body": True}, + {"body": b"world!", "type": "http.response.body", "more_body": True}, + {"body": b"", "type": "http.response.body", "more_body": False}, + ] + + +@pytest.mark.asyncio +async def test_wsgi_protocol_multiple_start_response_no_exc_info() -> None: + app = WSGIWrapper(wsgi_app_multiple_start_response_no_exc_info, 2**16) + scope: HTTPScope = { + "http_version": "1.1", + "asgi": {}, + "method": "GET", + "headers": [], + "path": "/", + "root_path": "/", + "query_string": b"a=b", + "raw_path": b"/", + "scheme": "http", + "type": "http", + "client": ("localhost", 80), + "server": None, + "extensions": {}, + "state": ConnectionState({}), + } + + with pytest.raises(RuntimeError): + await _run_app(app, scope) + + +@pytest.mark.asyncio +async def test_wsgi_protocol_multiple_start_response_after_body() -> None: + app = WSGIWrapper(wsgi_app_generator_multiple_start_response_after_body, 2**16) + scope: HTTPScope = { + "http_version": "1.1", + "asgi": {}, + "method": "GET", + "headers": [], + "path": "/", + "root_path": "/", + "query_string": b"a=b", + "raw_path": b"/", + "scheme": "http", + "type": "http", + "client": ("localhost", 80), + "server": None, + "extensions": {}, + "state": ConnectionState({}), + } + + with pytest.raises(ValueError): + await _run_app(app, scope) diff --git a/tests/wsgi_applications.py b/tests/wsgi_applications.py new file mode 100644 index 0000000..a030552 --- /dev/null +++ b/tests/wsgi_applications.py @@ -0,0 +1,193 @@ +import sys +from collections.abc import Callable +from typing import Generator + + +def wsgi_app_echo_body(environ: dict, start_response: Callable) -> list[bytes]: + """Simple WSGI application which returns the request body as the response body.""" + status = "200 OK" + output = environ["wsgi.input"].read() + headers = [ + ("Content-Type", "text/plain; charset=utf-8"), + ("Content-Length", str(len(output))), + ] + start_response(status, headers) + return [output] + + +def wsgi_app_no_start_response(environ: dict, start_response: Callable) -> list[bytes]: + """Invalid WSGI application which fails to call start_response""" + return [b"result"] + + +def wsgi_app_simple(environ: dict, start_response: Callable) -> list[bytes]: + """ + A basic WSGI Application. + + It is valid to send multiple chunks of data, but the status code and headers + must be sent before the first non-empty chunk of body data is sent. + + Therefore, the headers must be sent immediately after the first non-empty + byte string is returned, but before continuing to iterate further. While sending + the headers before begining iteration would technically work in this case, + this violates the WSGI spec and further examples prove that this behavior + is actually invalid. + """ + start_response("200 OK", [("X-Test-Header", "Test-Value")]) + return [b"Hello, ", b"world!"] + + +def wsgi_app_generator(environ: dict, start_response: Callable) -> Generator[bytes, None, None]: + """ + A synchronous generator usable as a valid WSGI Application. + + Notably, the WSGI specification ensures only that start_response() is called + before the first item is returned from the iterator. It does not have to + be immediately called when app(environ, start_response) is called. + + Using a generator for a WSGI app will delay calling start_response() until after + something begins iterating on it, so only invoking the app and not iterating on + the returned iterable will not be sufficient to get the status code and headers. + + It is also valid to send multiple chunks of data, but the status code and headers + must be sent before the first non-empty chunk of body data is sent. + + Therefore it is not valid to send the status code and headers before iterating on + the returned generator. It is only valid to send status code and headers during + iteration of the generator, immediately after the first non-empty byte + string is returned, but before continuing to iterate further. + """ + start_response("200 OK", [("X-Test-Header", "Test-Value")]) + yield b"Hello, " + yield b"world!" + + +def wsgi_app_no_body(environ: dict, start_response: Callable) -> list[bytes]: + """ + A WSGI Application that does not yield up any body chunks when iterated on. + + This is most common when supporting HTTP methods such as HEAD, which is identical + to GET except that the server MUST NOT return a message body in the response. + + The iterable returned by this app will have no contents, immediately exiting + any for loops attempting to iterate on it. Even though no body was returned + from the application, this is still a valid HTTP request and MUST send the + status code and headers as the response. Failing to do so violates the + WSGI, ASGI, and HTTP specifications. + + Therefore, the status code and headers must be sent after the iteration completes, + as it is not valid to send them only during iteration. If headers are only sent + within the body of the for loop, this application will cause the server to fail + to send this information at all. However, care must be taken to check + whether the status code and headers were already sent during the iteration process, + as they may have been sent during the iteration process for applications with + non-empty bodies. If this isn't accounted for they will be sent twice in error. + """ + start_response("200 OK", [("X-Test-Header", "Test-Value")]) + return [] + + +def wsgi_app_generator_no_body( + environ: dict, start_response: Callable +) -> Generator[bytes, None, None]: + """ + A synchronous generator usable as a valid WSGI Application, which + does not yield up any body chunks when iterated on. + + This is a very complicated edge case. It is most commonly found when building a + generator based WSGI app with support for HTTP methods such as HEAD, which is + identical to GET except that the server MUST NOT return a message body in the response. + + 1. The application is subject to the same delay in calling start_response until + after the server has begun iterating on the returned generator object. + + 2. The status code and headers are also not available during iteration, as the + empty generator will immediately end any for loops that attempt to iterate on it. + + 3. Even though no body was returned from the application, this is still a valid + HTTP request and MUST send the status code and headers as the response. Failing + to do so violates the WSGI, ASGI, and HTTP specifications. + + Therefore, the status code and headers must be sent after the iteration completes, + as it is not valid to send them only during iteration. If headers are only sent + within the body of the for loop, this application will cause the server to fail + to send this information at all. However, care must be taken to check + whether the status code and headers were already sent during the iteration process, + as they may have been sent during the iteration process for applications with + non-empty bodies. If this isn't accounted for they will be sent twice in error. + """ + start_response("200 OK", [("X-Test-Header", "Test-Value")]) + if False: + yield b"" # Unreachable yield makes this an empty generator # noqa + + +def wsgi_app_generator_delayed_start_response( + environ: dict, start_response: Callable +) -> Generator[bytes, None, None]: + """ + A synchronous generator usable as a valid WSGI Application, which calls start_response + a second time after yielding up empty chunks of body. + + This application exercises the ability for WSGI apps to change their status code + right up until the last possible second before the first non-empty chunk of body is + sent. The status code and headers must be buffered until the first non-empty chunk of body + is yielded by this generator, and should be overwritable until that time. + """ + # Initial 200 OK status that will be overwritten before any non-empty chunks of body are sent + start_response("200 OK", [("X-Test-Header", "Old-Value")]) + yield b"" + + try: + raise ValueError + except ValueError: + # start_response may be called more than once before the first non-empty byte string + # is yielded by this generator. However, it is a fatal error to call start_response() + # a second time without passing an exception tuple in via the exc_info argument. + start_response( + "500 Internal Server Error", [("X-Test-Header", "New-Value")], exc_info=sys.exc_info() + ) + + yield b"Hello, " + yield b"world!" + + +def wsgi_app_multiple_start_response_no_exc_info( + environ: dict, start_response: Callable +) -> list[bytes]: + """ + An invalid WSGI Application, which calls start_response a second time + without passing an exception tuple in via the exc_info argument. + + This is considered a fatal error in the WSGI specification and should raise an exception. + """ + + # Calling start_response multiple times without exc_info should raise an error + start_response("200 OK", []) + start_response("202 Accepted", []) + return [] + + +def wsgi_app_generator_multiple_start_response_after_body( + environ: dict, start_response: Callable +) -> Generator[bytes, None, None]: + """ + An invalid WSGI Application, which calls start_response a second time + after the first non-empty byte string is returned. This should reraise the exception + as the headers and status code have already been sent. + + This is considered a fatal error in the WSGI specification and should raise an exception. + """ + + # Calling start_response multiple times without exc_info should raise an error + start_response("200 OK", []) + yield b"Hello, world!" + + try: + raise ValueError + except ValueError: + # start_response may not be called again after the first non-empty byte string is returned + # + # It is a fatal error to call start_response() a second time without passing an exception + # tuple in via the exc_info argument, so ensure we do that to avoid raising the wrong + # exception. + start_response("500 Internal Server Error", [], exc_info=sys.exc_info())