@@ -77,7 +77,7 @@ def __init__(
7777 # Total size of raw bytes passed for parsing
7878 self .total_size : int = 0
7979 # Buffer to hold unprocessed bytes
80- self .buffer : bytes = b''
80+ self .buffer : Optional [ memoryview ] = None
8181 # Internal headers data structure:
8282 # - Keys are lower case header names.
8383 # - Values are 2-tuple containing original
@@ -102,13 +102,13 @@ def request(
102102 httpParserTypes .REQUEST_PARSER ,
103103 enable_proxy_protocol = enable_proxy_protocol ,
104104 )
105- parser .parse (raw )
105+ parser .parse (memoryview ( raw ) )
106106 return parser
107107
108108 @classmethod
109109 def response (cls : Type [T ], raw : bytes ) -> T :
110110 parser = cls (httpParserTypes .RESPONSE_PARSER )
111- parser .parse (raw )
111+ parser .parse (memoryview ( raw ) )
112112 return parser
113113
114114 def header (self , key : bytes ) -> bytes :
@@ -206,14 +206,21 @@ def body_expected(self) -> bool:
206206 """Returns true if content or chunked response is expected."""
207207 return self ._content_expected or self ._is_chunked_encoded
208208
209- def parse (self , raw : bytes , allowed_url_schemes : Optional [List [bytes ]] = None ) -> None :
209+ def parse (
210+ self ,
211+ raw : memoryview ,
212+ allowed_url_schemes : Optional [List [bytes ]] = None ,
213+ ) -> None :
210214 """Parses HTTP request or response out of raw bytes.
211215
212216 Check for `HttpParser.state` after `parse` has successfully returned."""
213217 size = len (raw )
214218 self .total_size += size
215- raw = self .buffer + raw
216- self .buffer , more = b'' , size > 0
219+ if self .buffer :
220+ # TODO(abhinavsingh): Instead of copying via tobytes(), our parser
221+ # must be capable of working with arrays of memoryview
222+ raw = memoryview (self .buffer .tobytes () + raw .tobytes ())
223+ self .buffer , more = None , size > 0
217224 while more and self .state != httpParserStates .COMPLETE :
218225 # The >= comparison with HEADERS_COMPLETE also covers the RCVING_BODY state
219226 if self .state >= httpParserStates .HEADERS_COMPLETE :
@@ -237,7 +244,7 @@ def parse(self, raw: bytes, allowed_url_schemes: Optional[List[bytes]] = None) -
237244 not (self ._content_expected or self ._is_chunked_encoded ) and \
238245 raw == b'' :
239246 self .state = httpParserStates .COMPLETE
240- self .buffer = raw
247+ self .buffer = None if raw == b'' else raw
241248
242249 def build (self , disable_headers : Optional [List [bytes ]] = None , for_proxy : bool = False ) -> bytes :
243250 """Rebuild the request object."""
@@ -278,7 +285,7 @@ def build_response(self) -> bytes:
278285 body = self ._get_body_or_chunks (),
279286 )
280287
281- def _process_body (self , raw : bytes ) -> Tuple [bool , bytes ]:
288+ def _process_body (self , raw : memoryview ) -> Tuple [bool , memoryview ]:
282289 # Ref: http://www.ietf.org/rfc/rfc2616.txt
283290 # 3.If a Content-Length header field (section 14.13) is present, its
284291 # decimal value in OCTETs represents both the entity-length and the
@@ -297,7 +304,8 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
297304 self .body = self .chunk .body
298305 self .state = httpParserStates .COMPLETE
299306 more = False
300- elif self ._content_expected :
307+ return more , raw
308+ if self ._content_expected :
301309 self .state = httpParserStates .RCVING_BODY
302310 if self .body is None :
303311 self .body = b''
@@ -307,23 +315,21 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
307315 if self .body and \
308316 len (self .body ) == int (self .header (b'content-length' )):
309317 self .state = httpParserStates .COMPLETE
310- more , raw = len (raw ) > 0 , raw [total_size - received_size :]
311- else :
312- self .state = httpParserStates .RCVING_BODY
313- # Received a packet without content-length header
314- # and no transfer-encoding specified.
315- #
316- # This can happen for both HTTP/1.0 and HTTP/1.1 scenarios.
317- # Currently, we consume the remaining buffer as body.
318- #
319- # Ref https://github.com/abhinavsingh/proxy.py/issues/398
320- #
321- # See TestHttpParser.test_issue_398 scenario
322- self .body = raw
323- more , raw = False , b''
324- return more , raw
325-
326- def _process_headers (self , raw : bytes ) -> Tuple [bool , bytes ]:
318+ return len (raw ) > 0 , raw [total_size - received_size :]
319+ # Received a packet without content-length header
320+ # and no transfer-encoding specified.
321+ #
322+ # This can happen for both HTTP/1.0 and HTTP/1.1 scenarios.
323+ # Currently, we consume the remaining buffer as body.
324+ #
325+ # Ref https://github.com/abhinavsingh/proxy.py/issues/398
326+ #
327+ # See TestHttpParser.test_issue_398 scenario
328+ self .state = httpParserStates .RCVING_BODY
329+ self .body = raw
330+ return False , memoryview (b'' )
331+
332+ def _process_headers (self , raw : memoryview ) -> Tuple [bool , memoryview ]:
327333 """Returns False when no CRLF could be found in received bytes.
328334
329335 TODO: We should not return until parser reaches headers complete
@@ -334,10 +340,10 @@ def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]:
334340 This will also help make the parser even more stateless.
335341 """
336342 while True :
337- parts = raw .split (CRLF , 1 )
343+ parts = raw .tobytes (). split (CRLF , 1 )
338344 if len (parts ) == 1 :
339345 return False , raw
340- line , raw = parts [0 ], parts [1 ]
346+ line , raw = parts [0 ], memoryview ( parts [1 ])
341347 if self .state in (httpParserStates .LINE_RCVD , httpParserStates .RCVING_HEADERS ):
342348 if line == b'' or line .strip () == b'' : # Blank line received.
343349 self .state = httpParserStates .HEADERS_COMPLETE
@@ -352,14 +358,14 @@ def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]:
352358
353359 def _process_line (
354360 self ,
355- raw : bytes ,
361+ raw : memoryview ,
356362 allowed_url_schemes : Optional [List [bytes ]] = None ,
357- ) -> Tuple [bool , bytes ]:
363+ ) -> Tuple [bool , memoryview ]:
358364 while True :
359- parts = raw .split (CRLF , 1 )
365+ parts = raw .tobytes (). split (CRLF , 1 )
360366 if len (parts ) == 1 :
361367 return False , raw
362- line , raw = parts [0 ], parts [1 ]
368+ line , raw = parts [0 ], memoryview ( parts [1 ])
363369 if self .type == httpParserTypes .REQUEST_PARSER :
364370 if self .protocol is not None and self .protocol .version is None :
365371 # We expect to receive entire proxy protocol v1 line
0 commit comments