Skip to content

Commit 9df00a1

Browse files
[PR #10073/349b7565 backport][3.11] Improve performance of parsing headers (#10083)
Co-authored-by: J. Nick Koston <[email protected]>
1 parent da9210b commit 9df00a1

File tree

2 files changed

+26
-31
lines changed

2 files changed

+26
-31
lines changed

CHANGES/10073.misc.rst

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Improved performance of parsing headers when using the C parser -- by :user:`bdraco`.

aiohttp/_http_parser.pyx

+25 -31
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ cdef object CONTENT_ENCODING = hdrs.CONTENT_ENCODING
7171
cdef object EMPTY_PAYLOAD = _EMPTY_PAYLOAD
7272
cdef object StreamReader = _StreamReader
7373
cdef object DeflateBuffer = _DeflateBuffer
74-
74+
cdef bytes EMPTY_BYTES = b""
7575

7676
cdef inline object extend(object buf, const char* at, size_t length):
7777
cdef Py_ssize_t s
@@ -277,8 +277,9 @@ cdef class HttpParser:
277277
cparser.llhttp_t* _cparser
278278
cparser.llhttp_settings_t* _csettings
279279

280-
bytearray _raw_name
281-
bytearray _raw_value
280+
bytes _raw_name
281+
object _name
282+
bytes _raw_value
282283
bint _has_value
283284

284285
object _protocol
@@ -296,7 +297,7 @@ cdef class HttpParser:
296297
bytearray _buf
297298
str _path
298299
str _reason
299-
object _headers
300+
list _headers
300301
list _raw_headers
301302
bint _upgraded
302303
list _messages
@@ -350,8 +351,8 @@ cdef class HttpParser:
350351
self._payload_exception = payload_exception
351352
self._messages = []
352353

353-
self._raw_name = bytearray()
354-
self._raw_value = bytearray()
354+
self._raw_name = EMPTY_BYTES
355+
self._raw_value = EMPTY_BYTES
355356
self._has_value = False
356357

357358
self._max_line_size = max_line_size
@@ -378,42 +379,35 @@ cdef class HttpParser:
378379
self._limit = limit
379380

380381
cdef _process_header(self):
381-
if self._raw_name:
382-
raw_name = bytes(self._raw_name)
383-
raw_value = bytes(self._raw_value)
384-
385-
name = find_header(raw_name)
386-
value = raw_value.decode('utf-8', 'surrogateescape')
382+
cdef str value
383+
if self._raw_name is not EMPTY_BYTES:
384+
name = find_header(self._raw_name)
385+
value = self._raw_value.decode('utf-8', 'surrogateescape')
387386

388-
self._headers.add(name, value)
387+
self._headers.append((name, value))
389388

390389
if name is CONTENT_ENCODING:
391390
self._content_encoding = value
392391

393-
PyByteArray_Resize(self._raw_name, 0)
394-
PyByteArray_Resize(self._raw_value, 0)
395392
self._has_value = False
396-
self._raw_headers.append((raw_name, raw_value))
393+
self._raw_headers.append((self._raw_name, self._raw_value))
394+
self._raw_name = EMPTY_BYTES
395+
self._raw_value = EMPTY_BYTES
397396

398397
cdef _on_header_field(self, char* at, size_t length):
399-
cdef Py_ssize_t size
400-
cdef char *buf
401398
if self._has_value:
402399
self._process_header()
403400

404-
size = PyByteArray_Size(self._raw_name)
405-
PyByteArray_Resize(self._raw_name, size + length)
406-
buf = PyByteArray_AsString(self._raw_name)
407-
memcpy(buf + size, at, length)
401+
if self._raw_name is EMPTY_BYTES:
402+
self._raw_name = at[:length]
403+
else:
404+
self._raw_name += at[:length]
408405

409406
cdef _on_header_value(self, char* at, size_t length):
410-
cdef Py_ssize_t size
411-
cdef char *buf
412-
413-
size = PyByteArray_Size(self._raw_value)
414-
PyByteArray_Resize(self._raw_value, size + length)
415-
buf = PyByteArray_AsString(self._raw_value)
416-
memcpy(buf + size, at, length)
407+
if self._raw_value is EMPTY_BYTES:
408+
self._raw_value = at[:length]
409+
else:
410+
self._raw_value += at[:length]
417411
self._has_value = True
418412

419413
cdef _on_headers_complete(self):
@@ -424,7 +418,7 @@ cdef class HttpParser:
424418
chunked = self._cparser.flags & cparser.F_CHUNKED
425419

426420
raw_headers = tuple(self._raw_headers)
427-
headers = CIMultiDictProxy(self._headers)
421+
headers = CIMultiDictProxy(CIMultiDict(self._headers))
428422

429423
if self._cparser.type == cparser.HTTP_REQUEST:
430424
allowed = upgrade and headers.get("upgrade", "").lower() in ALLOWED_UPGRADES
@@ -672,7 +666,7 @@ cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1:
672666
cdef HttpParser pyparser = <HttpParser>parser.data
673667

674668
pyparser._started = True
675-
pyparser._headers = CIMultiDict()
669+
pyparser._headers = []
676670
pyparser._raw_headers = []
677671
PyByteArray_Resize(pyparser._buf, 0)
678672
pyparser._path = None

0 commit comments

Comments
 (0)