Skip to content

Commit 00e6331

Browse files
committed
Fix prefix resource lookup
1 parent 3563173 commit 00e6331

File tree

1 file changed

+90
-93
lines changed

1 file changed

+90
-93
lines changed

aiohttp/web_urldispatcher.py

+90-93
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import abc
22
import asyncio
33
import base64
4-
import dataclasses
54
import functools
65
import hashlib
76
import html
@@ -999,12 +998,6 @@ def __contains__(self, route: object) -> bool:
999998
return route in self._routes
1000999

10011000

1002-
@dataclasses.dataclass
1003-
class _PrefixSubtree:
1004-
common_prefix: str
1005-
resources: list[PrefixResource]
1006-
1007-
10081001
class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]):
10091002
NAME_SPLIT_RE = re.compile(r"[.:-]")
10101003
HTTP_NOT_FOUND = HTTPNotFound()
@@ -1017,7 +1010,8 @@ def __init__(self) -> None:
10171010
self._matched_sub_app_resources: List[MatchedSubAppResource] = []
10181011
self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported]
10191012
self._plain_resources: dict[str, PlainResource] = {}
1020-
self._prefix_resources: dict[str, _PrefixSubtree] = {}
1013+
self._prefix_resources: dict[str, list[PrefixResource]] = {}
1014+
self._has_variable_resources = True
10211015

10221016
def _on_match(
10231017
self, id_: int, from_: int, to: int, flags: int, found: list[int]
@@ -1029,65 +1023,79 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo:
10291023
allowed_methods: set[str] = set()
10301024
path = request.rel_url.path_safe
10311025

1026+
# plain resource lookup
10321027
if (plain_resource := self._plain_resources.get(path)) is not None:
10331028
match_dict, allowed = await plain_resource.resolve(request)
10341029
if match_dict is not None:
10351030
return match_dict
10361031
else:
10371032
allowed_methods |= allowed
10381033

1039-
parts = path.split("/")
1040-
# path.startswith("/"), thus parts[0] == "".
1041-
# parts[1] is the first prefix segment
1042-
if (subtree := self._prefix_resources.get(parts[1])) is not None:
1043-
if len(subtree.resources) == 1 or path.startswith(subtree.common_prefix):
1044-
for prefix_resource in subtree.resources:
1045-
match_dict, allowed = await prefix_resource.resolve(request)
1046-
if match_dict is not None:
1047-
return match_dict
1048-
else:
1049-
allowed_methods |= allowed
1050-
1051-
if self._hyperdb is not None:
1052-
found: list[int] = []
1053-
resources = self._resources
1054-
1055-
self._hyperdb.scan(
1056-
path.encode("utf8"), match_event_handler=self._on_match, context=found
1057-
)
1058-
if len(found) > 1:
1059-
# Multiple matches are found,
1060-
# use the FIRST match.
1061-
# Match ids are basically indexes in self._resources.
1062-
found.sort()
1063-
1064-
for idx in found:
1065-
resource = resources[idx]
1066-
match_dict, allowed = await resource.resolve(request)
1034+
# prefix resource lookup
1035+
url_part = path
1036+
prefix_resources = self._prefix_resources
1037+
1038+
# Walk the url parts looking for candidates. We walk the url backwards
1039+
# to ensure the most explicit match is found first. If there are multiple
1040+
# candidates for a given url part because there are multiple resources
1041+
# registered for the same canonical path, we resolve them in a linear
1042+
# fashion to ensure registration order is respected.
1043+
while url_part:
1044+
for prefix_resource in prefix_resources.get(url_part, ()):
1045+
match_dict, allowed = await prefix_resource.resolve(request)
10671046
if match_dict is not None:
10681047
return match_dict
10691048
else:
10701049
allowed_methods |= allowed
1071-
else:
1072-
url_part = path
1073-
resource_index = self._resource_index
1074-
1075-
# Walk the url parts looking for candidates. We walk the url backwards
1076-
# to ensure the most explicit match is found first. If there are multiple
1077-
# candidates for a given url part because there are multiple resources
1078-
# registered for the same canonical path, we resolve them in a linear
1079-
# fashion to ensure registration order is respected.
1080-
while url_part:
1081-
for candidate in resource_index.get(url_part, ()):
1082-
match_dict, allowed = await candidate.resolve(request)
1050+
if url_part == "/":
1051+
break
1052+
url_part = url_part.rpartition("/")[0] or "/"
1053+
1054+
# variable resource lookup
1055+
if self._has_variable_resources:
1056+
if self._hyperdb is not None:
1057+
found: list[int] = []
1058+
resources = self._resources
1059+
1060+
self._hyperdb.scan(
1061+
path.encode("utf8"),
1062+
match_event_handler=self._on_match,
1063+
context=found,
1064+
)
1065+
if len(found) > 1:
1066+
# Multiple matches are found,
1067+
# use the FIRST match.
1068+
# Match ids are basically indexes in self._resources.
1069+
found.sort()
1070+
1071+
for idx in found:
1072+
resource = resources[idx]
1073+
match_dict, allowed = await resource.resolve(request)
10831074
if match_dict is not None:
10841075
return match_dict
10851076
else:
10861077
allowed_methods |= allowed
1087-
if url_part == "/":
1088-
break
1089-
url_part = url_part.rpartition("/")[0] or "/"
1090-
1078+
else:
1079+
url_part = path
1080+
resource_index = self._resource_index
1081+
1082+
# Walk the url parts looking for candidates. We walk the url backwards
1083+
# to ensure the most explicit match is found first. If there are multiple
1084+
# candidates for a given url part because there are multiple resources
1085+
# registered for the same canonical path, we resolve them in a linear
1086+
# fashion to ensure registration order is respected.
1087+
while url_part:
1088+
for candidate in resource_index.get(url_part, ()):
1089+
match_dict, allowed = await candidate.resolve(request)
1090+
if match_dict is not None:
1091+
return match_dict
1092+
else:
1093+
allowed_methods |= allowed
1094+
if url_part == "/":
1095+
break
1096+
url_part = url_part.rpartition("/")[0] or "/"
1097+
1098+
# domain resource lookup
10911099
#
10921100
# We didn't find any candidates, so we'll try the matched sub-app
10931101
# resources which we have to walk in a linear fashion because they
@@ -1318,58 +1326,47 @@ def _rebuild(self) -> None:
13181326
for id_, resource in enumerate(self._resources):
13191327
if isinstance(resource, PlainResource):
13201328
self._plain_resources[resource.get_info()["path"]] = resource
1321-
continue
13221329
elif isinstance(resource, DynamicResource):
13231330
pattern = resource.get_info()["pattern"].pattern
1331+
patterns.append(f"^{pattern}$".encode())
1332+
ids.append(id_)
13241333
elif isinstance(resource, PrefixResource):
13251334
if isinstance(resource, MatchedSubAppResource):
13261335
# wildcard resources don't fit the hyperscan table
13271336
continue
13281337
prefix = resource.get_info()["prefix"]
1329-
parts = prefix.split("/")
1330-
segment = parts[0]
1331-
subtree = self._prefix_resources.get(segment)
1332-
if subtree is None:
1333-
subtree = _PrefixSubtree(prefix, [resource])
1334-
self._prefix_resources[segment] = subtree
1335-
else:
1336-
subtree_parts = subtree.common_prefix.split("/")
1337-
segments = []
1338-
for lft, rgt in zip(parts, subtree_parts):
1339-
if lft == rgt:
1340-
segments.append(lft)
1341-
subtree.common_prefix = "/".join(segments)
1342-
subtree.resources.append(resource)
1343-
continue
1338+
# There may be multiple resources for a prefix
1339+
# so we keep them in a list to ensure that registration
1340+
# order is respected.
1341+
self._prefix_resources.setdefault(prefix.rstrip("/") or "/", []).append(
1342+
resource
1343+
)
13441344
else:
13451345
raise RuntimeError(f"Unsupported resource type {type(resource)}")
13461346

1347-
patterns.append(f"^{pattern}$".encode())
1348-
ids.append(id_)
1349-
1350-
if not HAS_HYPERSCAN:
1351-
return
1352-
13531347
count = len(patterns)
1354-
self._hyperdb = hyperscan.Database()
1355-
try:
1356-
self._hyperdb.compile(
1357-
expressions=patterns,
1358-
ids=ids,
1359-
elements=count,
1360-
flags=[
1361-
hyperscan.HS_FLAG_UTF8
1362-
| hyperscan.HS_FLAG_UCP
1363-
| hyperscan.HS_FLAG_SINGLEMATCH
1364-
]
1365-
* count,
1366-
)
1367-
except hyperscan.error as exc:
1368-
web_logger.warning(
1369-
"Cannot compile hyperscan database: %s, switching to fallback url resolver",
1370-
repr(exc),
1371-
)
1372-
self._hyperdb = None
1348+
self._has_variable_resources = count > 0
1349+
if self._has_variable_resources:
1350+
if HAS_HYPERSCAN:
1351+
self._hyperdb = hyperscan.Database()
1352+
try:
1353+
self._hyperdb.compile(
1354+
expressions=patterns,
1355+
ids=ids,
1356+
elements=count,
1357+
flags=[
1358+
hyperscan.HS_FLAG_UTF8
1359+
| hyperscan.HS_FLAG_UCP
1360+
| hyperscan.HS_FLAG_SINGLEMATCH
1361+
]
1362+
* count,
1363+
)
1364+
except hyperscan.error as exc:
1365+
web_logger.warning(
1366+
"Cannot compile hyperscan database: %s, switching to fallback url resolver",
1367+
repr(exc),
1368+
)
1369+
self._hyperdb = None
13731370

13741371
def add_routes(self, routes: Iterable[AbstractRouteDef]) -> List[AbstractRoute]:
13751372
"""Append routes to route table.

0 commit comments

Comments
 (0)