1
1
import abc
2
2
import asyncio
3
3
import base64
4
- import dataclasses
5
4
import functools
6
5
import hashlib
7
6
import html
@@ -999,12 +998,6 @@ def __contains__(self, route: object) -> bool:
999
998
return route in self ._routes
1000
999
1001
1000
1002
- @dataclasses .dataclass
1003
- class _PrefixSubtree :
1004
- common_prefix : str
1005
- resources : list [PrefixResource ]
1006
-
1007
-
1008
1001
class UrlDispatcher (AbstractRouter , Mapping [str , AbstractResource ]):
1009
1002
NAME_SPLIT_RE = re .compile (r"[.:-]" )
1010
1003
HTTP_NOT_FOUND = HTTPNotFound ()
@@ -1017,7 +1010,8 @@ def __init__(self) -> None:
1017
1010
self ._matched_sub_app_resources : List [MatchedSubAppResource ] = []
1018
1011
self ._hyperdb : Optional [hyperscan .Database ] = None # type: ignore[no-any-unimported]
1019
1012
self ._plain_resources : dict [str , PlainResource ] = {}
1020
- self ._prefix_resources : dict [str , _PrefixSubtree ] = {}
1013
+ self ._prefix_resources : dict [str , list [PrefixResource ]] = {}
1014
+ self ._has_variable_resources = True
1021
1015
1022
1016
def _on_match (
1023
1017
self , id_ : int , from_ : int , to : int , flags : int , found : list [int ]
@@ -1029,65 +1023,79 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo:
1029
1023
allowed_methods : set [str ] = set ()
1030
1024
path = request .rel_url .path_safe
1031
1025
1026
+ # plain resource lookup
1032
1027
if (plain_resource := self ._plain_resources .get (path )) is not None :
1033
1028
match_dict , allowed = await plain_resource .resolve (request )
1034
1029
if match_dict is not None :
1035
1030
return match_dict
1036
1031
else :
1037
1032
allowed_methods |= allowed
1038
1033
1039
- parts = path .split ("/" )
1040
- # path.startswith("/"), thus parts[0] == "".
1041
- # parts[1] is the first prefix segment
1042
- if (subtree := self ._prefix_resources .get (parts [1 ])) is not None :
1043
- if len (subtree .resources ) == 1 or path .startswith (subtree .common_prefix ):
1044
- for prefix_resource in subtree .resources :
1045
- match_dict , allowed = await prefix_resource .resolve (request )
1046
- if match_dict is not None :
1047
- return match_dict
1048
- else :
1049
- allowed_methods |= allowed
1050
-
1051
- if self ._hyperdb is not None :
1052
- found : list [int ] = []
1053
- resources = self ._resources
1054
-
1055
- self ._hyperdb .scan (
1056
- path .encode ("utf8" ), match_event_handler = self ._on_match , context = found
1057
- )
1058
- if len (found ) > 1 :
1059
- # Multiple matches are found,
1060
- # use the FIRST match.
1061
- # Match ids are basically indexes in self._resources.
1062
- found .sort ()
1063
-
1064
- for idx in found :
1065
- resource = resources [idx ]
1066
- match_dict , allowed = await resource .resolve (request )
1034
+ # prefix resource lookup
1035
+ url_part = path
1036
+ prefix_resources = self ._prefix_resources
1037
+
1038
+ # Walk the url parts looking for candidates. We walk the url backwards
1039
+ # to ensure the most explicit match is found first. If there are multiple
1040
+ # candidates for a given url part because there are multiple resources
1041
+ # registered for the same canonical path, we resolve them in a linear
1042
+ # fashion to ensure registration order is respected.
1043
+ while url_part :
1044
+ for prefix_resource in prefix_resources .get (url_part , ()):
1045
+ match_dict , allowed = await prefix_resource .resolve (request )
1067
1046
if match_dict is not None :
1068
1047
return match_dict
1069
1048
else :
1070
1049
allowed_methods |= allowed
1071
- else :
1072
- url_part = path
1073
- resource_index = self ._resource_index
1074
-
1075
- # Walk the url parts looking for candidates. We walk the url backwards
1076
- # to ensure the most explicit match is found first. If there are multiple
1077
- # candidates for a given url part because there are multiple resources
1078
- # registered for the same canonical path, we resolve them in a linear
1079
- # fashion to ensure registration order is respected.
1080
- while url_part :
1081
- for candidate in resource_index .get (url_part , ()):
1082
- match_dict , allowed = await candidate .resolve (request )
1050
+ if url_part == "/" :
1051
+ break
1052
+ url_part = url_part .rpartition ("/" )[0 ] or "/"
1053
+
1054
+ # variable resource lookup
1055
+ if self ._has_variable_resources :
1056
+ if self ._hyperdb is not None :
1057
+ found : list [int ] = []
1058
+ resources = self ._resources
1059
+
1060
+ self ._hyperdb .scan (
1061
+ path .encode ("utf8" ),
1062
+ match_event_handler = self ._on_match ,
1063
+ context = found ,
1064
+ )
1065
+ if len (found ) > 1 :
1066
+ # Multiple matches are found,
1067
+ # use the FIRST match.
1068
+ # Match ids are basically indexes in self._resources.
1069
+ found .sort ()
1070
+
1071
+ for idx in found :
1072
+ resource = resources [idx ]
1073
+ match_dict , allowed = await resource .resolve (request )
1083
1074
if match_dict is not None :
1084
1075
return match_dict
1085
1076
else :
1086
1077
allowed_methods |= allowed
1087
- if url_part == "/" :
1088
- break
1089
- url_part = url_part .rpartition ("/" )[0 ] or "/"
1090
-
1078
+ else :
1079
+ url_part = path
1080
+ resource_index = self ._resource_index
1081
+
1082
+ # Walk the url parts looking for candidates. We walk the url backwards
1083
+ # to ensure the most explicit match is found first. If there are multiple
1084
+ # candidates for a given url part because there are multiple resources
1085
+ # registered for the same canonical path, we resolve them in a linear
1086
+ # fashion to ensure registration order is respected.
1087
+ while url_part :
1088
+ for candidate in resource_index .get (url_part , ()):
1089
+ match_dict , allowed = await candidate .resolve (request )
1090
+ if match_dict is not None :
1091
+ return match_dict
1092
+ else :
1093
+ allowed_methods |= allowed
1094
+ if url_part == "/" :
1095
+ break
1096
+ url_part = url_part .rpartition ("/" )[0 ] or "/"
1097
+
1098
+ # domain resource lookup
1091
1099
#
1092
1100
# We didn't find any candidates, so we'll try the matched sub-app
1093
1101
# resources which we have to walk in a linear fashion because they
@@ -1318,58 +1326,47 @@ def _rebuild(self) -> None:
1318
1326
for id_ , resource in enumerate (self ._resources ):
1319
1327
if isinstance (resource , PlainResource ):
1320
1328
self ._plain_resources [resource .get_info ()["path" ]] = resource
1321
- continue
1322
1329
elif isinstance (resource , DynamicResource ):
1323
1330
pattern = resource .get_info ()["pattern" ].pattern
1331
+ patterns .append (f"^{ pattern } $" .encode ())
1332
+ ids .append (id_ )
1324
1333
elif isinstance (resource , PrefixResource ):
1325
1334
if isinstance (resource , MatchedSubAppResource ):
1326
1335
# wildcard resources don't fit the hyperscan table
1327
1336
continue
1328
1337
prefix = resource .get_info ()["prefix" ]
1329
- parts = prefix .split ("/" )
1330
- segment = parts [0 ]
1331
- subtree = self ._prefix_resources .get (segment )
1332
- if subtree is None :
1333
- subtree = _PrefixSubtree (prefix , [resource ])
1334
- self ._prefix_resources [segment ] = subtree
1335
- else :
1336
- subtree_parts = subtree .common_prefix .split ("/" )
1337
- segments = []
1338
- for lft , rgt in zip (parts , subtree_parts ):
1339
- if lft == rgt :
1340
- segments .append (lft )
1341
- subtree .common_prefix = "/" .join (segments )
1342
- subtree .resources .append (resource )
1343
- continue
1338
+ # There may be multiple resources for a prefix
1339
+ # so we keep them in a list to ensure that registration
1340
+ # order is respected.
1341
+ self ._prefix_resources .setdefault (prefix .rstrip ("/" ) or "/" , []).append (
1342
+ resource
1343
+ )
1344
1344
else :
1345
1345
raise RuntimeError (f"Unsupported resource type { type (resource )} " )
1346
1346
1347
- patterns .append (f"^{ pattern } $" .encode ())
1348
- ids .append (id_ )
1349
-
1350
- if not HAS_HYPERSCAN :
1351
- return
1352
-
1353
1347
count = len (patterns )
1354
- self ._hyperdb = hyperscan .Database ()
1355
- try :
1356
- self ._hyperdb .compile (
1357
- expressions = patterns ,
1358
- ids = ids ,
1359
- elements = count ,
1360
- flags = [
1361
- hyperscan .HS_FLAG_UTF8
1362
- | hyperscan .HS_FLAG_UCP
1363
- | hyperscan .HS_FLAG_SINGLEMATCH
1364
- ]
1365
- * count ,
1366
- )
1367
- except hyperscan .error as exc :
1368
- web_logger .warning (
1369
- "Cannot compile hyperscan database: %s, switching to fallback url resolver" ,
1370
- repr (exc ),
1371
- )
1372
- self ._hyperdb = None
1348
+ self ._has_variable_resources = count > 0
1349
+ if self ._has_variable_resources :
1350
+ if HAS_HYPERSCAN :
1351
+ self ._hyperdb = hyperscan .Database ()
1352
+ try :
1353
+ self ._hyperdb .compile (
1354
+ expressions = patterns ,
1355
+ ids = ids ,
1356
+ elements = count ,
1357
+ flags = [
1358
+ hyperscan .HS_FLAG_UTF8
1359
+ | hyperscan .HS_FLAG_UCP
1360
+ | hyperscan .HS_FLAG_SINGLEMATCH
1361
+ ]
1362
+ * count ,
1363
+ )
1364
+ except hyperscan .error as exc :
1365
+ web_logger .warning (
1366
+ "Cannot compile hyperscan database: %s, switching to fallback url resolver" ,
1367
+ repr (exc ),
1368
+ )
1369
+ self ._hyperdb = None
1373
1370
1374
1371
def add_routes (self , routes : Iterable [AbstractRouteDef ]) -> List [AbstractRoute ]:
1375
1372
"""Append routes to route table.
0 commit comments