Skip to content

Commit 94eefad

Browse files
Jibola and NoahStapp
authored and committed
INTPYTHON-736 Convert simple $expr queries to $match queries
Co-authored-by: Noah Stapp <[email protected]>
1 parent b94f0fc commit 94eefad

File tree

12 files changed

+757
-96
lines changed

12 files changed

+757
-96
lines changed

django_mongodb_backend/query.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from django.db.models.sql.where import AND, OR, XOR, ExtraWhere, NothingNode, WhereNode
1212
from pymongo.errors import BulkWriteError, DuplicateKeyError, PyMongoError
1313

14+
from .query_conversion.query_optimizer import convert_expr_to_match
15+
1416

1517
def wrap_database_errors(func):
1618
@wraps(func)
@@ -87,7 +89,7 @@ def get_pipeline(self):
8789
for query in self.subqueries or ():
8890
pipeline.extend(query.get_pipeline())
8991
if self.match_mql:
90-
pipeline.append({"$match": self.match_mql})
92+
pipeline.extend(convert_expr_to_match(self.match_mql))
9193
if self.aggregation_pipeline:
9294
pipeline.extend(self.aggregation_pipeline)
9395
if self.project_fields:

django_mongodb_backend/query_conversion/__init__.py

Whitespace-only changes.
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
class BaseConverter:
    """Common interface for the ``$expr`` -> ``$match`` converters."""

    @classmethod
    def convert(cls, expr):
        """Convert ``expr``; concrete converters must override this."""
        raise NotImplementedError("Subclasses must implement this method.")

    @classmethod
    def is_simple_value(cls, value):
        """
        Return True if ``value`` is a plain literal.

        Dicts and strings starting with ``$`` are not simple. Lists, tuples
        and sets are simple only when every member is simple. Anything else,
        including ``None``, is simple.
        """
        # TODO: Support `$getField` conversion.
        if isinstance(value, dict):
            return False
        if isinstance(value, str):
            return not value.startswith("$")
        if isinstance(value, (list, tuple, set)):
            return all(map(cls.is_simple_value, value))
        return True
19+
20+
21+
class BinaryConverter(BaseConverter):
    """
    Base class for converting binary comparison operations.

    For example:
        {"$expr": {"$gt": ["$price", 100]}}
    is converted to:
        {"price": {"$gt": 100}}

    Subclasses set ``operator`` to the MongoDB operator they handle.
    """

    operator: str

    @staticmethod
    def _is_field_reference(field_expr):
        """
        Return True if ``field_expr`` is a ``"$field"`` path.

        A bare ``"$"`` and ``"$$variable"`` references are rejected: stripping
        one ``$`` from them would yield ``""`` or ``"$variable"``, neither of
        which is a document field.
        """
        return (
            isinstance(field_expr, str)
            and len(field_expr) > 1
            and field_expr.startswith("$")
            and not field_expr.startswith("$$")
        )

    @classmethod
    def convert(cls, args):
        """
        Convert ``[field_expr, value]`` to a ``$match`` condition.

        Return the condition dict, or None when the operands are not a field
        reference compared against a simple value, or when the conversion
        would not preserve the meaning of the original ``$expr``.
        """
        if not (isinstance(args, list) and len(args) == 2):
            return None
        field_expr, value = args
        if not cls._is_field_reference(field_expr):
            return None
        if not cls.is_simple_value(value):
            return None
        field_name = field_expr[1:]  # Remove the $ prefix.
        if value is None:
            # Comparisons with null differ between the aggregation and query
            # languages: {field: None} also matches documents where the field
            # is missing, and range operators are type-bracketed. Only
            # equality can be expressed faithfully, by requiring the field to
            # exist.
            if cls.operator != "$eq":
                return None
            return {"$and": [{field_name: {"$exists": True}}, {field_name: None}]}
        if cls.operator == "$eq":
            return {field_name: value}
        return {field_name: {cls.operator: value}}
50+
51+
52+
class EqConverter(BinaryConverter):
    """
    Convert an ``$eq`` comparison to a ``$match`` condition.

    For example:
        {"$expr": {"$eq": ["$status", "active"]}}
    is converted to:
        {"status": "active"}
    """

    operator = "$eq"
65+
66+
67+
class GtConverter(BinaryConverter):
    """Convert ``{"$gt": ["$field", value]}`` to ``{"field": {"$gt": value}}``."""

    operator = "$gt"
69+
70+
71+
class GteConverter(BinaryConverter):
    """Convert ``{"$gte": ["$field", value]}`` to ``{"field": {"$gte": value}}``."""

    operator = "$gte"
73+
74+
75+
class LtConverter(BinaryConverter):
    """Convert ``{"$lt": ["$field", value]}`` to ``{"field": {"$lt": value}}``."""

    operator = "$lt"
77+
78+
79+
class LteConverter(BinaryConverter):
    """Convert ``{"$lte": ["$field", value]}`` to ``{"field": {"$lte": value}}``."""

    operator = "$lte"
81+
82+
83+
class InConverter(BaseConverter):
    """
    Convert an ``$in`` operation to a ``$match`` condition.

    For example:
        {"$expr": {"$in": ["$category", ["electronics", "books"]]}}
    is converted to:
        {"category": {"$in": ["electronics", "books"]}}
    """

    @classmethod
    def convert(cls, in_args):
        """
        Convert ``[field_expr, values]`` to a ``$match`` condition.

        Return the condition dict, or None when the first operand is not a
        plain ``"$field"`` reference, the second is not a collection of simple
        values, or the conversion would not preserve the meaning of ``$expr``.
        """
        if not (isinstance(in_args, list) and len(in_args) == 2):
            return None
        field_expr, values = in_args
        # The first argument must be a field path. A bare "$" or a
        # "$$variable" reference would turn into a bogus field name once the
        # leading "$" is stripped.
        if not (
            isinstance(field_expr, str)
            and len(field_expr) > 1
            and field_expr.startswith("$")
            and not field_expr.startswith("$$")
        ):
            return None
        if not isinstance(values, (list, tuple, set)):
            return None
        if not all(cls.is_simple_value(v) for v in values):
            return None
        # In the query language a null member of $in also matches documents
        # where the field is missing; the aggregation $in does not. Leave such
        # conditions in $expr.
        if any(v is None for v in values):
            return None
        field_name = field_expr[1:]  # Remove the $ prefix.
        return {field_name: {"$in": values}}
107+
108+
109+
class LogicalConverter(BaseConverter):
    """
    Base class for converting logical operations to a ``$match`` condition.

    For example:
        {"$expr": {
            "$or": [
                {"$eq": ["$status", "active"]},
                {"$in": ["$category", ["electronics", "books"]]},
            ]
        }}
    is converted to:
        {"$or": [
            {"status": "active"},
            {"category": {"$in": ["electronics", "books"]}},
        ]}

    Subclasses set ``_logical_op`` to the operator they handle.
    """

    _logical_op: str

    @classmethod
    def convert(cls, combined_conditions):
        """
        Convert every branch of a logical operator.

        Return ``{operator: [converted, ...]}``, or None if the operand is not
        a non-empty list or if ANY branch cannot be converted. Dropping an
        unconvertible branch would change which documents match, so a single
        failure aborts the conversion.
        """
        if not isinstance(combined_conditions, list) or not combined_conditions:
            return None
        optimized_conditions = []
        for condition in combined_conditions:
            # convert_expression() returns None for anything that is not a
            # single-key dict with a supported operator.
            optimized_condition = convert_expression(condition)
            if not optimized_condition:
                return None
            optimized_conditions.append(optimized_condition)
        return {cls._logical_op: optimized_conditions}
141+
142+
143+
class OrConverter(LogicalConverter):
    """Logical converter whose combined conditions are keyed by ``$or``."""

    _logical_op = "$or"
145+
146+
147+
class AndConverter(LogicalConverter):
    """Logical converter whose combined conditions are keyed by ``$and``."""

    _logical_op = "$and"
149+
150+
151+
# Dispatch table used by convert_expression(): maps each aggregation operator
# that can be rewritten to the converter class that handles it.
OPTIMIZABLE_OPS = {
    # Equality and membership.
    "$eq": EqConverter,
    "$in": InConverter,
    # Logical combinators (recurse into their branches).
    "$and": AndConverter,
    "$or": OrConverter,
    # Range comparisons.
    "$gt": GtConverter,
    "$gte": GteConverter,
    "$lt": LtConverter,
    "$lte": LteConverter,
}
161+
162+
163+
def convert_expression(expr):
    """
    Rewrite a single ``$expr`` condition as ``$match`` query syntax.

    ``expr`` must be a dict with exactly one key naming an operator listed in
    OPTIMIZABLE_OPS. Return whatever that operator's converter returns, or
    None when ``expr`` has any other shape or operator.
    """
    if not isinstance(expr, dict) or len(expr) != 1:
        return None
    ((op, operand),) = expr.items()
    converter = OPTIMIZABLE_OPS.get(op)
    if converter is None:
        return None
    return converter.convert(operand)
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from .expression_converters import convert_expression
2+
3+
4+
def convert_expr_to_match(query):
    """
    Optimize an MQL query by converting conditions into a list of ``$match``
    stages.

    ``query`` is the body of a match (for example ``{"$expr": {...}}``). The
    return value is always a list of pipeline stages, so the caller can
    ``extend()`` a pipeline with it.

    - A dict that is already a ``{"$match": ...}`` stage is passed through.
    - A query without ``$expr`` is wrapped in a single ``$match`` stage.
    - Keys that sit next to ``$expr`` are kept in their own ``$match`` stage
      rather than being discarded; consecutive ``$match`` stages are ANDed.
    """
    if "$expr" not in query:
        if "$match" in query:
            # Already a complete stage; nothing to do.
            return [query]
        return [{"$match": query}]
    expr = query["$expr"]
    plain_conditions = {key: value for key, value in query.items() if key != "$expr"}
    stages = [{"$match": plain_conditions}] if plain_conditions else []
    if expr == {}:
        # An empty $expr filters nothing.
        return stages or [{"$match": {}}]
    return stages + _process_expression(expr)
14+
15+
16+
def _process_expression(expr):
17+
"""Process an expression and extract optimizable conditions."""
18+
match_conditions = []
19+
remaining_conditions = []
20+
if isinstance(expr, dict):
21+
has_and = "$and" in expr
22+
has_or = "$or" in expr
23+
# Do a top-level check for $and or $or because these should inform.
24+
# If they fail, they should failover to a remaining conditions list.
25+
# There's probably a better way to do this.
26+
if has_and:
27+
and_match_conditions = _process_logical_conditions("$and", expr["$and"])
28+
match_conditions.extend(and_match_conditions)
29+
if has_or:
30+
or_match_conditions = _process_logical_conditions("$or", expr["$or"])
31+
match_conditions.extend(or_match_conditions)
32+
if not has_and and not has_or:
33+
# Process single condition.
34+
if optimized := convert_expression(expr):
35+
match_conditions.append({"$match": optimized})
36+
else:
37+
remaining_conditions.append({"$match": {"$expr": expr}})
38+
else:
39+
# Can't optimize.
40+
remaining_conditions.append({"$expr": expr})
41+
return match_conditions + remaining_conditions
42+
43+
44+
def _process_logical_conditions(logical_op, logical_conditions):
45+
"""Process conditions within a logical array."""
46+
optimized_conditions = []
47+
match_conditions = []
48+
remaining_conditions = []
49+
for condition in logical_conditions:
50+
_remaining_conditions = []
51+
if isinstance(condition, dict):
52+
if optimized := convert_expression(condition):
53+
optimized_conditions.append(optimized)
54+
else:
55+
_remaining_conditions.append(condition)
56+
else:
57+
_remaining_conditions.append(condition)
58+
if _remaining_conditions:
59+
# Any expressions that can't be optimized must remain in a $expr
60+
# that preserves the logical operator.
61+
if len(_remaining_conditions) > 1:
62+
remaining_conditions.append({"$expr": {logical_op: _remaining_conditions}})
63+
else:
64+
remaining_conditions.append({"$expr": _remaining_conditions[0]})
65+
if optimized_conditions:
66+
optimized_conditions.extend(remaining_conditions)
67+
if len(optimized_conditions) > 1:
68+
match_conditions.append({"$match": {logical_op: optimized_conditions}})
69+
else:
70+
match_conditions.append({"$match": optimized_conditions[0]})
71+
else:
72+
match_conditions.append({"$match": {logical_op: remaining_conditions}})
73+
return match_conditions

docs/releases/5.2.x.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ Bug fixes
2222
operation is completed on the server to prevent conflicts when running
2323
multiple operations sequentially.
2424

25+
Performance improvements
26+
------------------------
27+
28+
- Made simple queries that use ``$eq``, ``$in``, ``$and``, ``$or``, ``$gt``,
29+
``$gte``, ``$lt``, and/or ``$lte`` use ``$match`` instead of ``$expr`` so
30+
that they can use indexes.
31+
2532
5.2.0
2633
=====
2734

tests/expression_converter_/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)