Skip to content

Commit 1c9646f

Browse files
authored
Add DISTINCT ON and LIMIT BY support to ClickHouse dialect (#817)
1 parent 0dff7e3 commit 1c9646f

File tree

3 files changed

+109
-5
lines changed

3 files changed

+109
-5
lines changed

pypika/dialects.py

+62-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import itertools
22
import warnings
33
from copy import copy
4-
from typing import Any, Optional, Union, Tuple as TypedTuple
4+
from typing import Any, Optional, Union, Tuple as TypedTuple, List
55

66
from pypika.enums import Dialects
77
from pypika.queries import (
@@ -384,7 +384,7 @@ def get_sql(self, *args: Any, **kwargs: Any) -> str:
384384
kwargs['groupby_alias'] = False
385385
return super().get_sql(*args, **kwargs)
386386

387-
def _apply_pagination(self, querystring: str) -> str:
387+
def _apply_pagination(self, querystring: str, **kwargs) -> str:
388388
# Note: Overridden as Oracle specifies offset before the fetch next limit
389389
if self._offset:
390390
querystring += self._offset_sql()
@@ -719,7 +719,7 @@ def top(self, value: Union[str, int], percent: bool = False, with_ties: bool = F
719719
self._top_percent: bool = percent
720720
self._top_with_ties: bool = with_ties
721721

722-
def _apply_pagination(self, querystring: str) -> str:
722+
def _apply_pagination(self, querystring: str, **kwargs) -> str:
723723
# Note: Overridden as MSSQL specifies offset before the fetch next limit
724724
if self._limit is not None or self._offset:
725725
# Offset has to be present if fetch next is specified in a MSSQL query
@@ -794,11 +794,21 @@ def drop_view(self, view: str) -> "ClickHouseDropQueryBuilder":
794794
class ClickHouseQueryBuilder(QueryBuilder):
795795
QUERY_CLS = ClickHouseQuery
796796

797+
_distinct_on: List[Term]
798+
_limit_by: Optional[TypedTuple[int, int, List[Term]]]
799+
797800
def __init__(self, **kwargs) -> None:
798801
super().__init__(**kwargs)
799802
self._final = False
800803
self._sample = None
801804
self._sample_offset = None
805+
self._distinct_on = []
806+
self._limit_by = None
807+
808+
def __copy__(self) -> "ClickHouseQueryBuilder":
809+
newone = super().__copy__()
810+
newone._limit_by = copy(self._limit_by)
811+
return newone
802812

803813
@builder
804814
def final(self) -> "ClickHouseQueryBuilder":
@@ -839,6 +849,55 @@ def _set_sql(self, **kwargs: Any) -> str:
839849
)
840850
)
841851

852+
@builder
853+
def distinct_on(self, *fields: Union[str, Term]) -> "ClickHouseQueryBuilder":
854+
for field in fields:
855+
if isinstance(field, str):
856+
self._distinct_on.append(Field(field))
857+
elif isinstance(field, Term):
858+
self._distinct_on.append(field)
859+
860+
def _distinct_sql(self, **kwargs: Any) -> str:
861+
if self._distinct_on:
862+
return "DISTINCT ON({distinct_on}) ".format(
863+
distinct_on=",".join(term.get_sql(with_alias=True, **kwargs) for term in self._distinct_on)
864+
)
865+
return super()._distinct_sql(**kwargs)
866+
867+
@builder
868+
def limit_by(self, n, *by: Union[str, Term]) -> "ClickHouseQueryBuilder":
869+
self._limit_by = (n, 0, [Field(field) if isinstance(field, str) else field for field in by])
870+
871+
@builder
872+
def limit_offset_by(self, n, offset, *by: Union[str, Term]) -> "ClickHouseQueryBuilder":
873+
self._limit_by = (n, offset, [Field(field) if isinstance(field, str) else field for field in by])
874+
875+
def _apply_pagination(self, querystring: str, **kwargs) -> str:
876+
# LIMIT BY isn't really a pagination per se but since we need
877+
# to add this to the query right before an actual LIMIT clause
878+
# this is good enough.
879+
if self._limit_by:
880+
querystring += self._limit_by_sql(**kwargs)
881+
return super()._apply_pagination(querystring, **kwargs)
882+
883+
def _limit_by_sql(self, **kwargs: Any) -> str:
884+
(n, offset, by) = self._limit_by
885+
by = ",".join(term.get_sql(with_alias=True, **kwargs) for term in by)
886+
if offset != 0:
887+
return f" LIMIT {n} OFFSET {offset} BY ({by})"
888+
else:
889+
return f" LIMIT {n} BY ({by})"
890+
891+
def replace_table(self, current_table: Optional[Table], new_table: Optional[Table]) -> "ClickHouseQueryBuilder":
892+
newone = super().replace_table(current_table, new_table)
893+
if self._limit_by:
894+
newone._limit_by = (
895+
self._limit_by[0],
896+
self._limit_by[1],
897+
[column.replace_table(current_table, new_table) for column in self._limit_by[2]],
898+
)
899+
return newone
900+
842901

843902
class ClickHouseDropQueryBuilder(DropQueryBuilder):
844903
QUERY_CLS = ClickHouseQuery

pypika/queries.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1354,7 +1354,7 @@ def get_sql(self, with_alias: bool = False, subquery: bool = False, **kwargs: An
13541354
if self._orderbys:
13551355
querystring += self._orderby_sql(**kwargs)
13561356

1357-
querystring = self._apply_pagination(querystring)
1357+
querystring = self._apply_pagination(querystring, **kwargs)
13581358

13591359
if self._for_update:
13601360
querystring += self._for_update_sql(**kwargs)
@@ -1370,7 +1370,7 @@ def get_sql(self, with_alias: bool = False, subquery: bool = False, **kwargs: An
13701370

13711371
return querystring
13721372

1373-
def _apply_pagination(self, querystring: str) -> str:
1373+
def _apply_pagination(self, querystring: str, **kwargs) -> str:
13741374
if self._limit is not None:
13751375
querystring += self._limit_sql()
13761376

pypika/tests/dialects/test_clickhouse.py

+45
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,48 @@ def test_drop_other(self):
9999
self.assertEqual('DROP QUOTA "myquota"', str(q1))
100100
self.assertEqual('DROP USER "myuser"', str(q2))
101101
self.assertEqual('DROP VIEW "myview"', str(q3))
102+
103+
104+
class DistinctOnTests(TestCase):
105+
table_abc = Table("abc")
106+
107+
def test_distinct_on(self):
108+
q = ClickHouseQuery.from_(self.table_abc).distinct_on("lname", self.table_abc.fname).select("lname", "id")
109+
110+
self.assertEqual('''SELECT DISTINCT ON("lname","fname") "lname","id" FROM "abc"''', str(q))
111+
112+
113+
class LimitByTests(TestCase):
114+
table_abc = Table("abc")
115+
116+
def test_limit_by(self):
117+
q = ClickHouseQuery.from_(self.table_abc).limit_by(1, "a", self.table_abc.b).select("a", "b", "c")
118+
119+
self.assertEqual('''SELECT "a","b","c" FROM "abc" LIMIT 1 BY ("a","b")''', str(q))
120+
121+
def test_limit_offset_by(self):
122+
q = ClickHouseQuery.from_(self.table_abc).limit_offset_by(1, 2, "a", self.table_abc.b).select("a", "b", "c")
123+
124+
self.assertEqual('''SELECT "a","b","c" FROM "abc" LIMIT 1 OFFSET 2 BY ("a","b")''', str(q))
125+
126+
def test_limit_offset0_by(self):
127+
q = ClickHouseQuery.from_(self.table_abc).limit_offset_by(1, 0, "a", self.table_abc.b).select("a", "b", "c")
128+
129+
self.assertEqual('''SELECT "a","b","c" FROM "abc" LIMIT 1 BY ("a","b")''', str(q))
130+
131+
def test_rename_table(self):
132+
table_join = Table("join")
133+
134+
q = (
135+
ClickHouseQuery.from_(self.table_abc)
136+
.join(table_join)
137+
.using("a")
138+
.limit_by(1, self.table_abc.a, table_join.a)
139+
.select(self.table_abc.b, table_join.b)
140+
)
141+
q = q.replace_table(self.table_abc, Table("xyz"))
142+
143+
self.assertEqual(
144+
'''SELECT "xyz"."b","join"."b" FROM "xyz" JOIN "join" USING ("a") LIMIT 1 BY ("xyz"."a","join"."a")''',
145+
str(q),
146+
)

0 commit comments

Comments
 (0)