Skip to content

Commit

Permalink
Pushdown varchar predicate to ClickHouse unconditionally
Browse files Browse the repository at this point in the history
ClickHouse collation is case-sensitive.
ClickHouse has same sort ordering as Trino.

Per https://clickhouse.com/docs/en/sql-reference/statements/show#show_columns
ClickHouse has no per-column collations
Clickhouse is UTF-8 encoded with byte-by-byte comparison.
https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#collation-support
So exactly as trino.
https://github.com/airlift/slice/blob/2.2/src/main/java/io/airlift/slice/Slice.java#L1205
That’s why all operations on varchars may pushdown.
  • Loading branch information
ssheikin authored and raunaqmorarka committed Sep 21, 2024
1 parent dceac08 commit f3751cf
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
import io.trino.plugin.jdbc.LongReadFunction;
import io.trino.plugin.jdbc.LongWriteFunction;
import io.trino.plugin.jdbc.ObjectWriteFunction;
import io.trino.plugin.jdbc.PredicatePushdownController;
import io.trino.plugin.jdbc.QueryBuilder;
import io.trino.plugin.jdbc.RemoteTableName;
import io.trino.plugin.jdbc.SliceWriteFunction;
Expand All @@ -64,7 +63,6 @@
import io.trino.spi.connector.ColumnMetadata;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTableMetadata;
import io.trino.spi.predicate.Domain;
import io.trino.spi.type.CharType;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.Decimals;
Expand Down Expand Up @@ -128,7 +126,6 @@
import static io.trino.plugin.jdbc.DecimalSessionSessionProperties.getDecimalRounding;
import static io.trino.plugin.jdbc.DecimalSessionSessionProperties.getDecimalRoundingMode;
import static io.trino.plugin.jdbc.JdbcErrorCode.JDBC_ERROR;
import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.getDomainCompactionThreshold;
import static io.trino.plugin.jdbc.PredicatePushdownController.DISABLE_PUSHDOWN;
import static io.trino.plugin.jdbc.PredicatePushdownController.FULL_PUSHDOWN;
import static io.trino.plugin.jdbc.StandardColumnMappings.bigintColumnMapping;
Expand Down Expand Up @@ -206,20 +203,6 @@ public class ClickHouseClient

public static final int DEFAULT_DOMAIN_COMPACTION_THRESHOLD = 1_000;

private static final PredicatePushdownController CLICKHOUSE_PUSHDOWN_CONTROLLER = (session, domain) -> {
if (domain.isOnlyNull()) {
return FULL_PUSHDOWN.apply(session, domain);
}

Domain simplifiedDomain = domain.simplify(getDomainCompactionThreshold(session));
if (!simplifiedDomain.getValues().isDiscreteSet()) {
// Domain#simplify can turn a discrete set into a range predicate
return DISABLE_PUSHDOWN.apply(session, domain);
}

return FULL_PUSHDOWN.apply(session, simplifiedDomain);
};

private final ConnectorExpressionRewriter<ParameterizedExpression> connectorExpressionRewriter;
private final AggregateFunctionRewriter<JdbcExpression, ?> aggregateFunctionRewriter;
private final Type uuidType;
Expand Down Expand Up @@ -247,8 +230,8 @@ public ClickHouseClient(
ImmutableSet.<AggregateFunctionRule<JdbcExpression, ParameterizedExpression>>builder()
.add(new ImplementCountAll(bigintTypeHandle))
.add(new ImplementCount(bigintTypeHandle))
.add(new ImplementCountDistinct(bigintTypeHandle, false))
.add(new ImplementMinMax(false)) // TODO: Revisit once https://github.com/trinodb/trino/issues/7100 is resolved
.add(new ImplementCountDistinct(bigintTypeHandle, true))
.add(new ImplementMinMax(true))
.add(new ImplementSum(ClickHouseClient::toTypeHandle))
.add(new ImplementAvgFloatingPoint())
.add(new ImplementAvgBigint())
Expand All @@ -265,23 +248,9 @@ public Optional<JdbcExpression> implementAggregation(ConnectorSession session, A
return aggregateFunctionRewriter.rewrite(session, aggregate, assignments);
}

@Override
public boolean supportsAggregationPushdown(ConnectorSession session, JdbcTableHandle table, List<AggregateFunction> aggregates, Map<String, ColumnHandle> assignments, List<List<ColumnHandle>> groupingSets)
{
// TODO: Remove override once https://github.com/trinodb/trino/issues/7100 is resolved. Currently pushdown for textual types is not tested and may lead to incorrect results.
return preventTextualTypeAggregationPushdown(groupingSets);
}

@Override
public boolean supportsTopN(ConnectorSession session, JdbcTableHandle handle, List<JdbcSortItem> sortOrder)
{
for (JdbcSortItem sortItem : sortOrder) {
Type sortItemType = sortItem.column().getColumnType();
checkArgument(!(sortItemType instanceof CharType), "Unexpected char type: %s", sortItem.column().getColumnName());
if (sortItemType instanceof VarcharType) {
return false;
}
}
return true;
}

Expand Down Expand Up @@ -672,7 +641,7 @@ public Optional<ColumnMapping> toColumnMapping(ConnectorSession session, Connect
createUnboundedVarcharType(),
varcharReadFunction(createUnboundedVarcharType()),
varcharWriteFunction(),
CLICKHOUSE_PUSHDOWN_CONTROLLER));
FULL_PUSHDOWN));
}
return Optional.of(varbinaryColumnMapping());
case UUID:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
import com.google.common.collect.ImmutableMap;
import io.trino.Session;
import io.trino.plugin.jdbc.BaseJdbcConnectorTest;
import io.trino.plugin.jdbc.JdbcTableHandle;
import io.trino.spi.predicate.TupleDomain;
import io.trino.sql.planner.plan.AggregationNode;
import io.trino.sql.planner.plan.FilterNode;
import io.trino.testing.MaterializedResult;
Expand Down Expand Up @@ -47,8 +45,6 @@
import static io.trino.plugin.clickhouse.TestingClickHouseServer.CLICKHOUSE_LATEST_IMAGE;
import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.DOMAIN_COMPACTION_THRESHOLD;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static io.trino.sql.planner.assertions.PlanMatchPattern.node;
import static io.trino.sql.planner.assertions.PlanMatchPattern.tableScan;
import static io.trino.testing.MaterializedResult.resultBuilder;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static java.lang.String.format;
Expand Down Expand Up @@ -78,7 +74,6 @@ protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior)
SUPPORTS_DELETE,
SUPPORTS_DROP_NOT_NULL_CONSTRAINT,
SUPPORTS_NEGATIVE_DATE,
SUPPORTS_PREDICATE_PUSHDOWN_WITH_VARCHAR_INEQUALITY,
SUPPORTS_ROW_TYPE,
SUPPORTS_SET_COLUMN_TYPE,
SUPPORTS_UPDATE -> false;
Expand Down Expand Up @@ -879,7 +874,7 @@ public void testTextualPredicatePushdown()
// varchar range
assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name BETWEEN 'POLAND' AND 'RPA'"))
.matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar))")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// varchar IN without domain compaction
assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name IN ('POLAND', 'ROMANIA', 'VIETNAM')"))
Expand All @@ -897,17 +892,10 @@ public void testTextualPredicatePushdown()
.matches("VALUES " +
"(BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar)), " +
"(BIGINT '2', BIGINT '21', CAST('VIETNAM' AS varchar))")
// Filter node is retained as no constraint is pushed into connector.
// Filter node is retained as constraint is pushed into connector is simplified, and
// The compacted domain is a range predicate which can give wrong results
// if pushed down as ClickHouse has different sort ordering for letters from Trino
.isNotFullyPushedDown(
node(
FilterNode.class,
// verify that no constraint is applied by the connector
tableScan(
tableHandle -> ((JdbcTableHandle) tableHandle).getConstraint().isAll(),
TupleDomain.all(),
ImmutableMap.of())));
// so has to be filtered by trino too to ensure correct predicate.
.isNotFullyPushedDown(FilterNode.class);

// varchar different case
assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name = 'romania'"))
Expand Down

0 comments on commit f3751cf

Please sign in to comment.