-
Notifications
You must be signed in to change notification settings - Fork 93
feat: support Nested Lists #627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
95c27c9
4b42b10
9e99091
5b277c9
3ae2b62
0e4c6fd
5ad5fa1
925c9ef
b0a787d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -281,6 +281,17 @@ public static Expression.StructLiteral struct(boolean nullable, Expression.Liter | |||||
| return Expression.StructLiteral.builder().nullable(nullable).addFields(values).build(); | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Creator a nested list expression with one or more elements. | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| * | ||||||
| * <p>Note: This class cannot be used to construct an empty list. To create an empty list, use | ||||||
| * {@link ExpressionCreator#emptyList(boolean, Type)} which returns an {@link | ||||||
| * Expression.EmptyListLiteral}. | ||||||
| */ | ||||||
| public static Expression.NestedList nestedList(boolean nullable, List<Expression> values) { | ||||||
| return Expression.NestedList.builder().nullable(nullable).addAllValues(values).build(); | ||||||
| } | ||||||
|
|
||||||
| public static Expression.StructLiteral struct( | ||||||
| boolean nullable, Iterable<? extends Expression.Literal> values) { | ||||||
| return Expression.StructLiteral.builder().nullable(nullable).addAllFields(values).build(); | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -197,6 +197,8 @@ public Expression from(io.substrait.proto.Expression expr) { | |
| multiOrList.getValueList().stream().map(this::from).collect(Collectors.toList())) | ||
| .build(); | ||
| } | ||
| case NESTED: | ||
| return from(expr.getNested()); | ||
| case CAST: | ||
| return ExpressionCreator.cast( | ||
| protoTypeConverter.from(expr.getCast().getType()), | ||
|
|
@@ -361,6 +363,17 @@ private WindowBound toWindowBound(io.substrait.proto.Expression.WindowFunction.B | |
| } | ||
| } | ||
|
|
||
| public Expression.Nested from(io.substrait.proto.Expression.Nested nested) { | ||
| switch (nested.getNestedTypeCase()) { | ||
| case LIST: | ||
| List<Expression> list = | ||
| nested.getList().getValuesList().stream().map(this::from).collect(Collectors.toList()); | ||
| return ExpressionCreator.nestedList(nested.getNullable(), list); | ||
| default: | ||
| throw new IllegalStateException("Unimplemented nested type: " + nested.getNestedTypeCase()); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor: We can use |
||
| } | ||
| } | ||
|
|
||
| public Expression.Literal from(io.substrait.proto.Expression.Literal literal) { | ||
| switch (literal.getLiteralTypeCase()) { | ||
| case BOOLEAN: | ||
|
|
||
gord02 marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,94 @@ | ||||||
| package io.substrait.type.proto; | ||||||
|
|
||||||
| import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; | ||||||
| import static org.junit.jupiter.api.Assertions.assertThrows; | ||||||
|
|
||||||
| import io.substrait.TestBase; | ||||||
| import io.substrait.expression.Expression; | ||||||
| import io.substrait.expression.ImmutableExpression; | ||||||
| import org.junit.jupiter.api.Test; | ||||||
|
|
||||||
| class NestedListExpressionTest extends TestBase { | ||||||
| io.substrait.expression.Expression literalExpression = | ||||||
| Expression.BoolLiteral.builder().value(true).build(); | ||||||
| Expression.ScalarFunctionInvocation nonLiteralExpression = b.add(b.i32(7), b.i32(42)); | ||||||
|
|
||||||
| @Test | ||||||
| void DifferentTypedLiteralsNestedListTest() { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
minor suggestion: if we include the expected behaviour in the test name, it's easier to see at at glance what its testing. Similar suggestions for test below. |
||||||
| ImmutableExpression.NestedList.Builder builder = | ||||||
| Expression.NestedList.builder().addValues(literalExpression).addValues(b.i32(12)); | ||||||
| assertThrows(AssertionError.class, builder::build); | ||||||
| } | ||||||
|
|
||||||
| @Test | ||||||
| void SameTypedLiteralsNestedListTest() { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Beyond the name suggestion for this test, it is a bit redundant because it doesn't really verify anything differently than your normal tests below do. |
||||||
| ImmutableExpression.NestedList.Builder builder = | ||||||
| Expression.NestedList.builder().addValues(nonLiteralExpression).addValues(b.i32(12)); | ||||||
| assertDoesNotThrow(builder::build); | ||||||
|
|
||||||
| io.substrait.relation.Project project = | ||||||
| io.substrait.relation.Project.builder() | ||||||
| .addExpressions(builder.build()) | ||||||
| .input(b.emptyScan()) | ||||||
| .build(); | ||||||
| verifyRoundTrip(project); | ||||||
| } | ||||||
|
|
||||||
| @Test | ||||||
| void EmptyNestedListTest() { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| ImmutableExpression.NestedList.Builder builder = Expression.NestedList.builder(); | ||||||
| assertThrows(AssertionError.class, builder::build); | ||||||
| } | ||||||
gord02 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
|
||||||
| @Test | ||||||
| void literalNestedListTest() { | ||||||
| Expression.NestedList literalNestedList = | ||||||
| Expression.NestedList.builder() | ||||||
| .addValues(literalExpression) | ||||||
| .addValues(literalExpression) | ||||||
| .build(); | ||||||
|
|
||||||
| io.substrait.relation.Project project = | ||||||
| io.substrait.relation.Project.builder() | ||||||
| .addExpressions(literalNestedList) | ||||||
| .input(b.emptyScan()) | ||||||
| .build(); | ||||||
|
|
||||||
| verifyRoundTrip(project); | ||||||
gord02 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| } | ||||||
|
|
||||||
| @Test | ||||||
| void literalNullableNestedListTest() { | ||||||
| Expression.NestedList literalNestedList = | ||||||
| Expression.NestedList.builder() | ||||||
| .addValues(literalExpression) | ||||||
| .addValues(literalExpression) | ||||||
| .nullable(true) | ||||||
| .build(); | ||||||
|
|
||||||
| io.substrait.relation.Project project = | ||||||
| io.substrait.relation.Project.builder() | ||||||
| .addExpressions(literalNestedList) | ||||||
| .input(b.emptyScan()) | ||||||
| .build(); | ||||||
|
|
||||||
| verifyRoundTrip(project); | ||||||
| } | ||||||
|
|
||||||
| @Test | ||||||
| void nonLiteralNestedListTest() { | ||||||
| Expression.NestedList nonLiteralNestedList = | ||||||
| Expression.NestedList.builder() | ||||||
| .addValues(nonLiteralExpression) | ||||||
| .addValues(nonLiteralExpression) | ||||||
| .build(); | ||||||
|
|
||||||
| io.substrait.relation.Project project = | ||||||
| io.substrait.relation.Project.builder() | ||||||
| .addExpressions(nonLiteralNestedList) | ||||||
| .input(b.emptyScan()) | ||||||
| .build(); | ||||||
|
|
||||||
| verifyRoundTrip(project); | ||||||
| } | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| package io.substrait.isthmus; | ||
|
|
||
| import static java.util.Objects.requireNonNull; | ||
|
|
||
| import org.apache.calcite.rel.type.RelDataType; | ||
| import org.apache.calcite.sql.SqlKind; | ||
| import org.apache.calcite.sql.SqlOperatorBinding; | ||
| import org.apache.calcite.sql.fun.SqlMultisetValueConstructor; | ||
| import org.apache.calcite.sql.type.SqlTypeUtil; | ||
| import org.apache.calcite.sql.validate.SqlValidatorUtil; | ||
|
|
||
| /** | ||
| * Substrait-specific constructor to map back to the Expression NestedList type in Substrait. This | ||
| * constructor creates a special type of SqlKind.ARRAY_VALUE_CONSTRUCTOR for lists that store | ||
| * non-literal expressions. | ||
|
Comment on lines
+14
to
+15
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Conceptually and by looking at the tests (
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It can handle both literals and non-literals. I'll update the comment to avoid the confusion |
||
| */ | ||
| public class NestedListConstructor extends SqlMultisetValueConstructor { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess you picked
The impedance mismatch is that in SQL (and Calcite), arrays and lists are technically the same entity, while IIRC in Substrait they are treated as different entities (@benbellick can you confirm this?). By looking at It's probably enough to change I suggest to extend
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to make sure I am understanding it correctly, let me know your thoughts on the following scenarios: We want to ensure that the roundtrip of both a nested list with literals and non-literals are both returned to a nested list. If the literalConstructorConverter is run first on a list of just literals, then it would pass and then wouldn't be mapped back to a nested list. In the other case, where the nestedExpressionConverter is run first, the literal lists that were originally not a NestedList would become a nested list. Does the above account for this or is the difference not important? Also, is there a way to meaningfully extend the SqlArrayValueConstructor class? Its definition is bare with just a constructor to its parent type. |
||
|
|
||
| public NestedListConstructor() { | ||
| super("NESTEDLIST", SqlKind.ARRAY_VALUE_CONSTRUCTOR); | ||
| } | ||
|
|
||
| @Override | ||
| public RelDataType inferReturnType(SqlOperatorBinding opBinding) { | ||
| RelDataType type = | ||
| getComponentType(opBinding.getTypeFactory(), opBinding.collectOperandTypes()); | ||
| requireNonNull(type, "inferred array element type"); | ||
|
|
||
| // explicit cast elements to component type if they are not same | ||
| SqlValidatorUtil.adjustTypeForArrayConstructor(type, opBinding); | ||
|
|
||
| return SqlTypeUtil.createArrayType(opBinding.getTypeFactory(), type, false); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -179,6 +179,10 @@ public Rel visit(org.apache.calcite.rel.core.Project project) { | |||||||||
| .map(this::toExpression) | ||||||||||
| .collect(java.util.stream.Collectors.toList()); | ||||||||||
|
|
||||||||||
| // if there is no input fields, don’t put a remapping on it | ||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
nit
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor/pedantic: small rewording + avoiding extra whitespace
Suggested change
|
||||||||||
| if (project.getInput().getRowType().getFieldCount() == 0) { | ||||||||||
| return Project.builder().expressions(expressions).input(apply(project.getInput())).build(); | ||||||||||
| } | ||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change looks good, but is a bit out of place in this PR. From experimenting locally it seems that one of your tests fails locally with it because a pointless remap is introduced. I do think it's a good change, and we should keep it in this PR, but could you add an explicit test for this features along the lines of |
||||||||||
| // todo: eliminate excessive projects. This should be done by converting rexinputrefs to remaps. | ||||||||||
| return Project.builder() | ||||||||||
| .remap( | ||||||||||
|
|
||||||||||
Uh oh!
There was an error while loading. Please reload this page.