From 9d60dc7285147097811cd0ae975d9a80b8cb17c8 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Tue, 8 Jul 2025 10:25:12 +0200 Subject: [PATCH 01/22] feat(isthmus): udf support for substrait<->calcite # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java # isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java # isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java --- .../io/substrait/isthmus/ExtensionUtils.java | 37 ++ .../isthmus/SimpleExtensionToSqlOperator.java | 319 ++++++++++++++++++ .../substrait/isthmus/SqlConverterBase.java | 13 +- .../isthmus/SqlExpressionToSubstrait.java | 4 +- .../io/substrait/isthmus/SqlToSubstrait.java | 41 ++- .../isthmus/SubstraitRelNodeConverter.java | 44 ++- .../isthmus/SubstraitRelVisitor.java | 21 +- .../io/substrait/isthmus/SubstraitToSql.java | 8 +- .../isthmus/expression/FunctionConverter.java | 4 +- .../isthmus/sql/SubstraitSqlValidator.java | 5 + .../substrait/isthmus/CalciteLiteralTest.java | 4 +- .../isthmus/ComplexAggregateTest.java | 4 +- .../io/substrait/isthmus/ComplexSortTest.java | 5 +- .../substrait/isthmus/NameRoundtripTest.java | 5 +- .../isthmus/OptimizerIntegrationTest.java | 6 +- .../io/substrait/isthmus/PlanTestBase.java | 50 ++- .../SimpleExtensionToSqlOperatorTest.java | 48 +++ .../isthmus/UdfSqlSubstraitTest.java | 40 +++ .../extensions/functions_string_custom.yaml | 11 + 19 files changed, 638 insertions(+), 31 deletions(-) create mode 100644 isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java create mode 100644 isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java create mode 100644 isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java create mode 100644 isthmus/src/test/resources/extensions/functions_string_custom.yaml diff --git a/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java new file mode 100644 index 000000000..e9852a7ce --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java @@ -0,0 +1,37 @@ +package io.substrait.isthmus; + +import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.calcite.SubstraitOperatorTable; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.stream.Collectors; + +public class ExtensionUtils { + + public static SimpleExtension.ExtensionCollection getDynamicExtensions( + SimpleExtension.ExtensionCollection extensions) { + Set knownFunctionNames = + SubstraitOperatorTable.INSTANCE.getOperatorList().stream() + .map(op -> op.getName().toLowerCase(Locale.ROOT)) + .collect(Collectors.toSet()); + + List customFunctions = + extensions.scalarFunctions().stream() + .filter(f -> !knownFunctionNames.contains(f.name().toLowerCase(Locale.ROOT))) + .collect(Collectors.toList()); + + return SimpleExtension.ExtensionCollection.builder() + .scalarFunctions(customFunctions) + // TODO: handle aggregates and other functions + .build(); + } + + public static SimpleExtension.ExtensionCollection loadExtensions(List yamlFunctionFiles) { + SimpleExtension.ExtensionCollection allExtensions = SimpleExtension.loadDefaults(); + if (yamlFunctionFiles != null && !yamlFunctionFiles.isEmpty()) { + allExtensions = allExtensions.merge(SimpleExtension.load(yamlFunctionFiles)); + } + return allExtensions; + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java new file mode 100644 index 000000000..879319b4d --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java @@ -0,0 +1,319 @@ +package io.substrait.isthmus; + +import io.substrait.extension.SimpleExtension; +import io.substrait.function.ParameterizedType; +import io.substrait.function.ParameterizedTypeVisitor; +import io.substrait.function.TypeExpression; +import io.substrait.type.Type; +import io.substrait.type.TypeExpressionEvaluator; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.type.SqlTypeName; + +public final class SimpleExtensionToSqlOperator { + + private static final RelDataTypeFactory DEFAULT_TYPE_FACTORY = + new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM); + + private SimpleExtensionToSqlOperator() {} + + public static List from(SimpleExtension.ExtensionCollection collection) { + return from(collection, DEFAULT_TYPE_FACTORY); + } + + public static List from( + SimpleExtension.ExtensionCollection collection, RelDataTypeFactory typeFactory) { + TypeConverter typeConverter = TypeConverter.DEFAULT; + return Stream.concat( + collection.scalarFunctions().stream(), collection.aggregateFunctions().stream()) + .map(function -> toSqlFunction(function, typeFactory, typeConverter)) + .collect(Collectors.toList()); + } + + private static SqlFunction toSqlFunction( + SimpleExtension.Function function, + RelDataTypeFactory typeFactory, + TypeConverter typeConverter) { + List requiredArgs = + function.args().stream() + .filter(SimpleExtension.Argument::required) + .filter(t -> t instanceof SimpleExtension.ValueArgument || t instanceof SimpleExtension.EnumArgument) + .map(t -> (SimpleExtension.Argument) t) + .collect(Collectors.toList()); + + List argFamilies = + requiredArgs.stream() + .map(arg -> arg.value().accept(new CalciteTypeVisitor()).getFamily()) + .collect(Collectors.toList()); + + SqlReturnTypeInference returnTypeInference = + new SubstraitReturnTypeInference(function, typeFactory, typeConverter); + + return new SqlFunction( + function.name(), + SqlKind.OTHER_FUNCTION, + returnTypeInference, + null, + OperandTypes.family(argFamilies), + SqlFunctionCategory.USER_DEFINED_FUNCTION); + } + + private static class SubstraitReturnTypeInference implements SqlReturnTypeInference { + + private final SimpleExtension.Function function; + private final RelDataTypeFactory typeFactory; + private final TypeConverter typeConverter; + + private SubstraitReturnTypeInference( + SimpleExtension.Function function, + RelDataTypeFactory typeFactory, + TypeConverter typeConverter) { + this.function = function; + this.typeFactory = typeFactory; + this.typeConverter = typeConverter; + } + + @Override + public RelDataType inferReturnType(SqlOperatorBinding opBinding) { + List substraitArgTypes = + opBinding.collectOperandTypes().stream() + .map(typeConverter::toSubstrait) + .collect(Collectors.toList()); + + TypeExpression returnExpression = function.returnType(); + Type resolvedSubstraitType = + TypeExpressionEvaluator.evaluateExpression( + returnExpression, function.args(), substraitArgTypes); + + return typeConverter.toCalcite(typeFactory, resolvedSubstraitType); + } + } + + private static class CalciteTypeVisitor + extends ParameterizedTypeVisitor.ParameterizedTypeThrowsVisitor< + SqlTypeName, RuntimeException> { + + private CalciteTypeVisitor() { + super("Type not supported for Calcite conversion."); + } + + @Override + public SqlTypeName visit(Type.Bool expr) { + return SqlTypeName.BOOLEAN; + } + + @Override + public SqlTypeName visit(Type.I8 expr) { + return SqlTypeName.TINYINT; + } + + @Override + public SqlTypeName visit(Type.I16 expr) { + return SqlTypeName.SMALLINT; + } + + @Override + public SqlTypeName visit(Type.I32 expr) { + return SqlTypeName.INTEGER; + } + + @Override + public SqlTypeName visit(Type.I64 expr) { + return SqlTypeName.BIGINT; + } + + @Override + public SqlTypeName visit(Type.FP32 expr) { + return SqlTypeName.FLOAT; + } + + @Override + public SqlTypeName visit(Type.FP64 expr) { + return SqlTypeName.DOUBLE; + } + + @Override + public SqlTypeName visit(Type.Str expr) { + return SqlTypeName.VARCHAR; + } + + @Override + public SqlTypeName visit(Type.Binary expr) { + return SqlTypeName.VARBINARY; + } + + @Override + public SqlTypeName visit(Type.Date expr) { + return SqlTypeName.DATE; + } + + @Override + public SqlTypeName visit(Type.Time expr) { + return SqlTypeName.TIME; + } + + @Override + public SqlTypeName visit(Type.TimestampTZ expr) { + return SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + + @Override + public SqlTypeName visit(Type.Timestamp expr) { + return SqlTypeName.TIMESTAMP; + } + + @Override + public SqlTypeName visit(Type.IntervalYear year) { + return SqlTypeName.INTERVAL_YEAR_MONTH; + } + + @Override + public SqlTypeName visit(Type.IntervalDay day) { + return SqlTypeName.INTERVAL_DAY; + } + + @Override + public SqlTypeName visit(Type.UUID expr) { + return SqlTypeName.VARCHAR; + } + + @Override + public SqlTypeName visit(Type.Struct struct) { + return SqlTypeName.ROW; + } + + @Override + public SqlTypeName visit(Type.ListType listType) { + return SqlTypeName.ARRAY; + } + + @Override + public SqlTypeName visit(Type.Map map) { + return SqlTypeName.MAP; + } + + @Override + public SqlTypeName visit(ParameterizedType.FixedChar expr) { + return SqlTypeName.CHAR; + } + + @Override + public SqlTypeName visit(ParameterizedType.VarChar expr) { + return SqlTypeName.VARCHAR; + } + + @Override + public SqlTypeName visit(ParameterizedType.FixedBinary expr) { + return SqlTypeName.BINARY; + } + + @Override + public SqlTypeName visit(ParameterizedType.Decimal expr) { + return SqlTypeName.DECIMAL; + } + + @Override + public SqlTypeName visit(ParameterizedType.Struct expr) { + return SqlTypeName.ROW; + } + + @Override + public SqlTypeName visit(ParameterizedType.ListType expr) { + return SqlTypeName.ARRAY; + } + + @Override + public SqlTypeName visit(ParameterizedType.Map expr) { + return SqlTypeName.MAP; + } + + @Override + public SqlTypeName visit(ParameterizedType.PrecisionTimestamp expr) { + return SqlTypeName.TIMESTAMP; + } + + @Override + public SqlTypeName visit(ParameterizedType.PrecisionTimestampTZ expr) { + return SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + + @Override + public SqlTypeName visit(ParameterizedType.PrecisionTime expr) { + return SqlTypeName.TIME; + } + + @Override + public SqlTypeName visit(ParameterizedType.IntervalDay expr) { + return SqlTypeName.INTERVAL_DAY; + } + + @Override + public SqlTypeName visit(ParameterizedType.IntervalCompound expr) { + // TODO: double check + return SqlTypeName.INTERVAL_DAY_HOUR; + } + + @Override + public SqlTypeName visit(ParameterizedType.StringLiteral expr) { + String type = expr.value().toUpperCase(); + + if (type.startsWith("ANY")) { + return SqlTypeName.ANY; + } + + switch (type) { + case "BOOLEAN": + return SqlTypeName.BOOLEAN; + case "I8": + return SqlTypeName.TINYINT; + case "I16": + return SqlTypeName.SMALLINT; + case "I32": + return SqlTypeName.INTEGER; + case "I64": + return SqlTypeName.BIGINT; + case "FP32": + return SqlTypeName.FLOAT; + case "FP64": + return SqlTypeName.DOUBLE; + case "STRING": + return SqlTypeName.VARCHAR; + case "BINARY": + return SqlTypeName.VARBINARY; + case "TIMESTAMP": + return SqlTypeName.TIMESTAMP; + case "TIMESTAMP_TZ": + return SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE; + case "DATE": + return SqlTypeName.DATE; + case "TIME": + return SqlTypeName.TIME; + case "UUID": + return SqlTypeName.VARCHAR; + default: + if (type.startsWith("DECIMAL")) { + return SqlTypeName.DECIMAL; + } + if (type.startsWith("STRUCT")) { + return SqlTypeName.ROW; + } + if (type.startsWith("LIST")) { + return SqlTypeName.ARRAY; + } + return super.visit(expr); + } + } + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java index e60df0b68..2ed056e6d 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java @@ -19,9 +19,7 @@ import org.apache.calcite.sql2rel.SqlToRelConverter; class SqlConverterBase { - protected static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = - SimpleExtension.loadDefaults(); - + protected final SimpleExtension.ExtensionCollection extensionCollection; final RelDataTypeFactory factory; final RelOptCluster relOptCluster; final CalciteConnectionConfig config; @@ -32,7 +30,8 @@ class SqlConverterBase { protected static final FeatureBoard FEATURES_DEFAULT = ImmutableFeatureBoard.builder().build(); final FeatureBoard featureBoard; - protected SqlConverterBase(FeatureBoard features) { + protected SqlConverterBase( + FeatureBoard features, SimpleExtension.ExtensionCollection extensionCollection) { this.factory = new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM); this.config = CalciteConnectionConfig.DEFAULT.set(CalciteConnectionProperty.CASE_SENSITIVE, "false"); @@ -51,5 +50,11 @@ protected SqlConverterBase(FeatureBoard features) { .withUnquotedCasing(featureBoard.unquotedCasing()) .withParserFactory(SqlDdlParserImpl.FACTORY) .withConformance(SqlConformanceEnum.LENIENT); + + this.extensionCollection = extensionCollection; + } + + protected SqlConverterBase(FeatureBoard features) { + this(features, SimpleExtension.loadDefaults()); } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java index c32fab07c..fc144b090 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java @@ -34,12 +34,12 @@ public class SqlExpressionToSubstrait extends SqlConverterBase { protected final RexExpressionConverter rexConverter; public SqlExpressionToSubstrait() { - this(FEATURES_DEFAULT, EXTENSION_COLLECTION); + this(FEATURES_DEFAULT, SimpleExtension.loadDefaults()); } public SqlExpressionToSubstrait( FeatureBoard features, SimpleExtension.ExtensionCollection extensions) { - super(features); + super(features, extensions); ScalarFunctionConverter scalarFunctionConverter = new ScalarFunctionConverter(extensions.scalarFunctions(), factory); this.rexConverter = new RexExpressionConverter(scalarFunctionConverter); diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 3e19ca58c..33384adc7 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -1,22 +1,53 @@ package io.substrait.isthmus; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; +import com.google.common.annotations.VisibleForTesting; +import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.calcite.SubstraitOperatorTable; +import io.substrait.isthmus.sql.SubstraitSqlValidator; import io.substrait.plan.ImmutablePlan.Builder; import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import io.substrait.plan.PlanProtoConverter; import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.rules.CoreRules; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.util.SqlOperatorTables; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql2rel.SqlToRelConverter; +import org.apache.calcite.sql2rel.StandardConvertletTable; + +import java.util.List; /** Take a SQL statement and a set of table definitions and return a substrait plan. */ public class SqlToSubstrait extends SqlConverterBase { + private final SqlOperatorTable operatorTable; public SqlToSubstrait() { - this(null); + this(SimpleExtension.loadDefaults(), null); } public SqlToSubstrait(FeatureBoard features) { - super(features); + this(SimpleExtension.loadDefaults(), features); + } + + public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoard features) { + super(features, extensions); + + SimpleExtension.ExtensionCollection dynamicExtensionCollection = + ExtensionUtils.getDynamicExtensions(extensions); + List generatedDynamicOperators = + SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, this.factory); + + this.operatorTable = + SqlOperatorTables.chain( + SqlOperatorTables.of(generatedDynamicOperators), SubstraitOperatorTable.INSTANCE); } /** @@ -53,9 +84,9 @@ public Plan convert(String sqlStatements, Prepare.CatalogReader catalogReader) builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); // TODO: consider case in which one sql passes conversion while others don't - SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream() - .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) - .forEach(root -> builder.addRoots(root)); + SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream() + .map(root -> SubstraitRelVisitor.convert(root, extensionCollection, featureBoard)) + .forEach(root -> builder.addRoots(root)); return builder.build(); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index 801110c5f..101468a3e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -1,7 +1,5 @@ package io.substrait.isthmus; -import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION; - import com.google.common.collect.ImmutableList; import com.google.common.collect.Range; import com.google.common.collect.RangeMap; @@ -12,6 +10,7 @@ import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.expression.AggregateFunctionConverter; import io.substrait.isthmus.expression.ExpressionRexConverter; +import io.substrait.isthmus.expression.FunctionMappings; import io.substrait.isthmus.expression.ScalarFunctionConverter; import io.substrait.isthmus.expression.WindowFunctionConverter; import io.substrait.relation.AbstractRelVisitor; @@ -97,7 +96,7 @@ public SubstraitRelNodeConverter( this( typeFactory, relBuilder, - new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory), + createScalarFunctionConverter(extensions, typeFactory), new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory), new WindowFunctionConverter(extensions.windowFunctions(), typeFactory), TypeConverter.DEFAULT); @@ -139,11 +138,45 @@ public SubstraitRelNodeConverter( this.expressionRexConverter.setRelNodeConverter(this); } + private static ScalarFunctionConverter createScalarFunctionConverter( + SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory) { + + java.util.Set knownFunctionNames = + FunctionMappings.SCALAR_SIGS.stream() + .map(FunctionMappings.Sig::name) + .collect(Collectors.toSet()); + + List dynamicFunctions = + extensions.scalarFunctions().stream() + .filter(f -> !knownFunctionNames.contains(f.name().toLowerCase())) + .collect(Collectors.toList()); + + List additionalSignatures; + if (dynamicFunctions.isEmpty()) { + additionalSignatures = Collections.emptyList(); + } else { + SimpleExtension.ExtensionCollection dynamicExtensionCollection = + SimpleExtension.ExtensionCollection.builder().scalarFunctions(dynamicFunctions).build(); + + List dynamicOperators = + SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); + + additionalSignatures = + dynamicOperators.stream() + .map(op -> FunctionMappings.s(op, op.getName())) + .collect(Collectors.toList()); + } + + return new ScalarFunctionConverter( + extensions.scalarFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT); + } + public static RelNode convert( Rel relRoot, RelOptCluster relOptCluster, Prepare.CatalogReader catalogReader, - SqlParser.Config parserConfig) { + SqlParser.Config parserConfig, + SimpleExtension.ExtensionCollection extensions) { RelBuilder relBuilder = RelBuilder.create( Frameworks.newConfigBuilder() @@ -154,8 +187,7 @@ public static RelNode convert( .build()); return relRoot.accept( - new SubstraitRelNodeConverter( - EXTENSION_COLLECTION, relOptCluster.getTypeFactory(), relBuilder), + new SubstraitRelNodeConverter(extensions, relOptCluster.getTypeFactory(), relBuilder), Context.newContext()); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java index cc41e84a0..4bc7e3e9e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java @@ -8,6 +8,7 @@ import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.expression.AggregateFunctionConverter; import io.substrait.isthmus.expression.CallConverters; +import io.substrait.isthmus.expression.FunctionMappings; import io.substrait.isthmus.expression.LiteralConverter; import io.substrait.isthmus.expression.RexExpressionConverter; import io.substrait.isthmus.expression.ScalarFunctionConverter; @@ -53,6 +54,7 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.util.ImmutableBitSet; import org.immutables.value.Value; @@ -78,10 +80,25 @@ public SubstraitRelVisitor( RelDataTypeFactory typeFactory, SimpleExtension.ExtensionCollection extensions, FeatureBoard features) { + + SimpleExtension.ExtensionCollection dynamicExtensionCollection = + ExtensionUtils.getDynamicExtensions(extensions); + List dynamicOperators = + SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); + + List additionalSignatures = + dynamicOperators.stream() + .map(op -> FunctionMappings.s(op, op.getName())) + .collect(Collectors.toList()); this.typeConverter = TypeConverter.DEFAULT; - ArrayList converters = new ArrayList(); + ArrayList converters = new ArrayList<>(); converters.addAll(CallConverters.defaults(typeConverter)); - converters.add(new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory)); + converters.add( + new ScalarFunctionConverter( + extensions.scalarFunctions(), + additionalSignatures, + typeFactory, + TypeConverter.DEFAULT)); converters.add(CallConverters.CREATE_SEARCH_CONV.apply(new RexBuilder(typeFactory))); this.aggregateFunctionConverter = new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory); diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java index 421b45317..e327ab007 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java @@ -1,5 +1,6 @@ package io.substrait.isthmus; +import io.substrait.extension.SimpleExtension; import io.substrait.relation.Rel; import org.apache.calcite.prepare.Prepare; import org.apache.calcite.rel.RelNode; @@ -10,7 +11,12 @@ public SubstraitToSql() { super(FEATURES_DEFAULT); } + public SubstraitToSql(SimpleExtension.ExtensionCollection extensions) { + super(FEATURES_DEFAULT, extensions); + } + public RelNode substraitRelToCalciteRel(Rel relRoot, Prepare.CatalogReader catalog) { - return SubstraitRelNodeConverter.convert(relRoot, relOptCluster, catalog, parserConfig); + return SubstraitRelNodeConverter.convert( + relRoot, relOptCluster, catalog, parserConfig, extensionCollection); } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index 8f0ccb000..691d45fa7 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.IdentityHashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -85,8 +84,7 @@ public FunctionConverter( .collect( Multimaps.toMultimap( FunctionMappings.Sig::name, Function.identity(), ArrayListMultimap::create)); - IdentityHashMap matcherMap = - new IdentityHashMap(); + Map matcherMap = new HashMap<>(); for (String key : alm.keySet()) { Collection sigs = calciteOperators.get(key); if (sigs.isEmpty()) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java index eddcb1d0f..a4fe518b9 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java @@ -2,6 +2,7 @@ import io.substrait.isthmus.calcite.SubstraitOperatorTable; import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql.validate.SqlValidatorImpl; @@ -12,4 +13,8 @@ public class SubstraitSqlValidator extends SqlValidatorImpl { public SubstraitSqlValidator(Prepare.CatalogReader catalogReader) { super(SubstraitOperatorTable.INSTANCE, catalogReader, catalogReader.getTypeFactory(), CONFIG); } + + public SubstraitSqlValidator(Prepare.CatalogReader catalogReader, SqlOperatorTable opTable) { + super(opTable, catalogReader, catalogReader.getTypeFactory(), CONFIG); + } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java b/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java index 448b332df..8136d6544 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java @@ -1,6 +1,5 @@ package io.substrait.isthmus; -import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION; import static io.substrait.isthmus.SubstraitTypeSystem.YEAR_MONTH_INTERVAL; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -11,6 +10,7 @@ import io.substrait.expression.Expression.Literal; import io.substrait.expression.Expression.TimestampLiteral; import io.substrait.expression.ExpressionCreator; +import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.SubstraitRelNodeConverter.Context; import io.substrait.isthmus.expression.ExpressionRexConverter; import io.substrait.isthmus.expression.RexExpressionConverter; @@ -35,6 +35,8 @@ import org.junit.jupiter.api.Test; public class CalciteLiteralTest extends CalciteObjs { + private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = + SimpleExtension.loadDefaults(); private final ScalarFunctionConverter scalarFunctionConverter = new ScalarFunctionConverter(EXTENSION_COLLECTION.scalarFunctions(), type); diff --git a/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java b/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java index 553b20395..64308c3ef 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java @@ -1,12 +1,12 @@ package io.substrait.isthmus; -import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION; import static org.junit.jupiter.api.Assertions.assertEquals; import io.substrait.dsl.SubstraitBuilder; import io.substrait.expression.AggregateFunctionInvocation; import io.substrait.expression.Expression; import io.substrait.expression.ImmutableAggregateFunctionInvocation; +import io.substrait.extension.SimpleExtension; import io.substrait.relation.Aggregate; import io.substrait.relation.NamedScan; import io.substrait.relation.Rel; @@ -17,6 +17,8 @@ import org.junit.jupiter.api.Test; public class ComplexAggregateTest extends PlanTestBase { + private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = + SimpleExtension.loadDefaults(); final TypeCreator R = TypeCreator.of(false); SubstraitBuilder b = new SubstraitBuilder(extensions); diff --git a/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java b/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java index 948fcfc23..ee548a525 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java @@ -1,10 +1,10 @@ package io.substrait.isthmus; -import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION; import static org.junit.jupiter.api.Assertions.assertEquals; import io.substrait.dsl.SubstraitBuilder; import io.substrait.expression.Expression; +import io.substrait.extension.SimpleExtension; import io.substrait.relation.Rel; import io.substrait.type.TypeCreator; import java.io.PrintWriter; @@ -20,6 +20,9 @@ public class ComplexSortTest extends PlanTestBase { + private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = + SimpleExtension.loadDefaults(); + final TypeCreator R = TypeCreator.of(false); SubstraitBuilder b = new SubstraitBuilder(extensions); diff --git a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java index d462ea05d..bd45dcff1 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java @@ -1,8 +1,8 @@ package io.substrait.isthmus; -import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION; import static org.junit.jupiter.api.Assertions.assertEquals; +import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; import io.substrait.plan.Plan; @@ -13,6 +13,9 @@ public class NameRoundtripTest extends PlanTestBase { + private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = + SimpleExtension.loadDefaults(); + @Test void preserveNamesFromSql() throws Exception { String createStatement = "CREATE TABLE foo(a BIGINT, b BIGINT)"; diff --git a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java index fd9572b2e..cdc8233c8 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java @@ -1,10 +1,11 @@ package io.substrait.isthmus; -import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; import java.io.IOException; +import java.util.List; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; @@ -16,6 +17,9 @@ public class OptimizerIntegrationTest extends PlanTestBase { + private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = + SimpleExtension.loadDefaults(); + @Test void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOException { String query = diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 56173f1ea..ab722e285 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -41,12 +41,12 @@ import org.apache.calcite.tools.RelBuilder; public class PlanTestBase { - protected final SimpleExtension.ExtensionCollection extensions = - SqlConverterBase.EXTENSION_COLLECTION; + protected final SimpleExtension.ExtensionCollection extensions; + protected final RelCreator creator = new RelCreator(); protected final RelBuilder builder = creator.createRelBuilder(); protected final RelDataTypeFactory typeFactory = creator.typeFactory(); - protected final SubstraitBuilder substraitBuilder = new SubstraitBuilder(extensions); + protected final SubstraitBuilder substraitBuilder; protected static final TypeCreator R = TypeCreator.of(false); protected static final TypeCreator N = TypeCreator.of(true); @@ -66,6 +66,15 @@ public class PlanTestBase { protected static CalciteCatalogReader TPCDS_CATALOG = PlanTestBase.schemaToCatalog("tpcds", TPCDS_SCHEMA); + protected PlanTestBase() { + this(SimpleExtension.loadDefaults()); + } + + protected PlanTestBase(SimpleExtension.ExtensionCollection extensions) { + this.extensions = extensions; + this.substraitBuilder = new SubstraitBuilder(extensions); + } + public static String asString(String resource) throws IOException { return Resources.toString(Resources.getResource(resource), Charsets.UTF_8); } @@ -147,6 +156,41 @@ protected RelRoot assertSqlSubstraitRelRoundTrip( return relRoot2; } + protected RelRoot assertSqlSubstraitRelRoundTripWorkaroundOptimizer( + String query, Prepare.CatalogReader catalogReader) throws Exception { + // sql <--> substrait round trip test. + // Assert (sql -> calcite -> substrait) and (sql -> substrait -> calcite -> substrait) are same. + // Return list of sql -> Substrait rel -> Calcite rel. + + SubstraitToCalcite substraitToCalcite = new SubstraitToCalcite(extensions, typeFactory); + + SqlToSubstrait s = new SqlToSubstrait(extensions, null); + + // 1. SQL -> Calcite RelRoot + List relRoots = s.sqlToRelNode(query, catalogReader); + assertEquals(1, relRoots.size()); + RelRoot relRoot1 = relRoots.get(0); + + // 2. Calcite RelRoot -> Substrait Rel + Plan.Root pojo1 = SubstraitRelVisitor.convert(relRoot1, extensions); + + // 3. Substrait Rel -> Calcite RelNode + RelRoot relRoot2 = substraitToCalcite.convert(pojo1); + + // 4. Calcite RelNode -> Substrait Rel + Plan.Root pojo2 = SubstraitRelVisitor.convert(relRoot2, extensions); + + // Here pojo1 and pojo2 can be different because of different default optimization + // rules between SqlNode->RelRoot conversion (Sql->Substrait) and + // RelBuilder/RexBuilder (Substrait->Sql). + // Therefore, substrait plans passed through conversion to calcite should be compared + RelRoot relRoot3 = substraitToCalcite.convert(pojo2); + Plan.Root pojo3 = SubstraitRelVisitor.convert(relRoot3, extensions); + + assertEquals(pojo2, pojo3); + return relRoot2; + } + @Beta protected void assertFullRoundTrip(String query) throws SqlParseException { assertFullRoundTrip(query, TPCH_CATALOG); diff --git a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java new file mode 100644 index 000000000..31baffc2d --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java @@ -0,0 +1,48 @@ +package io.substrait.isthmus; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.substrait.extension.SimpleExtension; +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperator; +import org.junit.jupiter.api.Test; + +public class SimpleExtensionToSqlOperatorTest { + + @Test + void test() throws IOException { + String customFunctionPath = "/extensions/functions_string_custom.yaml"; + + SimpleExtension.ExtensionCollection customExtensions = + SimpleExtension.load( + customFunctionPath, + SimpleExtensionToSqlOperatorTest.class.getResourceAsStream(customFunctionPath)); + + List operators = SimpleExtensionToSqlOperator.from(customExtensions); + + assertEquals(1, operators.size(), "Should generate one operator from the YAML file."); + + Optional function = + operators.stream() + .filter(op -> op.getName().equalsIgnoreCase("REGEXP_EXTRACT")) + .findFirst(); + + assertTrue(function.isPresent(), "The REGEXP_EXTRACT function should be present."); + + SqlOperator op = function.get(); + System.out.println("Successfully found and verified Custom UDF:"); + System.out.printf(" - Name: %s%n", op.getName()); + + SqlOperandCountRange operandCountRange = op.getOperandCountRange(); + assertEquals(2, operandCountRange.getMin(), "Function should require 2 arguments."); + assertEquals(2, operandCountRange.getMax(), "Function should require 2 arguments."); + System.out.printf(" - Argument Count: %d%n", operandCountRange.getMin()); + + assertNotNull(op.getOperandTypeChecker(), "Operand type checker should not be null."); + } +} diff --git a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java new file mode 100644 index 000000000..a2df48645 --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java @@ -0,0 +1,40 @@ +package io.substrait.isthmus; + +import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.sql.SubstraitCreateStatementParser; +import java.util.List; +import org.apache.calcite.prepare.Prepare; +import org.junit.jupiter.api.Test; + +public class UdfSqlSubstraitTest extends PlanTestBase { + + private static final String CUSTOM_FUNCTION_PATH = "/extensions/functions_string_custom.yaml"; + + UdfSqlSubstraitTest() { + super(loadExtensions(List.of(CUSTOM_FUNCTION_PATH))); + } + + @Test + public void customUdfTest() throws Exception { + + final String[] sql = { + "CREATE TABLE t(x VARCHAR NOT NULL)", "SELECT regexp_extract(x, 'ab') from t" + }; + + final Prepare.CatalogReader catalogReader = + SubstraitCreateStatementParser.processCreateStatementsToCatalog(sql[0]); + + assertSqlSubstraitRelRoundTripWorkaroundOptimizer(sql[1], catalogReader); + } + + private static SimpleExtension.ExtensionCollection loadExtensions( + List yamlFunctionFiles) { + SimpleExtension.ExtensionCollection extensions = SimpleExtension.loadDefaults(); + if (yamlFunctionFiles != null && !yamlFunctionFiles.isEmpty()) { + SimpleExtension.ExtensionCollection customExtensions = + SimpleExtension.load(yamlFunctionFiles); + extensions = extensions.merge(customExtensions); + } + return extensions; + } +} diff --git a/isthmus/src/test/resources/extensions/functions_string_custom.yaml b/isthmus/src/test/resources/extensions/functions_string_custom.yaml new file mode 100644 index 000000000..b71f72199 --- /dev/null +++ b/isthmus/src/test/resources/extensions/functions_string_custom.yaml @@ -0,0 +1,11 @@ +%YAML 1.2 +--- +scalar_functions: + - name: "regexp_extract" + impls: + - args: + - name: "text" + value: string + - name: "pattern" + value: string + return: string From 556847bd57de496b43b0d2d27418517e1be76307 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Wed, 13 Aug 2025 16:56:08 +0200 Subject: [PATCH 02/22] chore(isthmus): handle nullability and EnumArgument in SimplExtensionToSqlOperator --- .../isthmus/SimpleExtensionToSqlOperator.java | 47 ++++++++++++++----- .../io/substrait/isthmus/SqlToSubstrait.java | 2 +- .../SimpleExtensionToSqlOperatorTest.java | 4 +- .../isthmus/UdfSqlSubstraitTest.java | 20 ++++---- .../extensions/functions_string_custom.yaml | 11 ----- .../extensions/scalar_functions_custom.yaml | 44 +++++++++++++++++ 6 files changed, 93 insertions(+), 35 deletions(-) delete mode 100644 isthmus/src/test/resources/extensions/functions_string_custom.yaml create mode 100644 isthmus/src/test/resources/extensions/scalar_functions_custom.yaml diff --git a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java index 879319b4d..4ee3a419e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java @@ -6,6 +6,7 @@ import io.substrait.function.TypeExpression; import io.substrait.type.Type; import io.substrait.type.TypeExpressionEvaluator; +import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -46,17 +47,19 @@ private static SqlFunction toSqlFunction( SimpleExtension.Function function, RelDataTypeFactory typeFactory, TypeConverter typeConverter) { - List requiredArgs = - function.args().stream() - .filter(SimpleExtension.Argument::required) - .filter(t -> t instanceof SimpleExtension.ValueArgument || t instanceof SimpleExtension.EnumArgument) - .map(t -> (SimpleExtension.Argument) t) - .collect(Collectors.toList()); - - List argFamilies = - requiredArgs.stream() - .map(arg -> arg.value().accept(new CalciteTypeVisitor()).getFamily()) - .collect(Collectors.toList()); + + List argFamilies = new ArrayList<>(); + + for (SimpleExtension.Argument arg : function.requiredArguments()) { + if (arg instanceof SimpleExtension.ValueArgument) { + SimpleExtension.ValueArgument valueArg = (SimpleExtension.ValueArgument) arg; + SqlTypeName typeName = valueArg.value().accept(new CalciteTypeVisitor()); + argFamilies.add(typeName.getFamily()); + } else if (arg instanceof SimpleExtension.EnumArgument) { + // Treat an EnumArgument as a required string literal. + argFamilies.add(SqlTypeFamily.STRING); + } + } SqlReturnTypeInference returnTypeInference = new SubstraitReturnTypeInference(function, typeFactory, typeConverter); @@ -97,7 +100,27 @@ public RelDataType inferReturnType(SqlOperatorBinding opBinding) { TypeExpressionEvaluator.evaluateExpression( returnExpression, function.args(), substraitArgTypes); - return typeConverter.toCalcite(typeFactory, resolvedSubstraitType); + boolean finalIsNullable; + switch (function.nullability()) { + case MIRROR: + // If any input is nullable, the output is nullable. + finalIsNullable = + opBinding.collectOperandTypes().stream().anyMatch(RelDataType::isNullable); + break; + case DISCRETE: + // The function can return null even if inputs are not null. + finalIsNullable = true; + break; + case DECLARED_OUTPUT: + default: + // Use the nullability declared on the resolved Substrait type. + finalIsNullable = resolvedSubstraitType.nullable(); + break; + } + + RelDataType baseCalciteType = typeConverter.toCalcite(typeFactory, resolvedSubstraitType); + + return typeFactory.createTypeWithNullability(baseCalciteType, finalIsNullable); } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 33384adc7..af4e5194a 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -47,7 +47,7 @@ public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoa this.operatorTable = SqlOperatorTables.chain( - SqlOperatorTables.of(generatedDynamicOperators), SubstraitOperatorTable.INSTANCE); + SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(generatedDynamicOperators)); } /** diff --git a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java index 31baffc2d..e34e945f4 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java @@ -16,7 +16,7 @@ public class SimpleExtensionToSqlOperatorTest { @Test void test() throws IOException { - String customFunctionPath = "/extensions/functions_string_custom.yaml"; + String customFunctionPath = "/extensions/scalar_functions_custom.yaml"; SimpleExtension.ExtensionCollection customExtensions = SimpleExtension.load( @@ -25,8 +25,6 @@ void test() throws IOException { List operators = SimpleExtensionToSqlOperator.from(customExtensions); - assertEquals(1, operators.size(), "Should generate one operator from the YAML file."); - Optional function = operators.stream() .filter(op -> op.getName().equalsIgnoreCase("REGEXP_EXTRACT")) diff --git a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java index a2df48645..4b4905a35 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java @@ -8,7 +8,7 @@ public class UdfSqlSubstraitTest extends PlanTestBase { - private static final String CUSTOM_FUNCTION_PATH = "/extensions/functions_string_custom.yaml"; + private static final String CUSTOM_FUNCTION_PATH = "/extensions/scalar_functions_custom.yaml"; UdfSqlSubstraitTest() { super(loadExtensions(List.of(CUSTOM_FUNCTION_PATH))); @@ -17,14 +17,18 @@ public class UdfSqlSubstraitTest extends PlanTestBase { @Test public void customUdfTest() throws Exception { - final String[] sql = { - "CREATE TABLE t(x VARCHAR NOT NULL)", "SELECT regexp_extract(x, 'ab') from t" - }; - final Prepare.CatalogReader catalogReader = - SubstraitCreateStatementParser.processCreateStatementsToCatalog(sql[0]); - - assertSqlSubstraitRelRoundTripWorkaroundOptimizer(sql[1], catalogReader); + SubstraitCreateStatementParser.processCreateStatementsToCatalog( + "CREATE TABLE t(x VARCHAR NOT NULL)"); + + assertSqlSubstraitRelRoundTripWorkaroundOptimizer( + "SELECT regexp_extract(x, 'ab') from t", catalogReader); + assertSqlSubstraitRelRoundTripWorkaroundOptimizer( + "SELECT format_text('UPPER', x) FROM t", catalogReader); + assertSqlSubstraitRelRoundTripWorkaroundOptimizer( + "SELECT system_property_get(x) FROM t", catalogReader); + assertSqlSubstraitRelRoundTripWorkaroundOptimizer( + "SELECT safe_divide(10,0) FROM t", catalogReader); } private static SimpleExtension.ExtensionCollection loadExtensions( diff --git a/isthmus/src/test/resources/extensions/functions_string_custom.yaml b/isthmus/src/test/resources/extensions/functions_string_custom.yaml deleted file mode 100644 index b71f72199..000000000 --- a/isthmus/src/test/resources/extensions/functions_string_custom.yaml +++ /dev/null @@ -1,11 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - name: "regexp_extract" - impls: - - args: - - name: "text" - value: string - - name: "pattern" - value: string - return: string diff --git a/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml new file mode 100644 index 000000000..8c152df71 --- /dev/null +++ b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml @@ -0,0 +1,44 @@ +%YAML 1.2 +--- +scalar_functions: + - name: "regexp_extract" + impls: + - args: + - name: "text" + value: string + - name: "pattern" + value: string + return: string + + - name: "format_text" + description: "Formats text based on a mode. The output is nullable if the input is." + impls: + - args: + - name: "mode" +# options: ["UPPER", "LOWER"] + value: string + - name: "input_text" +# options: ["UPPER", "LOWER"] + value: string + return: string + nullability: MIRROR + + - name: "system_property_get" + description: "Safely gets a system property. Always returns a nullable string." + impls: + - args: + - name: "property_name" + value: string + return: string? + nullability: DECLARED_OUTPUT + + - name: "safe_divide" + description: "Performs division, returning NULL if the denominator is zero." + impls: + - args: + - name: "numerator" + value: i32 + - name: "denominator" + value: i32 + return: fp32? + nullability: DISCRETE From 857fe2cac309e1f50072ddc315299e6c8496d667 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Tue, 8 Jul 2025 10:25:12 +0200 Subject: [PATCH 03/22] feat(isthmus): udf support for substrait<->calcite # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java --- .../resources/extensions/functions_string_custom.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 isthmus/src/test/resources/extensions/functions_string_custom.yaml diff --git a/isthmus/src/test/resources/extensions/functions_string_custom.yaml b/isthmus/src/test/resources/extensions/functions_string_custom.yaml new file mode 100644 index 000000000..b71f72199 --- /dev/null +++ b/isthmus/src/test/resources/extensions/functions_string_custom.yaml @@ -0,0 +1,11 @@ +%YAML 1.2 +--- +scalar_functions: + - name: "regexp_extract" + impls: + - args: + - name: "text" + value: string + - name: "pattern" + value: string + return: string From eeb97cd73057a18d884fdb4225238fb30b19c2cb Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Wed, 13 Aug 2025 16:56:08 +0200 Subject: [PATCH 04/22] chore(isthmus): handle nullability and EnumArgument in SimplExtensionToSqlOperator --- .../io/substrait/isthmus/SqlToSubstrait.java | 21 +++-------- .../isthmus/sql/SubstraitSqlToCalcite.java | 36 +++++++++++++++++++ .../isthmus/OptimizerIntegrationTest.java | 1 - .../io/substrait/isthmus/PlanTestBase.java | 6 ++-- .../extensions/functions_string_custom.yaml | 11 ------ 5 files changed, 43 insertions(+), 32 deletions(-) delete mode 100644 isthmus/src/test/resources/extensions/functions_string_custom.yaml diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index af4e5194a..9116d8ce4 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -1,29 +1,18 @@ package io.substrait.isthmus; -import io.substrait.isthmus.sql.SubstraitSqlToCalcite; -import com.google.common.annotations.VisibleForTesting; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.calcite.SubstraitOperatorTable; -import io.substrait.isthmus.sql.SubstraitSqlValidator; +import io.substrait.isthmus.sql.SubstraitSqlToCalcite; import io.substrait.plan.ImmutablePlan.Builder; import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import io.substrait.plan.PlanProtoConverter; +import java.util.List; import org.apache.calcite.prepare.Prepare; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.rules.CoreRules; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlNodeList; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.parser.SqlParseException; -import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.util.SqlOperatorTables; -import org.apache.calcite.sql.validate.SqlValidator; -import org.apache.calcite.sql2rel.SqlToRelConverter; -import org.apache.calcite.sql2rel.StandardConvertletTable; - -import java.util.List; /** Take a SQL statement and a set of table definitions and return a substrait plan. */ public class SqlToSubstrait extends SqlConverterBase { @@ -84,9 +73,9 @@ public Plan convert(String sqlStatements, Prepare.CatalogReader catalogReader) builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); // TODO: consider case in which one sql passes conversion while others don't - SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream() - .map(root -> SubstraitRelVisitor.convert(root, extensionCollection, featureBoard)) - .forEach(root -> builder.addRoots(root)); + SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader, operatorTable).stream() + .map(root -> SubstraitRelVisitor.convert(root, extensionCollection, featureBoard)) + .forEach(root -> builder.addRoots(root)); return builder.build(); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java index 499f33f1d..24a2fb0f4 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java @@ -15,6 +15,7 @@ import org.apache.calcite.rel.rules.CoreRules; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql2rel.SqlToRelConverter; @@ -41,6 +42,23 @@ public static RelRoot convertQuery(String sqlStatement, Prepare.CatalogReader ca return convertQuery(sqlStatement, catalogReader, validator, createDefaultRelOptCluster()); } + /** + * Converts a SQL statement to a Calcite {@link RelRoot}. + * + * @param sqlStatement a SQL statement string + * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in + * the SQL statement + * @param operatorTable the {@link SqlOperatorTable} for dynamic operators + * @return a {@link RelRoot} corresponding to the given SQL statement + * @throws SqlParseException if there is an error while parsing the SQL statement + */ + public static RelRoot convertQuery( + String sqlStatement, Prepare.CatalogReader catalogReader, SqlOperatorTable operatorTable) + throws SqlParseException { + SqlValidator validator = new SubstraitSqlValidator(catalogReader, operatorTable); + return convertQuery(sqlStatement, catalogReader, validator, createDefaultRelOptCluster()); + } + /** * Converts a SQL statement to a Calcite {@link RelRoot}. * @@ -72,6 +90,24 @@ public static RelRoot convertQuery( return relRoots.get(0); } + /** + * Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per + * statement. + * + * @param sqlStatements a string containing one or more SQL statements + * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in + * the SQL statements + * @param operatorTable the {@link SqlOperatorTable} for dynamic operators + * @return a list of {@link RelRoot}s corresponding to the given SQL statements + * @throws SqlParseException if there is an error while parsing the SQL statements + */ + public static List convertQueries( + String sqlStatements, Prepare.CatalogReader catalogReader, SqlOperatorTable operatorTable) + throws SqlParseException { + SqlValidator validator = new SubstraitSqlValidator(catalogReader, operatorTable); + return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster()); + } + /** * Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per * statement. diff --git a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java index cdc8233c8..ae7005baf 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java @@ -5,7 +5,6 @@ import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; import java.io.IOException; -import java.util.List; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index ab722e285..4d3292ba5 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -167,12 +167,10 @@ protected RelRoot assertSqlSubstraitRelRoundTripWorkaroundOptimizer( SqlToSubstrait s = new SqlToSubstrait(extensions, null); // 1. SQL -> Calcite RelRoot - List relRoots = s.sqlToRelNode(query, catalogReader); - assertEquals(1, relRoots.size()); - RelRoot relRoot1 = relRoots.get(0); + Plan plan1 = s.convert(query, catalogReader); // 2. Calcite RelRoot -> Substrait Rel - Plan.Root pojo1 = SubstraitRelVisitor.convert(relRoot1, extensions); + Plan.Root pojo1 = plan1.getRoots().get(0); // 3. Substrait Rel -> Calcite RelNode RelRoot relRoot2 = substraitToCalcite.convert(pojo1); diff --git a/isthmus/src/test/resources/extensions/functions_string_custom.yaml b/isthmus/src/test/resources/extensions/functions_string_custom.yaml deleted file mode 100644 index b71f72199..000000000 --- a/isthmus/src/test/resources/extensions/functions_string_custom.yaml +++ /dev/null @@ -1,11 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - name: "regexp_extract" - impls: - - args: - - name: "text" - value: string - - name: "pattern" - value: string - return: string From 6d9cc2a45a397190d061aceb2d1a150fa67d9205 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Mon, 13 Oct 2025 19:44:05 +0200 Subject: [PATCH 05/22] chore: resolve pmd --- .../src/main/java/io/substrait/isthmus/SqlConverterBase.java | 1 - substrait | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java index 448613ec4..2ed056e6d 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java @@ -1,6 +1,5 @@ package io.substrait.isthmus; -import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.config.CalciteConnectionProperty; diff --git a/substrait b/substrait index 3c25b1b3e..793c64ba2 160000 --- a/substrait +++ b/substrait @@ -1 +1 @@ -Subproject commit 3c25b1b3eaadecba6d10af6b3dd0fe038d0c5993 +Subproject commit 793c64ba26e337c22f5e91b658be58b1eea7efd3 From 1a1bf47701d1b3d4fd07a3271476fb0b3d97c21a Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Mon, 13 Oct 2025 21:33:26 +0200 Subject: [PATCH 06/22] chore: sync submodules with main --- .../src/main/java/io/substrait/isthmus/ExtensionUtils.java | 3 ++- .../src/main/java/io/substrait/isthmus/SqlConverterBase.java | 3 ++- .../java/io/substrait/isthmus/SqlExpressionToSubstrait.java | 3 ++- .../src/main/java/io/substrait/isthmus/SqlToSubstrait.java | 5 +++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java index e9852a7ce..1fb868568 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java +++ b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java @@ -1,5 +1,6 @@ package io.substrait.isthmus; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.calcite.SubstraitOperatorTable; import java.util.List; @@ -28,7 +29,7 @@ public static SimpleExtension.ExtensionCollection getDynamicExtensions( } public static SimpleExtension.ExtensionCollection loadExtensions(List yamlFunctionFiles) { - SimpleExtension.ExtensionCollection allExtensions = SimpleExtension.loadDefaults(); + SimpleExtension.ExtensionCollection allExtensions = DefaultExtensionCatalog.DEFAULT_COLLECTION; if (yamlFunctionFiles != null && !yamlFunctionFiles.isEmpty()) { allExtensions = allExtensions.merge(SimpleExtension.load(yamlFunctionFiles)); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java index 2ed056e6d..7fa86e7d8 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java @@ -1,5 +1,6 @@ package io.substrait.isthmus; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.config.CalciteConnectionProperty; @@ -55,6 +56,6 @@ protected SqlConverterBase( } protected SqlConverterBase(FeatureBoard features) { - this(features, SimpleExtension.loadDefaults()); + this(features, DefaultExtensionCatalog.DEFAULT_COLLECTION); } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java index fc144b090..3d45f8bde 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java @@ -3,6 +3,7 @@ import io.substrait.extendedexpression.ExtendedExpression; import io.substrait.extendedexpression.ExtendedExpressionProtoConverter; import io.substrait.extendedexpression.ImmutableExtendedExpression.Builder; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.calcite.SubstraitTable; import io.substrait.isthmus.expression.RexExpressionConverter; @@ -34,7 +35,7 @@ public class SqlExpressionToSubstrait extends SqlConverterBase { protected final RexExpressionConverter rexConverter; public SqlExpressionToSubstrait() { - this(FEATURES_DEFAULT, SimpleExtension.loadDefaults()); + this(FEATURES_DEFAULT, DefaultExtensionCatalog.DEFAULT_COLLECTION); } public SqlExpressionToSubstrait( diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 9116d8ce4..bde02db9d 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -1,5 +1,6 @@ package io.substrait.isthmus; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.calcite.SubstraitOperatorTable; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; @@ -19,11 +20,11 @@ public class SqlToSubstrait extends SqlConverterBase { private final SqlOperatorTable operatorTable; public SqlToSubstrait() { - this(SimpleExtension.loadDefaults(), null); + this(DefaultExtensionCatalog.DEFAULT_COLLECTION, null); } public SqlToSubstrait(FeatureBoard features) { - this(SimpleExtension.loadDefaults(), features); + this(DefaultExtensionCatalog.DEFAULT_COLLECTION, features); } public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoard features) { From 834ab5e18cc8cad672dfdead4f71e24bed3be243 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Tue, 14 Oct 2025 00:18:03 +0200 Subject: [PATCH 07/22] chore: sync substrait with main --- substrait | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/substrait b/substrait index 793c64ba2..3c25b1b3e 160000 --- a/substrait +++ b/substrait @@ -1 +1 @@ -Subproject commit 793c64ba26e337c22f5e91b658be58b1eea7efd3 +Subproject commit 3c25b1b3eaadecba6d10af6b3dd0fe038d0c5993 From 8340cc5568e6d2edf3d108de34154348e02d77fb Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Tue, 14 Oct 2025 10:58:51 +0200 Subject: [PATCH 08/22] chore(isthmus): resolve conflicts after submodule update --- .../src/test/java/io/substrait/isthmus/CalciteLiteralTest.java | 3 ++- .../test/java/io/substrait/isthmus/ComplexAggregateTest.java | 3 ++- .../src/test/java/io/substrait/isthmus/ComplexSortTest.java | 3 ++- .../src/test/java/io/substrait/isthmus/NameRoundtripTest.java | 3 ++- .../java/io/substrait/isthmus/OptimizerIntegrationTest.java | 3 ++- isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java | 3 ++- .../test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java | 3 ++- 7 files changed, 14 insertions(+), 7 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java b/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java index 8136d6544..1afd7c1e7 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/CalciteLiteralTest.java @@ -10,6 +10,7 @@ import io.substrait.expression.Expression.Literal; import io.substrait.expression.Expression.TimestampLiteral; import io.substrait.expression.ExpressionCreator; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.SubstraitRelNodeConverter.Context; import io.substrait.isthmus.expression.ExpressionRexConverter; @@ -36,7 +37,7 @@ public class CalciteLiteralTest extends CalciteObjs { private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = - SimpleExtension.loadDefaults(); + DefaultExtensionCatalog.DEFAULT_COLLECTION; private final ScalarFunctionConverter scalarFunctionConverter = new ScalarFunctionConverter(EXTENSION_COLLECTION.scalarFunctions(), type); diff --git a/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java b/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java index 64308c3ef..20ad8ea97 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/ComplexAggregateTest.java @@ -6,6 +6,7 @@ import io.substrait.expression.AggregateFunctionInvocation; import io.substrait.expression.Expression; import io.substrait.expression.ImmutableAggregateFunctionInvocation; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.relation.Aggregate; import io.substrait.relation.NamedScan; @@ -18,7 +19,7 @@ public class ComplexAggregateTest extends PlanTestBase { private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = - SimpleExtension.loadDefaults(); + DefaultExtensionCatalog.DEFAULT_COLLECTION; final TypeCreator R = TypeCreator.of(false); SubstraitBuilder b = new SubstraitBuilder(extensions); diff --git a/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java b/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java index ee548a525..462d62e12 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/ComplexSortTest.java @@ -4,6 +4,7 @@ import io.substrait.dsl.SubstraitBuilder; import io.substrait.expression.Expression; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.relation.Rel; import io.substrait.type.TypeCreator; @@ -21,7 +22,7 @@ public class ComplexSortTest extends PlanTestBase { private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = - SimpleExtension.loadDefaults(); + DefaultExtensionCatalog.DEFAULT_COLLECTION; final TypeCreator R = TypeCreator.of(false); SubstraitBuilder b = new SubstraitBuilder(extensions); diff --git a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java index 8d5cd9c14..00aac7f44 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; @@ -14,7 +15,7 @@ public class NameRoundtripTest extends PlanTestBase { private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = - SimpleExtension.loadDefaults(); + DefaultExtensionCatalog.DEFAULT_COLLECTION; @Test void preserveNamesFromSql() throws Exception { diff --git a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java index ae7005baf..d166ff625 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; import java.io.IOException; @@ -17,7 +18,7 @@ public class OptimizerIntegrationTest extends PlanTestBase { private static final SimpleExtension.ExtensionCollection EXTENSION_COLLECTION = - SimpleExtension.loadDefaults(); + DefaultExtensionCatalog.DEFAULT_COLLECTION; @Test void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOException { diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 3c6d945c9..557398d1f 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -8,6 +8,7 @@ import com.google.common.base.Charsets; import com.google.common.io.Resources; import io.substrait.dsl.SubstraitBuilder; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.ExtensionCollector; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; @@ -67,7 +68,7 @@ public class PlanTestBase { PlanTestBase.schemaToCatalog("tpcds", TPCDS_SCHEMA); protected PlanTestBase() { - this(SimpleExtension.loadDefaults()); + this(DefaultExtensionCatalog.DEFAULT_COLLECTION); } protected PlanTestBase(SimpleExtension.ExtensionCollection extensions) { diff --git a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java index 4b4905a35..5cb984fc5 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java @@ -1,5 +1,6 @@ package io.substrait.isthmus; +import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import java.util.List; @@ -33,7 +34,7 @@ public void customUdfTest() throws Exception { private static SimpleExtension.ExtensionCollection loadExtensions( List yamlFunctionFiles) { - SimpleExtension.ExtensionCollection extensions = SimpleExtension.loadDefaults(); + SimpleExtension.ExtensionCollection extensions = DefaultExtensionCatalog.DEFAULT_COLLECTION; if (yamlFunctionFiles != null && !yamlFunctionFiles.isEmpty()) { SimpleExtension.ExtensionCollection customExtensions = SimpleExtension.load(yamlFunctionFiles); From 6ba115f4cee6437db2ddbfe19c94049e09dd3a60 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Tue, 14 Oct 2025 11:24:13 +0200 Subject: [PATCH 09/22] chore(isthmus): fix yaml for custom udf --- .../src/test/resources/extensions/scalar_functions_custom.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml index 8c152df71..9dd543de8 100644 --- a/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml +++ b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml @@ -1,5 +1,6 @@ %YAML 1.2 --- +urn: extension:substrait:functions_custom scalar_functions: - name: "regexp_extract" impls: From 838a0a7cc81a72e6d47d164a7cb0a013d5479629 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Wed, 26 Nov 2025 22:26:30 +0100 Subject: [PATCH 10/22] chore(isthmus): fix regression --- .../io/substrait/isthmus/expression/FunctionConverter.java | 4 +--- .../test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java | 4 ++-- .../test/resources/extensions/scalar_functions_custom.yaml | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index d738ef157..b5604d4d9 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -25,7 +25,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.IdentityHashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -129,8 +128,7 @@ public FunctionConverter( .collect( Multimaps.toMultimap( FunctionMappings.Sig::name, Function.identity(), ArrayListMultimap::create)); - IdentityHashMap matcherMap = - new IdentityHashMap(); + Map matcherMap = new HashMap<>(); for (String key : nameToFn.keySet()) { Collection sigs = calciteOperators.get(key); if (sigs.isEmpty()) { diff --git a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java index b622520ed..01e6835e4 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java @@ -23,13 +23,13 @@ void customUdfTest() throws Exception { "CREATE TABLE t(x VARCHAR NOT NULL)"); assertSqlSubstraitRelRoundTripWorkaroundOptimizer( - "SELECT regexp_extract(x, 'ab') from t", catalogReader); + "SELECT regexp_extract_custom(x, 'ab') from t", catalogReader); assertSqlSubstraitRelRoundTripWorkaroundOptimizer( "SELECT format_text('UPPER', x) FROM t", catalogReader); assertSqlSubstraitRelRoundTripWorkaroundOptimizer( "SELECT system_property_get(x) FROM t", catalogReader); assertSqlSubstraitRelRoundTripWorkaroundOptimizer( - "SELECT safe_divide(10,0) FROM t", catalogReader); + "SELECT safe_divide_custom(10,0) FROM t", catalogReader); } private static SimpleExtension.ExtensionCollection loadExtensions( diff --git a/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml index 9dd543de8..f8071ee03 100644 --- a/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml +++ b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml @@ -2,7 +2,7 @@ --- urn: extension:substrait:functions_custom scalar_functions: - - name: "regexp_extract" + - name: "regexp_extract_custom" impls: - args: - name: "text" @@ -33,7 +33,7 @@ scalar_functions: return: string? nullability: DECLARED_OUTPUT - - name: "safe_divide" + - name: "safe_divide_custom" description: "Performs division, returning NULL if the denominator is zero." impls: - args: From 32ae275aa9796a9ab4dea7315e8a8606db164899 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Wed, 26 Nov 2025 16:01:28 +0100 Subject: [PATCH 11/22] fix: add missing serialVersionUIDs (#621) Signed-off-by: Niels Pardon --- build-logic/src/main/resources/substrait-pmd.xml | 1 + .../main/java/io/substrait/function/ParameterizedType.java | 3 +++ core/src/main/java/io/substrait/function/TypeExpression.java | 4 +++- core/src/main/java/io/substrait/type/Deserializers.java | 2 ++ .../main/java/io/substrait/type/parser/TypeStringParser.java | 3 +++ .../java/io/substrait/type/proto/GenericRoundtripTest.java | 3 +++ 6 files changed, 15 insertions(+), 1 deletion(-) diff --git a/build-logic/src/main/resources/substrait-pmd.xml b/build-logic/src/main/resources/substrait-pmd.xml index c115976f8..5282c3a06 100644 --- a/build-logic/src/main/resources/substrait-pmd.xml +++ b/build-logic/src/main/resources/substrait-pmd.xml @@ -20,4 +20,5 @@ + diff --git a/core/src/main/java/io/substrait/function/ParameterizedType.java b/core/src/main/java/io/substrait/function/ParameterizedType.java index 1f151c8a7..e514fb975 100644 --- a/core/src/main/java/io/substrait/function/ParameterizedType.java +++ b/core/src/main/java/io/substrait/function/ParameterizedType.java @@ -11,6 +11,9 @@ public interface ParameterizedType extends TypeExpression { class RequiredParameterizedVisitorException extends RuntimeException { + + private static final long serialVersionUID = 5009974222890249956L; + @Override public synchronized Throwable fillInStackTrace() { return this; diff --git a/core/src/main/java/io/substrait/function/TypeExpression.java b/core/src/main/java/io/substrait/function/TypeExpression.java index 5d6363646..a183c1959 100644 --- a/core/src/main/java/io/substrait/function/TypeExpression.java +++ b/core/src/main/java/io/substrait/function/TypeExpression.java @@ -6,7 +6,9 @@ @Value.Enclosing public interface TypeExpression { - class RequiredTypeExpressionVisitorException extends RuntimeException {} + class RequiredTypeExpressionVisitorException extends RuntimeException { + private static final long serialVersionUID = 8381558691397737963L; + } R accept(final TypeVisitor typeVisitor) throws E; diff --git a/core/src/main/java/io/substrait/type/Deserializers.java b/core/src/main/java/io/substrait/type/Deserializers.java index bc90ecf1e..13936dc07 100644 --- a/core/src/main/java/io/substrait/type/Deserializers.java +++ b/core/src/main/java/io/substrait/type/Deserializers.java @@ -30,6 +30,8 @@ public class Deserializers { public static class ParseDeserializer extends StdDeserializer { + private static final long serialVersionUID = 2105956703553161270L; + private final BiFunction converter; public ParseDeserializer( diff --git a/core/src/main/java/io/substrait/type/parser/TypeStringParser.java b/core/src/main/java/io/substrait/type/parser/TypeStringParser.java index 8085ea6b8..b8e94793b 100644 --- a/core/src/main/java/io/substrait/type/parser/TypeStringParser.java +++ b/core/src/main/java/io/substrait/type/parser/TypeStringParser.java @@ -65,6 +65,9 @@ public void syntaxError( } public static class ParseError extends RuntimeException { + + private static final long serialVersionUID = -6831467523614033666L; + public ParseError(final String message, final Throwable cause) { super(message, cause); } diff --git a/core/src/test/java/io/substrait/type/proto/GenericRoundtripTest.java b/core/src/test/java/io/substrait/type/proto/GenericRoundtripTest.java index dce8490f1..268729c68 100644 --- a/core/src/test/java/io/substrait/type/proto/GenericRoundtripTest.java +++ b/core/src/test/java/io/substrait/type/proto/GenericRoundtripTest.java @@ -143,6 +143,9 @@ private static Object valGenerator(Class type) { // Class used to propagate type generation errors from param generator to test cases private static class UnsupportedTypeGenerationException extends Exception { + + private static final long serialVersionUID = -8627552468610061245L; + public UnsupportedTypeGenerationException(String s) { super(s); } From 8d2b4ed3772d3b7f1f7ac935b7f82c5e694561ee Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Wed, 26 Nov 2025 22:56:12 +0100 Subject: [PATCH 12/22] chore(isthmus): fix regression --- .../substrait/isthmus/SimpleExtensionToSqlOperatorTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java index 33910577e..100f8db92 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java @@ -27,10 +27,10 @@ void test() throws IOException { Optional function = operators.stream() - .filter(op -> op.getName().equalsIgnoreCase("REGEXP_EXTRACT")) + .filter(op -> op.getName().equalsIgnoreCase("REGEXP_EXTRACT_CUSTOM")) .findFirst(); - assertTrue(function.isPresent(), "The REGEXP_EXTRACT function should be present."); + assertTrue(function.isPresent(), "The REGEXP_EXTRACT_CUSTOM function should be present."); SqlOperator op = function.get(); System.out.println("Successfully found and verified Custom UDF:"); From 9b0c0274df94647981496d24495f7cbfff9fe2c6 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Wed, 3 Dec 2025 19:16:01 +0100 Subject: [PATCH 13/22] feat(isthmus): enable dynamic UDFs with FeatureBoard --- .../io/substrait/isthmus/FeatureBoard.java | 15 +++++ .../io/substrait/isthmus/SqlToSubstrait.java | 22 ++++--- .../io/substrait/isthmus/PlanTestBase.java | 65 ++++++++++++++----- .../isthmus/UdfSqlSubstraitTest.java | 18 ++--- 4 files changed, 89 insertions(+), 31 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java b/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java index 8db29f73c..a54f24146 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java +++ b/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java @@ -17,4 +17,19 @@ public abstract class FeatureBoard { public Casing unquotedCasing() { return Casing.TO_UPPER; } + + /** + * Controls whether to support dynamic user-defined functions (UDFs) during SQL to Substrait plan + * conversion. + * + *

When enabled, custom functions defined in extension YAML files are available for use in SQL + * queries. These functions will be dynamically converted to SQL operators during plan conversion. + * This feature must be explicitly enabled by users and is disabled by default. + * + * @return true if dynamic UDFs should be supported; false otherwise (default) + */ + @Value.Default + public boolean allowDynamicUdfs() { + return false; + } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index bde02db9d..abe935a75 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -30,14 +30,20 @@ public SqlToSubstrait(FeatureBoard features) { public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoard features) { super(features, extensions); - SimpleExtension.ExtensionCollection dynamicExtensionCollection = - ExtensionUtils.getDynamicExtensions(extensions); - List generatedDynamicOperators = - SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, this.factory); - - this.operatorTable = - SqlOperatorTables.chain( - SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(generatedDynamicOperators)); + if (featureBoard.allowDynamicUdfs()) { + SimpleExtension.ExtensionCollection dynamicExtensionCollection = + ExtensionUtils.getDynamicExtensions(extensions); + if (!dynamicExtensionCollection.scalarFunctions().isEmpty() + || !dynamicExtensionCollection.aggregateFunctions().isEmpty()) { + List generatedDynamicOperators = + SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, this.factory); + this.operatorTable = + SqlOperatorTables.chain( + SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(generatedDynamicOperators)); + return; + } + } + this.operatorTable = SubstraitOperatorTable.INSTANCE; } /** diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 3753ce39e..0e884fc85 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -154,39 +154,74 @@ protected RelRoot assertSqlSubstraitRelRoundTrip( return relRoot2; } - protected RelRoot assertSqlSubstraitRelRoundTripWorkaroundOptimizer( - String query, Prepare.CatalogReader catalogReader) throws Exception { - // sql <--> substrait round trip test. - // Assert (sql -> calcite -> substrait) and (sql -> substrait -> calcite -> substrait) are same. - // Return list of sql -> Substrait rel -> Calcite rel. - + /** + * Verifies that the given query can be converted through multiple round trips, with loose POJO + * comparison. + * + *

"Loose" here means not comparing the initial POJO (from SQL→Substrait conversion) to the + * first POJO after the round trip (from Substrait→Calcite→Substrait conversion), due to optimizer + * differences between: + * + *

    + *
  • SqlNode→RelRoot conversion (SQL→Substrait path) + *
  • RelBuilder/RexBuilder optimization (Substrait→Calcite path) + *
+ * + *

Instead, this method compares the second and third round-trip POJOs, ensuring that + * subsequent round trips produce stable results. + * + * @param query the SQL query to test + * @param catalogReader the Calcite catalog with table definitions + * @param featureBoard optional FeatureBoard to control conversion behavior (e.g., dynamic UDFs). + * If null, a default FeatureBoard is used. + */ + protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( + String query, Prepare.CatalogReader catalogReader, FeatureBoard featureBoard) + throws Exception { SubstraitToCalcite substraitToCalcite = new SubstraitToCalcite(extensions, typeFactory); - SqlToSubstrait s = new SqlToSubstrait(extensions, null); + // Use provided FeatureBoard, or create default if null + FeatureBoard features = + featureBoard != null ? featureBoard : ImmutableFeatureBoard.builder().build(); + SqlToSubstrait s = new SqlToSubstrait(extensions, features); - // 1. SQL -> Calcite RelRoot + // 1. SQL -> Substrait Plan Plan plan1 = s.convert(query, catalogReader); - // 2. Calcite RelRoot -> Substrait Rel + // 2. Substrait Plan -> Substrait Root (POJO 1) Plan.Root pojo1 = plan1.getRoots().get(0); - // 3. Substrait Rel -> Calcite RelNode + // 3. Substrait Root -> Calcite RelNode RelRoot relRoot2 = substraitToCalcite.convert(pojo1); - // 4. Calcite RelNode -> Substrait Rel + // 4. Calcite RelNode -> Substrait Root (POJO 2) Plan.Root pojo2 = SubstraitRelVisitor.convert(relRoot2, extensions); - // Here pojo1 and pojo2 can be different because of different default optimization - // rules between SqlNode->RelRoot conversion (Sql->Substrait) and - // RelBuilder/RexBuilder (Substrait->Sql). - // Therefore, substrait plans passed through conversion to calcite should be compared + // Note: pojo1 and pojo2 may differ due to different optimization strategies applied by: + // - SqlNode->RelRoot conversion during SQL->Substrait conversion + // - RelBuilder/RexBuilder optimization during Substrait->Calcite conversion + // This is expected, so we don't compare pojo1 and pojo2. + + // 5. Substrait Root 2 -> Calcite RelNode RelRoot relRoot3 = substraitToCalcite.convert(pojo2); + + // 6. Calcite RelNode -> Substrait Root (POJO 3) Plan.Root pojo3 = SubstraitRelVisitor.convert(relRoot3, extensions); + // Verify that subsequent round trips are stable (pojo2 and pojo3 should be identical) assertEquals(pojo2, pojo3); return relRoot2; } + /** + * Convenience overload of {@link #assertSqlSubstraitRelRoundTripLoosePojoComparison(String, + * Prepare.CatalogReader, FeatureBoard)} with default FeatureBoard behavior (no dynamic UDFs). + */ + protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( + String query, Prepare.CatalogReader catalogReader) throws Exception { + return assertSqlSubstraitRelRoundTripLoosePojoComparison(query, catalogReader, null); + } + @Beta protected void assertFullRoundTrip(String query) throws SqlParseException { assertFullRoundTrip(query, TPCH_CATALOG); diff --git a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java index 01e6835e4..69b8be3b9 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/UdfSqlSubstraitTest.java @@ -22,14 +22,16 @@ void customUdfTest() throws Exception { SubstraitCreateStatementParser.processCreateStatementsToCatalog( "CREATE TABLE t(x VARCHAR NOT NULL)"); - assertSqlSubstraitRelRoundTripWorkaroundOptimizer( - "SELECT regexp_extract_custom(x, 'ab') from t", catalogReader); - assertSqlSubstraitRelRoundTripWorkaroundOptimizer( - "SELECT format_text('UPPER', x) FROM t", catalogReader); - assertSqlSubstraitRelRoundTripWorkaroundOptimizer( - "SELECT system_property_get(x) FROM t", catalogReader); - assertSqlSubstraitRelRoundTripWorkaroundOptimizer( - "SELECT safe_divide_custom(10,0) FROM t", catalogReader); + FeatureBoard featureBoard = ImmutableFeatureBoard.builder().allowDynamicUdfs(true).build(); + + assertSqlSubstraitRelRoundTripLoosePojoComparison( + "SELECT regexp_extract_custom(x, 'ab') from t", catalogReader, featureBoard); + assertSqlSubstraitRelRoundTripLoosePojoComparison( + "SELECT format_text('UPPER', x) FROM t", catalogReader, featureBoard); + assertSqlSubstraitRelRoundTripLoosePojoComparison( + "SELECT system_property_get(x) FROM t", catalogReader, featureBoard); + assertSqlSubstraitRelRoundTripLoosePojoComparison( + "SELECT safe_divide_custom(10,0) FROM t", catalogReader, featureBoard); } private static SimpleExtension.ExtensionCollection loadExtensions( From a9e7c600accd5e93615cc934de421e1fdcc84c78 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Wed, 3 Dec 2025 22:12:24 +0100 Subject: [PATCH 14/22] chore(isthmus): refactor SimpleExtensionToSqlOperatorTest --- .../SimpleExtensionToSqlOperatorTest.java | 185 +++++++++++++++--- 1 file changed, 160 insertions(+), 25 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java index 100f8db92..e718d8aa5 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java @@ -5,42 +5,177 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import io.substrait.extension.SimpleExtension; -import java.io.IOException; +import io.substrait.type.Type; +import io.substrait.type.TypeExpressionEvaluator; +import java.util.Collections; import java.util.List; -import java.util.Optional; -import org.apache.calcite.sql.SqlOperandCountRange; +import java.util.function.Consumer; +import java.util.stream.Stream; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.runtime.CalciteException; +import org.apache.calcite.runtime.Resources; import org.apache.calcite.sql.SqlOperator; -import org.junit.jupiter.api.Test; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.validate.SqlValidatorException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +/** Tests for conversion of SimpleExtension function definitions to Calcite SqlOperators. */ class SimpleExtensionToSqlOperatorTest { - @Test - void test() throws IOException { - String customFunctionPath = "/extensions/scalar_functions_custom.yaml"; + private static final String CUSTOM_FUNCTION_PATH = "/extensions/scalar_functions_custom.yaml"; - SimpleExtension.ExtensionCollection customExtensions = - SimpleExtension.load( - customFunctionPath, - SimpleExtensionToSqlOperatorTest.class.getResourceAsStream(customFunctionPath)); + private static final SimpleExtension.ExtensionCollection EXTENSIONS = + SimpleExtension.load( + CUSTOM_FUNCTION_PATH, + SimpleExtensionToSqlOperatorTest.class.getResourceAsStream(CUSTOM_FUNCTION_PATH)); - List operators = SimpleExtensionToSqlOperator.from(customExtensions); + private static final List OPERATORS = SimpleExtensionToSqlOperator.from(EXTENSIONS); - Optional function = - operators.stream() - .filter(op -> op.getName().equalsIgnoreCase("REGEXP_EXTRACT_CUSTOM")) - .findFirst(); + /** Data carrier for test cases. */ + record TestSpec( + String name, + int minArgs, + int maxArgs, + SimpleExtension.Nullability nullability, + String expectedReturnType, + Consumer customValidator) { - assertTrue(function.isPresent(), "The REGEXP_EXTRACT_CUSTOM function should be present."); + TestSpec( + final String name, + final int min, + final int max, + final SimpleExtension.Nullability nullability, + final String returnType) { + this(name, min, max, nullability, returnType, op -> {}); + } + } + + @ParameterizedTest + @MethodSource("provideTestSpecs") + void testCustomUdfConversion(final TestSpec spec) { + final SqlOperator operator = findOperator(spec.name); + final SimpleExtension.Function funcDef = findFunctionDef(spec.name); + + // 1. Verify Argument Counts + assertEquals( + spec.minArgs, + operator.getOperandCountRange().getMin(), + () -> spec.name + ": Incorrect min args"); + assertEquals( + spec.maxArgs, + operator.getOperandCountRange().getMax(), + () -> spec.name + ": Incorrect max args"); + assertNotNull(operator.getOperandTypeChecker(), () -> spec.name + ": Type checker missing"); + + // 2. Verify Nullability (if specified) + if (spec.nullability != null) { + assertEquals( + spec.nullability, funcDef.nullability(), () -> spec.name + ": Incorrect nullability"); + } + + // 3. Verify Return Type + verifyReturnType(operator, funcDef, spec.expectedReturnType); + + // 4. Custom Validation + spec.customValidator.accept(operator); + } + + private static Stream provideTestSpecs() { + return Stream.of( + new TestSpec( + "REGEXP_EXTRACT_CUSTOM", + 2, + 2, + null, + "VARCHAR", + op -> { + final String sigs = + op.getOperandTypeChecker().getAllowedSignatures(op, op.getName()).toLowerCase(); + // Calcite represents string families as + assertTrue( + sigs.contains("varchar") || sigs.contains("string") || sigs.contains("character"), + () -> "Signatures should contain string types. Actual: " + sigs); + }), + new TestSpec("FORMAT_TEXT", 2, 2, SimpleExtension.Nullability.MIRROR, "VARCHAR"), + new TestSpec( + "SYSTEM_PROPERTY_GET", 1, 1, SimpleExtension.Nullability.DECLARED_OUTPUT, "VARCHAR"), + new TestSpec("SAFE_DIVIDE_CUSTOM", 2, 2, SimpleExtension.Nullability.DISCRETE, "REAL")); + } + + private void verifyReturnType( + final SqlOperator operator, + final SimpleExtension.Function funcDef, + final String expectedTypeName) { + assertNotNull(funcDef.returnType(), "Return type missing in YAML"); + assertNotNull(operator.getReturnTypeInference(), "SQL Operator missing return type inference"); + + // 1. Evaluate expected type from YAML + final Type expectedType = + TypeExpressionEvaluator.evaluateExpression( + funcDef.returnType(), funcDef.args(), Collections.emptyList()); + + // 2. Convert expected Substrait type to Calcite type + final RelDataType expectedCalciteType = + TypeConverter.DEFAULT.toCalcite(SubstraitTypeSystem.TYPE_FACTORY, expectedType); + + // 3. Validate consistency: Ensure YAML derived type matches the TestSpec expectation string + // This utilizes the previously unused 'expectedTypeName' + assertEquals( + expectedTypeName, + expectedCalciteType.getSqlTypeName().toString(), + () -> + "YAML definition derived type does not match TestSpec expectation for " + + funcDef.name()); + + // 4. Infer actual type from the Calcite Operator using a minimal binding + final RelDataType actualReturnType = + operator.getReturnTypeInference().inferReturnType(createMockBinding(operator)); + + // 5. Compare Derived Expectation vs Actual Operator Inference + assertEquals( + expectedCalciteType.getSqlTypeName(), + actualReturnType.getSqlTypeName(), + () -> "Return type mismatch for " + funcDef.name()); + assertEquals( + expectedCalciteType.isNullable(), + actualReturnType.isNullable(), + () -> "Nullability mismatch for " + funcDef.name()); + } + + private SqlOperator findOperator(final String name) { + return OPERATORS.stream() + .filter(o -> o.getName().equalsIgnoreCase(name)) + .findFirst() + .orElseThrow(() -> new AssertionError("Operator not found: " + name)); + } + + private SimpleExtension.Function findFunctionDef(final String name) { + return EXTENSIONS.scalarFunctions().stream() + .filter(f -> f.name().equalsIgnoreCase(name)) + .findFirst() + .orElseThrow(() -> new AssertionError("YAML Definition not found: " + name)); + } - SqlOperator op = function.get(); - System.out.println("Successfully found and verified Custom UDF:"); - System.out.printf(" - Name: %s%n", op.getName()); + /** Minimal anonymous implementation of SqlOperatorBinding to support return type inference. */ + private SqlOperatorBinding createMockBinding(final SqlOperator operator) { + final RelDataTypeFactory typeFactory = SubstraitTypeSystem.TYPE_FACTORY; + return new SqlOperatorBinding(typeFactory, operator) { + @Override + public int getOperandCount() { + return 0; + } - SqlOperandCountRange operandCountRange = op.getOperandCountRange(); - assertEquals(2, operandCountRange.getMin(), "Function should require 2 arguments."); - assertEquals(2, operandCountRange.getMax(), "Function should require 2 arguments."); - System.out.printf(" - Argument Count: %d%n", operandCountRange.getMin()); + @Override + public RelDataType getOperandType(final int ordinal) { + throw new IndexOutOfBoundsException(); + } - assertNotNull(op.getOperandTypeChecker(), "Operand type checker should not be null."); + @Override + public CalciteException newError(final Resources.ExInst e) { + return new CalciteException(e.toString(), null); + } + }; } } From 2f6cd2893d55197f4829e7f81c48d1eef5969cff Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Wed, 3 Dec 2025 23:50:53 +0100 Subject: [PATCH 15/22] chore(isthmus): address minor PR comments --- .../io/substrait/isthmus/ExtensionUtils.java | 35 ++++++++++++------- .../isthmus/SimpleExtensionToSqlOperator.java | 14 ++------ .../extensions/scalar_functions_custom.yaml | 2 -- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java index 1fb868568..377020bb3 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java +++ b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java @@ -1,8 +1,7 @@ package io.substrait.isthmus; -import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.extension.SimpleExtension; -import io.substrait.isthmus.calcite.SubstraitOperatorTable; +import io.substrait.isthmus.expression.FunctionMappings; import java.util.List; import java.util.Locale; import java.util.Set; @@ -10,11 +9,31 @@ public class ExtensionUtils { + /** + * Extracts dynamic extensions from a collection of extensions. + * + *

A dynamic extension is a user-defined function (UDF) that is not part of the standard + * Substrait function catalog. These are custom functions that users define and provide at + * runtime, extending the built-in function set with domain-specific or application-specific + * operations. + * + *

This method filters out all functions that are already known to the Calcite operator table + * (the standard/built-in functions) and returns only the custom functions that represent new + * capabilities not available in the default function set. + * + *

Example: If a user defines a custom UDF "my_hash_function" that computes a + * proprietary hash, this would be a dynamic extension since it's not part of the standard + * Substrait specification. + * + * @param extensions the complete collection of extensions (both standard and custom) + * @return a new ExtensionCollection containing only the dynamic (custom/user-defined) functions + * that are not present in the standard Substrait function catalog + */ public static SimpleExtension.ExtensionCollection getDynamicExtensions( SimpleExtension.ExtensionCollection extensions) { Set knownFunctionNames = - SubstraitOperatorTable.INSTANCE.getOperatorList().stream() - .map(op -> op.getName().toLowerCase(Locale.ROOT)) + FunctionMappings.SCALAR_SIGS.stream() + .map(FunctionMappings.Sig::name) .collect(Collectors.toSet()); List customFunctions = @@ -27,12 +46,4 @@ public static SimpleExtension.ExtensionCollection getDynamicExtensions( // TODO: handle aggregates and other functions .build(); } - - public static SimpleExtension.ExtensionCollection loadExtensions(List yamlFunctionFiles) { - SimpleExtension.ExtensionCollection allExtensions = DefaultExtensionCatalog.DEFAULT_COLLECTION; - if (yamlFunctionFiles != null && !yamlFunctionFiles.isEmpty()) { - allExtensions = allExtensions.merge(SimpleExtension.load(yamlFunctionFiles)); - } - return allExtensions; - } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java index 4ee3a419e..3d130dd0e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java @@ -37,6 +37,7 @@ public static List from(SimpleExtension.ExtensionCollection collect public static List from( SimpleExtension.ExtensionCollection collection, RelDataTypeFactory typeFactory) { TypeConverter typeConverter = TypeConverter.DEFAULT; + // TODO: add support for windows functions return Stream.concat( collection.scalarFunctions().stream(), collection.aggregateFunctions().stream()) .map(function -> toSqlFunction(function, typeFactory, typeConverter)) @@ -108,9 +109,6 @@ public RelDataType inferReturnType(SqlOperatorBinding opBinding) { opBinding.collectOperandTypes().stream().anyMatch(RelDataType::isNullable); break; case DISCRETE: - // The function can return null even if inputs are not null. - finalIsNullable = true; - break; case DECLARED_OUTPUT: default: // Use the nullability declared on the resolved Substrait type. @@ -209,7 +207,7 @@ public SqlTypeName visit(Type.IntervalDay day) { @Override public SqlTypeName visit(Type.UUID expr) { - return SqlTypeName.VARCHAR; + return SqlTypeName.UUID; } @Override @@ -282,12 +280,6 @@ public SqlTypeName visit(ParameterizedType.IntervalDay expr) { return SqlTypeName.INTERVAL_DAY; } - @Override - public SqlTypeName visit(ParameterizedType.IntervalCompound expr) { - // TODO: double check - return SqlTypeName.INTERVAL_DAY_HOUR; - } - @Override public SqlTypeName visit(ParameterizedType.StringLiteral expr) { String type = expr.value().toUpperCase(); @@ -324,7 +316,7 @@ public SqlTypeName visit(ParameterizedType.StringLiteral expr) { case "TIME": return SqlTypeName.TIME; case "UUID": - return SqlTypeName.VARCHAR; + return SqlTypeName.UUID; default: if (type.startsWith("DECIMAL")) { return SqlTypeName.DECIMAL; diff --git a/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml index f8071ee03..05595eb2f 100644 --- a/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml +++ b/isthmus/src/test/resources/extensions/scalar_functions_custom.yaml @@ -16,10 +16,8 @@ scalar_functions: impls: - args: - name: "mode" -# options: ["UPPER", "LOWER"] value: string - name: "input_text" -# options: ["UPPER", "LOWER"] value: string return: string nullability: MIRROR From 71fcdfcfc58fb51b591d56a93aaf5113885c6c8d Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Thu, 4 Dec 2025 00:16:18 +0100 Subject: [PATCH 16/22] feat(isthmus): make dynamic UDF configurable for substrait to sql conversion --- .../isthmus/SubstraitRelNodeConverter.java | 81 +++++++++++++------ .../substrait/isthmus/SubstraitToCalcite.java | 13 ++- .../io/substrait/isthmus/PlanTestBase.java | 5 +- 3 files changed, 73 insertions(+), 26 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index 33f9f4a59..aeb886bd7 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -108,9 +108,21 @@ public SubstraitRelNodeConverter( RelDataTypeFactory typeFactory, RelBuilder relBuilder) { this( + extensions, typeFactory, relBuilder, - createScalarFunctionConverter(extensions, typeFactory), + ImmutableFeatureBoard.builder().build()); + } + + public SubstraitRelNodeConverter( + SimpleExtension.ExtensionCollection extensions, + RelDataTypeFactory typeFactory, + RelBuilder relBuilder, + FeatureBoard featureBoard) { + this( + typeFactory, + relBuilder, + createScalarFunctionConverter(extensions, typeFactory, featureBoard.allowDynamicUdfs()), new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory), new WindowFunctionConverter(extensions.windowFunctions(), typeFactory), TypeConverter.DEFAULT); @@ -153,32 +165,39 @@ public SubstraitRelNodeConverter( } private static ScalarFunctionConverter createScalarFunctionConverter( - SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory) { - - java.util.Set knownFunctionNames = - FunctionMappings.SCALAR_SIGS.stream() - .map(FunctionMappings.Sig::name) - .collect(Collectors.toSet()); - - List dynamicFunctions = - extensions.scalarFunctions().stream() - .filter(f -> !knownFunctionNames.contains(f.name().toLowerCase())) - .collect(Collectors.toList()); + SimpleExtension.ExtensionCollection extensions, + RelDataTypeFactory typeFactory, + boolean allowDynamicUdfs) { List additionalSignatures; - if (dynamicFunctions.isEmpty()) { - additionalSignatures = Collections.emptyList(); - } else { - SimpleExtension.ExtensionCollection dynamicExtensionCollection = - SimpleExtension.ExtensionCollection.builder().scalarFunctions(dynamicFunctions).build(); - List dynamicOperators = - SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); + if (allowDynamicUdfs) { + java.util.Set knownFunctionNames = + FunctionMappings.SCALAR_SIGS.stream() + .map(FunctionMappings.Sig::name) + .collect(Collectors.toSet()); - additionalSignatures = - dynamicOperators.stream() - .map(op -> FunctionMappings.s(op, op.getName())) + List dynamicFunctions = + extensions.scalarFunctions().stream() + .filter(f -> !knownFunctionNames.contains(f.name().toLowerCase())) .collect(Collectors.toList()); + + if (dynamicFunctions.isEmpty()) { + additionalSignatures = Collections.emptyList(); + } else { + SimpleExtension.ExtensionCollection dynamicExtensionCollection = + SimpleExtension.ExtensionCollection.builder().scalarFunctions(dynamicFunctions).build(); + + List dynamicOperators = + SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); + + additionalSignatures = + dynamicOperators.stream() + .map(op -> FunctionMappings.s(op, op.getName())) + .collect(Collectors.toList()); + } + } else { + additionalSignatures = Collections.emptyList(); } return new ScalarFunctionConverter( @@ -191,6 +210,22 @@ public static RelNode convert( Prepare.CatalogReader catalogReader, SqlParser.Config parserConfig, SimpleExtension.ExtensionCollection extensions) { + return convert( + relRoot, + relOptCluster, + catalogReader, + parserConfig, + extensions, + ImmutableFeatureBoard.builder().build()); + } + + public static RelNode convert( + Rel relRoot, + RelOptCluster relOptCluster, + Prepare.CatalogReader catalogReader, + SqlParser.Config parserConfig, + SimpleExtension.ExtensionCollection extensions, + FeatureBoard featureBoard) { RelBuilder relBuilder = RelBuilder.create( Frameworks.newConfigBuilder() @@ -201,7 +236,7 @@ public static RelNode convert( .build()); return relRoot.accept( - new SubstraitRelNodeConverter(extensions, relOptCluster.getTypeFactory(), relBuilder), + new SubstraitRelNodeConverter(extensions, relOptCluster.getTypeFactory(), relBuilder, featureBoard), Context.newContext()); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java index 8dcfbf9e0..84b5ce7c4 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java @@ -38,6 +38,7 @@ public class SubstraitToCalcite { protected final RelDataTypeFactory typeFactory; protected final TypeConverter typeConverter; protected final Prepare.CatalogReader catalogReader; + protected final FeatureBoard featureBoard; public SubstraitToCalcite( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory) { @@ -63,10 +64,20 @@ public SubstraitToCalcite( RelDataTypeFactory typeFactory, TypeConverter typeConverter, Prepare.CatalogReader catalogReader) { + this(extensions, typeFactory, typeConverter, catalogReader, ImmutableFeatureBoard.builder().build()); + } + + public SubstraitToCalcite( + SimpleExtension.ExtensionCollection extensions, + RelDataTypeFactory typeFactory, + TypeConverter typeConverter, + Prepare.CatalogReader catalogReader, + FeatureBoard featureBoard) { this.extensions = extensions; this.typeFactory = typeFactory; this.typeConverter = typeConverter; this.catalogReader = catalogReader; + this.featureBoard = featureBoard; } /** @@ -94,7 +105,7 @@ protected RelBuilder createRelBuilder(CalciteSchema schema) { *

Override this method to customize the {@link SubstraitRelNodeConverter}. */ protected SubstraitRelNodeConverter createSubstraitRelNodeConverter(RelBuilder relBuilder) { - return new SubstraitRelNodeConverter(extensions, typeFactory, relBuilder); + return new SubstraitRelNodeConverter(extensions, typeFactory, relBuilder, featureBoard); } /** diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 0e884fc85..465ba36c5 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -178,11 +178,12 @@ protected RelRoot assertSqlSubstraitRelRoundTrip( protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( String query, Prepare.CatalogReader catalogReader, FeatureBoard featureBoard) throws Exception { - SubstraitToCalcite substraitToCalcite = new SubstraitToCalcite(extensions, typeFactory); - // Use provided FeatureBoard, or create default if null FeatureBoard features = featureBoard != null ? featureBoard : ImmutableFeatureBoard.builder().build(); + + SubstraitToCalcite substraitToCalcite = + new SubstraitToCalcite(extensions, typeFactory, TypeConverter.DEFAULT, null, features); SqlToSubstrait s = new SqlToSubstrait(extensions, features); // 1. SQL -> Substrait Plan From 142d372640c35d1aed05d6bc3f5437ff258d8716 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Thu, 4 Dec 2025 00:27:23 +0100 Subject: [PATCH 17/22] chore(isthmus): fix format violations --- .../io/substrait/isthmus/SubstraitRelNodeConverter.java | 9 +++------ .../java/io/substrait/isthmus/SubstraitToCalcite.java | 7 ++++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index aeb886bd7..8c424d369 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -107,11 +107,7 @@ public SubstraitRelNodeConverter( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, RelBuilder relBuilder) { - this( - extensions, - typeFactory, - relBuilder, - ImmutableFeatureBoard.builder().build()); + this(extensions, typeFactory, relBuilder, ImmutableFeatureBoard.builder().build()); } public SubstraitRelNodeConverter( @@ -236,7 +232,8 @@ public static RelNode convert( .build()); return relRoot.accept( - new SubstraitRelNodeConverter(extensions, relOptCluster.getTypeFactory(), relBuilder, featureBoard), + new SubstraitRelNodeConverter( + extensions, relOptCluster.getTypeFactory(), relBuilder, featureBoard), Context.newContext()); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java index 84b5ce7c4..772a3e192 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java @@ -64,7 +64,12 @@ public SubstraitToCalcite( RelDataTypeFactory typeFactory, TypeConverter typeConverter, Prepare.CatalogReader catalogReader) { - this(extensions, typeFactory, typeConverter, catalogReader, ImmutableFeatureBoard.builder().build()); + this( + extensions, + typeFactory, + typeConverter, + catalogReader, + ImmutableFeatureBoard.builder().build()); } public SubstraitToCalcite( From 74fd2e7039f399f2b33cce24629f236daf915651 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Thu, 4 Dec 2025 00:48:38 +0100 Subject: [PATCH 18/22] chore(isthmus): minor refactoring --- .../substrait/isthmus/SimpleExtensionToSqlOperator.java | 8 +++++++- .../io/substrait/isthmus/sql/SubstraitSqlToCalcite.java | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java index 3d130dd0e..e457d6b09 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java @@ -36,7 +36,13 @@ public static List from(SimpleExtension.ExtensionCollection collect public static List from( SimpleExtension.ExtensionCollection collection, RelDataTypeFactory typeFactory) { - TypeConverter typeConverter = TypeConverter.DEFAULT; + return from(collection, typeFactory, TypeConverter.DEFAULT); + } + + public static List from( + SimpleExtension.ExtensionCollection collection, + RelDataTypeFactory typeFactory, + TypeConverter typeConverter) { // TODO: add support for windows functions return Stream.concat( collection.scalarFunctions().stream(), collection.aggregateFunctions().stream()) diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java index 6b84b2de5..b46d30e7c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java @@ -48,7 +48,7 @@ public static RelRoot convertQuery(String sqlStatement, Prepare.CatalogReader ca * @param sqlStatement a SQL statement string * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in * the SQL statement - * @param operatorTable the {@link SqlOperatorTable} for dynamic operators + * @param operatorTable the {@link SqlOperatorTable} for controlling valid operators * @return a {@link RelRoot} corresponding to the given SQL statement * @throws SqlParseException if there is an error while parsing the SQL statement */ @@ -97,7 +97,7 @@ public static RelRoot convertQuery( * @param sqlStatements a string containing one or more SQL statements * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in * the SQL statements - * @param operatorTable the {@link SqlOperatorTable} for dynamic operators + * @param operatorTable the {@link SqlOperatorTable} for controlling valid operators * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ From ec4517337bec91c772a08a539a7690c3d5738d23 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Thu, 4 Dec 2025 11:17:17 +0100 Subject: [PATCH 19/22] chore(isthmus): check arg types in SimpleExtensionToSqlOperatorTest --- .../SimpleExtensionToSqlOperatorTest.java | 229 +++++++++++------- 1 file changed, 140 insertions(+), 89 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java index e718d8aa5..c4a243594 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java @@ -7,9 +7,14 @@ import io.substrait.extension.SimpleExtension; import io.substrait.type.Type; import io.substrait.type.TypeExpressionEvaluator; +import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.function.Consumer; +import java.util.Map; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -17,6 +22,7 @@ import org.apache.calcite.runtime.Resources; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.validate.SqlValidatorException; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -25,40 +31,43 @@ class SimpleExtensionToSqlOperatorTest { private static final String CUSTOM_FUNCTION_PATH = "/extensions/scalar_functions_custom.yaml"; + private static final RelDataTypeFactory TYPE_FACTORY = SubstraitTypeSystem.TYPE_FACTORY; + + private static final Map FUNCTION_DEFS; + private static final Map OPERATORS; + + static { + final var extensions = + SimpleExtension.load( + CUSTOM_FUNCTION_PATH, + SimpleExtensionToSqlOperatorTest.class.getResourceAsStream(CUSTOM_FUNCTION_PATH)); + + FUNCTION_DEFS = + extensions.scalarFunctions().stream() + .collect( + Collectors.toUnmodifiableMap(f -> f.name().toLowerCase(), Function.identity())); + + OPERATORS = + SimpleExtensionToSqlOperator.from(extensions).stream() + .collect( + Collectors.toUnmodifiableMap( + op -> op.getName().toLowerCase(), Function.identity())); + } - private static final SimpleExtension.ExtensionCollection EXTENSIONS = - SimpleExtension.load( - CUSTOM_FUNCTION_PATH, - SimpleExtensionToSqlOperatorTest.class.getResourceAsStream(CUSTOM_FUNCTION_PATH)); - - private static final List OPERATORS = SimpleExtensionToSqlOperator.from(EXTENSIONS); - - /** Data carrier for test cases. */ + /** Test Specification. */ record TestSpec( String name, int minArgs, int maxArgs, SimpleExtension.Nullability nullability, - String expectedReturnType, - Consumer customValidator) { - - TestSpec( - final String name, - final int min, - final int max, - final SimpleExtension.Nullability nullability, - final String returnType) { - this(name, min, max, nullability, returnType, op -> {}); - } - } + List expectedArgTypes) {} @ParameterizedTest @MethodSource("provideTestSpecs") void testCustomUdfConversion(final TestSpec spec) { - final SqlOperator operator = findOperator(spec.name); - final SimpleExtension.Function funcDef = findFunctionDef(spec.name); + final SqlOperator operator = getOperator(spec.name); + final SimpleExtension.Function funcDef = getFunctionDef(spec.name); - // 1. Verify Argument Counts assertEquals( spec.minArgs, operator.getOperandCountRange().getMin(), @@ -67,109 +76,151 @@ void testCustomUdfConversion(final TestSpec spec) { spec.maxArgs, operator.getOperandCountRange().getMax(), () -> spec.name + ": Incorrect max args"); - assertNotNull(operator.getOperandTypeChecker(), () -> spec.name + ": Type checker missing"); - // 2. Verify Nullability (if specified) if (spec.nullability != null) { assertEquals( spec.nullability, funcDef.nullability(), () -> spec.name + ": Incorrect nullability"); } - // 3. Verify Return Type - verifyReturnType(operator, funcDef, spec.expectedReturnType); + if (!spec.expectedArgTypes.isEmpty()) { + verifyAllowedSignatures(operator, spec.expectedArgTypes); + } - // 4. Custom Validation - spec.customValidator.accept(operator); + verifyReturnTypeConsistency(operator, funcDef); } private static Stream provideTestSpecs() { return Stream.of( + new TestSpec("REGEXP_EXTRACT_CUSTOM", 2, 2, null, List.of("VARCHAR", "VARCHAR")), + new TestSpec( + "FORMAT_TEXT", 2, 2, SimpleExtension.Nullability.MIRROR, List.of("VARCHAR", "VARCHAR")), + new TestSpec( + "SYSTEM_PROPERTY_GET", + 1, + 1, + SimpleExtension.Nullability.DECLARED_OUTPUT, + List.of("VARCHAR")), new TestSpec( - "REGEXP_EXTRACT_CUSTOM", + "SAFE_DIVIDE_CUSTOM", 2, 2, - null, - "VARCHAR", - op -> { - final String sigs = - op.getOperandTypeChecker().getAllowedSignatures(op, op.getName()).toLowerCase(); - // Calcite represents string families as - assertTrue( - sigs.contains("varchar") || sigs.contains("string") || sigs.contains("character"), - () -> "Signatures should contain string types. Actual: " + sigs); - }), - new TestSpec("FORMAT_TEXT", 2, 2, SimpleExtension.Nullability.MIRROR, "VARCHAR"), - new TestSpec( - "SYSTEM_PROPERTY_GET", 1, 1, SimpleExtension.Nullability.DECLARED_OUTPUT, "VARCHAR"), - new TestSpec("SAFE_DIVIDE_CUSTOM", 2, 2, SimpleExtension.Nullability.DISCRETE, "REAL")); + SimpleExtension.Nullability.DISCRETE, + List.of("INTEGER", "INTEGER"))); } - private void verifyReturnType( - final SqlOperator operator, - final SimpleExtension.Function funcDef, - final String expectedTypeName) { - assertNotNull(funcDef.returnType(), "Return type missing in YAML"); - assertNotNull(operator.getReturnTypeInference(), "SQL Operator missing return type inference"); + /** + * Parses the operator's signature string and checks that the types match the expected list + * index-by-index. + */ + private void verifyAllowedSignatures( + final SqlOperator operator, final List expectedArgTypes) { + assertNotNull(operator.getOperandTypeChecker(), "Operand type checker is null"); - // 1. Evaluate expected type from YAML - final Type expectedType = - TypeExpressionEvaluator.evaluateExpression( - funcDef.returnType(), funcDef.args(), Collections.emptyList()); + // e.g., "SAFE_DIVIDE_CUSTOM(, )" + final String signature = + operator + .getOperandTypeChecker() + .getAllowedSignatures(operator, operator.getName()) + .toUpperCase(); + + // Regex to capture arguments inside parentheses: NAME(ARG1, ARG2) + final Pattern pattern = Pattern.compile(".*?\\((.*)\\).*"); + final Matcher matcher = pattern.matcher(signature); + + assertTrue(matcher.matches(), () -> "Signature format not recognized: " + signature); - // 2. Convert expected Substrait type to Calcite type - final RelDataType expectedCalciteType = - TypeConverter.DEFAULT.toCalcite(SubstraitTypeSystem.TYPE_FACTORY, expectedType); + // Split args by comma (assuming simple types for this test suite) + final String argsPart = matcher.group(1); + final List actualArgTypes = + Arrays.stream(argsPart.split(",")).map(String::trim).toList(); - // 3. Validate consistency: Ensure YAML derived type matches the TestSpec expectation string - // This utilizes the previously unused 'expectedTypeName' assertEquals( - expectedTypeName, - expectedCalciteType.getSqlTypeName().toString(), - () -> - "YAML definition derived type does not match TestSpec expectation for " - + funcDef.name()); + expectedArgTypes.size(), + actualArgTypes.size(), + () -> "Signature argument count mismatch. Signature: " + signature); + + // Positional Check + for (int i = 0; i < expectedArgTypes.size(); i++) { + final String expected = expectedArgTypes.get(i); + final String actual = actualArgTypes.get(i); + + final SqlTypeName sqlTypeName = SqlTypeName.valueOf(expected); + final String familyName = sqlTypeName.getFamily().toString(); + + // Check if the actual slot matches the specific type OR the generic family + // e.g. Expected "INTEGER" matches actual "" or "INTEGER" + final boolean match = actual.contains(expected) || actual.contains(familyName); + + final int index = i; + assertTrue( + match, + () -> + "Argument mismatch at index " + + index + + ".\n" + + "Expected: " + + expected + + " (Family: " + + familyName + + ")\n" + + "Actual: " + + actual + + "\n" + + "Full Signature: " + + signature); + } + } + + private void verifyReturnTypeConsistency( + final SqlOperator operator, final SimpleExtension.Function funcDef) { + assertNotNull(operator.getReturnTypeInference(), "Return type inference is null"); + + // A. Expected: Evaluate YAML return type -> Convert to Calcite + final Type yamlReturnType = + TypeExpressionEvaluator.evaluateExpression( + funcDef.returnType(), funcDef.args(), Collections.emptyList()); + final RelDataType expectedType = TypeConverter.DEFAULT.toCalcite(TYPE_FACTORY, yamlReturnType); - // 4. Infer actual type from the Calcite Operator using a minimal binding - final RelDataType actualReturnType = - operator.getReturnTypeInference().inferReturnType(createMockBinding(operator)); + // B. Actual: Infer from Operator (using empty binding, sufficient for static types) + final RelDataType actualType = + operator + .getReturnTypeInference() + .inferReturnType(createMockBinding(operator, Collections.emptyList())); - // 5. Compare Derived Expectation vs Actual Operator Inference + // C. Compare assertEquals( - expectedCalciteType.getSqlTypeName(), - actualReturnType.getSqlTypeName(), + expectedType.getSqlTypeName(), + actualType.getSqlTypeName(), () -> "Return type mismatch for " + funcDef.name()); assertEquals( - expectedCalciteType.isNullable(), - actualReturnType.isNullable(), + expectedType.isNullable(), + actualType.isNullable(), () -> "Nullability mismatch for " + funcDef.name()); } - private SqlOperator findOperator(final String name) { - return OPERATORS.stream() - .filter(o -> o.getName().equalsIgnoreCase(name)) - .findFirst() - .orElseThrow(() -> new AssertionError("Operator not found: " + name)); + private static SqlOperator getOperator(final String name) { + final SqlOperator op = OPERATORS.get(name.toLowerCase()); + assertNotNull(op, "Operator not found: " + name); + return op; } - private SimpleExtension.Function findFunctionDef(final String name) { - return EXTENSIONS.scalarFunctions().stream() - .filter(f -> f.name().equalsIgnoreCase(name)) - .findFirst() - .orElseThrow(() -> new AssertionError("YAML Definition not found: " + name)); + private static SimpleExtension.Function getFunctionDef(final String name) { + final SimpleExtension.Function func = FUNCTION_DEFS.get(name.toLowerCase()); + assertNotNull(func, "YAML Def not found: " + name); + return func; } - /** Minimal anonymous implementation of SqlOperatorBinding to support return type inference. */ - private SqlOperatorBinding createMockBinding(final SqlOperator operator) { - final RelDataTypeFactory typeFactory = SubstraitTypeSystem.TYPE_FACTORY; - return new SqlOperatorBinding(typeFactory, operator) { + private SqlOperatorBinding createMockBinding( + final SqlOperator operator, final List argumentTypes) { + return new SqlOperatorBinding(TYPE_FACTORY, operator) { @Override public int getOperandCount() { - return 0; + return argumentTypes.size(); } @Override public RelDataType getOperandType(final int ordinal) { - throw new IndexOutOfBoundsException(); + return argumentTypes.get(ordinal); } @Override From 6eedacdd72d3e90c1bb783b8f7a2c85b680c2a3c Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Thu, 4 Dec 2025 11:25:21 +0100 Subject: [PATCH 20/22] chore(isthmus): minor refactoring of SimpleExtensionToSqlOperator --- .../io/substrait/isthmus/SimpleExtensionToSqlOperator.java | 4 +++- .../substrait/isthmus/SimpleExtensionToSqlOperatorTest.java | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java index e457d6b09..3c61acd94 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java @@ -28,6 +28,8 @@ public final class SimpleExtensionToSqlOperator { private static final RelDataTypeFactory DEFAULT_TYPE_FACTORY = new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM); + private static final CalciteTypeVisitor CALCITE_TYPE_VISITOR = new CalciteTypeVisitor(); + private SimpleExtensionToSqlOperator() {} public static List from(SimpleExtension.ExtensionCollection collection) { @@ -60,7 +62,7 @@ private static SqlFunction toSqlFunction( for (SimpleExtension.Argument arg : function.requiredArguments()) { if (arg instanceof SimpleExtension.ValueArgument) { SimpleExtension.ValueArgument valueArg = (SimpleExtension.ValueArgument) arg; - SqlTypeName typeName = valueArg.value().accept(new CalciteTypeVisitor()); + SqlTypeName typeName = valueArg.value().accept(CALCITE_TYPE_VISITOR); argFamilies.add(typeName.getFamily()); } else if (arg instanceof SimpleExtension.EnumArgument) { // Treat an EnumArgument as a required string literal. diff --git a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java index c4a243594..030e501d8 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/SimpleExtensionToSqlOperatorTest.java @@ -37,7 +37,7 @@ class SimpleExtensionToSqlOperatorTest { private static final Map OPERATORS; static { - final var extensions = + final SimpleExtension.ExtensionCollection extensions = SimpleExtension.load( CUSTOM_FUNCTION_PATH, SimpleExtensionToSqlOperatorTest.class.getResourceAsStream(CUSTOM_FUNCTION_PATH)); From b002d9be3503bc598f3e4e934ce4e0e99f45d025 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Fri, 5 Dec 2025 17:32:06 +0100 Subject: [PATCH 21/22] chore(isthmus): fix opt-in for dynamic operators --- .../isthmus/SubstraitRelVisitor.java | 36 +++++++++++-------- .../io/substrait/isthmus/PlanTestBase.java | 3 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java index 9ccc2989c..7027a3a4f 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java @@ -92,24 +92,30 @@ public SubstraitRelVisitor( SimpleExtension.ExtensionCollection extensions, FeatureBoard features) { - SimpleExtension.ExtensionCollection dynamicExtensionCollection = - ExtensionUtils.getDynamicExtensions(extensions); - List dynamicOperators = - SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); - - List additionalSignatures = - dynamicOperators.stream() - .map(op -> FunctionMappings.s(op, op.getName())) - .collect(Collectors.toList()); this.typeConverter = TypeConverter.DEFAULT; ArrayList converters = new ArrayList<>(); converters.addAll(CallConverters.defaults(typeConverter)); - converters.add( - new ScalarFunctionConverter( - extensions.scalarFunctions(), - additionalSignatures, - typeFactory, - TypeConverter.DEFAULT)); + + if (features.allowDynamicUdfs()) { + SimpleExtension.ExtensionCollection dynamicExtensionCollection = + ExtensionUtils.getDynamicExtensions(extensions); + List dynamicOperators = + SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); + + List additionalSignatures = + dynamicOperators.stream() + .map(op -> FunctionMappings.s(op, op.getName())) + .collect(Collectors.toList()); + converters.add( + new ScalarFunctionConverter( + extensions.scalarFunctions(), + additionalSignatures, + typeFactory, + TypeConverter.DEFAULT)); + } else { + converters.add(new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory)); + } + converters.add(CallConverters.CREATE_SEARCH_CONV.apply(new RexBuilder(typeFactory))); this.aggregateFunctionConverter = new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory); diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 465ba36c5..f74ae90ee 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -220,7 +220,8 @@ protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( */ protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( String query, Prepare.CatalogReader catalogReader) throws Exception { - return assertSqlSubstraitRelRoundTripLoosePojoComparison(query, catalogReader, null); + return assertSqlSubstraitRelRoundTripLoosePojoComparison( + query, catalogReader, ImmutableFeatureBoard.builder().build()); } @Beta From fc2d57684c12fdb02b3e443b0f9ff1d5dd5185ec Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Fri, 5 Dec 2025 17:56:31 +0100 Subject: [PATCH 22/22] chore(isthmus): fix regression in test --- isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 2689d411e..a37916bc4 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -194,7 +194,7 @@ protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( RelRoot relRoot2 = substraitToCalcite.convert(pojo1); // 4. Calcite RelNode -> Substrait Root (POJO 2) - Plan.Root pojo2 = SubstraitRelVisitor.convert(relRoot2, extensions); + Plan.Root pojo2 = SubstraitRelVisitor.convert(relRoot2, extensions, features); // Note: pojo1 and pojo2 may differ due to different optimization strategies applied by: // - SqlNode->RelRoot conversion during SQL->Substrait conversion @@ -205,7 +205,7 @@ protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( RelRoot relRoot3 = substraitToCalcite.convert(pojo2); // 6. Calcite RelNode -> Substrait Root (POJO 3) - Plan.Root pojo3 = SubstraitRelVisitor.convert(relRoot3, extensions); + Plan.Root pojo3 = SubstraitRelVisitor.convert(relRoot3, extensions, features); // Verify that subsequent round trips are stable (pojo2 and pojo3 should be identical) assertEquals(pojo2, pojo3);