From 723eb23db96186dccb1da41bf1033b1f41523b7c Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Fri, 28 Oct 2022 09:34:52 +0800 Subject: [PATCH 01/23] substrait yaml file parser and function lookup support --- .clang-format | 87 +++ .gitignore | 2 + .gitmodules | 12 + CMakeLists.txt | 33 + Makefile | 46 ++ README.md | 33 + core/CMakeLists.txt | 16 + core/common/CMakeLists.txt | 20 + core/common/Exceptions.cpp | 36 ++ core/function/CMakeLists.txt | 29 + core/function/Extension.cpp | 291 +++++++++ core/function/Function.cpp | 100 +++ core/function/FunctionLookup.cpp | 40 ++ core/function/tests/CMakeLists.txt | 25 + core/function/tests/FunctionLookupTest.cpp | 144 +++++ core/type/CMakeLists.txt | 24 + core/type/Type.cpp | 529 ++++++++++++++++ core/type/tests/CMakeLists.txt | 25 + core/type/tests/TypeTest.cpp | 175 ++++++ include/common/Exceptions.h | 140 +++++ include/function/Extension.h | 90 +++ include/function/Function.h | 120 ++++ include/function/FunctionLookup.h | 98 +++ include/function/FunctionMapping.h | 48 ++ include/function/FunctionSignature.h | 32 + include/type/Type.h | 693 +++++++++++++++++++++ scripts/setup-helper-functions.sh | 139 +++++ scripts/setup-ubuntu.sh | 79 +++ third_party/CMakeLists.txt | 22 + third_party/fmt | 1 + third_party/googletest | 1 + third_party/substrait | 1 + third_party/yaml-cpp | 1 + 33 files changed, 3132 insertions(+) create mode 100644 .clang-format create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 Makefile create mode 100644 core/CMakeLists.txt create mode 100644 core/common/CMakeLists.txt create mode 100644 core/common/Exceptions.cpp create mode 100644 core/function/CMakeLists.txt create mode 100644 core/function/Extension.cpp create mode 100644 core/function/Function.cpp create mode 100644 core/function/FunctionLookup.cpp create mode 100644 core/function/tests/CMakeLists.txt create mode 100644 core/function/tests/FunctionLookupTest.cpp create mode 100644 core/type/CMakeLists.txt 
create mode 100644 core/type/Type.cpp create mode 100644 core/type/tests/CMakeLists.txt create mode 100644 core/type/tests/TypeTest.cpp create mode 100644 include/common/Exceptions.h create mode 100644 include/function/Extension.h create mode 100644 include/function/Function.h create mode 100644 include/function/FunctionLookup.h create mode 100644 include/function/FunctionMapping.h create mode 100644 include/function/FunctionSignature.h create mode 100644 include/type/Type.h create mode 100755 scripts/setup-helper-functions.sh create mode 100755 scripts/setup-ubuntu.sh create mode 100644 third_party/CMakeLists.txt create mode 160000 third_party/fmt create mode 160000 third_party/googletest create mode 160000 third_party/substrait create mode 160000 third_party/yaml-cpp diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..eab4576f --- /dev/null +++ b/.clang-format @@ -0,0 +1,87 @@ +--- +AccessModifierOffset: -1 +AlignAfterOpenBracket: AlwaysBreak +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: false +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false 
+BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ForEachMacros: [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ] +IncludeCategories: + - Regex: '^<.*\.h(pp)?>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... 
diff --git a/.gitignore b/.gitignore index 259148fa..ca9d349b 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,5 @@ *.exe *.out *.app + +src/proto/substrait diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..aa615775 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,12 @@ +[submodule "third_party/yaml-cpp"] + path = third_party/yaml-cpp + url = https://github.com/jbeder/yaml-cpp.git +[submodule "third_party/googletest"] + path = third_party/googletest + url = https://github.com/google/googletest.git +[submodule "third_party/substrait"] + path = third_party/substrait + url = https://github.com/substrait-io/substrait.git +[submodule "third_party/fmt"] + path = third_party/fmt + url = https://github.com/fmtlib/fmt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..f873d78b --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,33 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +cmake_minimum_required(VERSION 3.10) + +# set the project name +project(substrait-cpp) + +message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +option( + BUILD_TESTING + "Enable substrait-cpp tests. This will enable all other build options automatically." 
+ ON) + +find_package(Protobuf REQUIRED) +include_directories(${PROTOBUF_INCLUDE_DIRS}) + +add_subdirectory(third_party) +include_directories(include) +add_subdirectory(core) diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..6b59d481 --- /dev/null +++ b/Makefile @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +.PHONY: all clean build debug release + +BUILD_TYPE := Release + +all: debug + +clean: + @rm -rf build-* + +build-common: + @mkdir -p build-${BUILD_TYPE} + @cd build-${BUILD_TYPE} && \ + cmake -Wno-dev \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DPREFER_STATIC_LIBS=OFF \ + $(FORCE_COLOR) \ + .. + +build: + VERBOSE=1 cmake --build build-${BUILD_TYPE} -j $${CPU_COUNT:-`nproc`} || \ + cmake --build build-${BUILD_TYPE} + +debug: + @$(MAKE) build-common BUILD_TYPE=Debug + @$(MAKE) build BUILD_TYPE=Debug + +release: + @$(MAKE) build-common BUILD_TYPE=Release + @$(MAKE) build BUILD_TYPE=Release diff --git a/README.md b/README.md index 36ca729e..65fbdfaa 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,36 @@ # substrait-cpp Planned home for CPP libraries to help build/consume Substrait query plans. 
+ +## Getting Started + +We provide scripts to help developers set up and install substrait-cpp dependencies. + +### Get the substrait-cpp Source +``` +git clone --recursive https://github.com/substrait-io/substrait-cpp.git +cd substrait-cpp +# if you are updating an existing checkout +git submodule sync --recursive +git submodule update --init --recursive +``` + +### Setting up on Linux (Ubuntu 20.04 or later) + +Once you have checked out substrait-cpp, you can set up and build like so: + +```shell +$ ./scripts/setup-ubuntu.sh +$ make +``` + +## Community + +The main communication channel for the Substrait community is the +[Substrait Slack channel](http://substrait.slack.com). + + +## License + +substrait-cpp is licensed under the Apache 2.0 License. A copy of the license +[can be found here.](LICENSE) \ No newline at end of file diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt new file mode 100644 index 00000000..ae435aad --- /dev/null +++ b/core/CMakeLists.txt @@ -0,0 +1,16 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +add_subdirectory(common) +add_subdirectory(type) +add_subdirectory(function) diff --git a/core/common/CMakeLists.txt b/core/common/CMakeLists.txt new file mode 100644 index 00000000..97ab6836 --- /dev/null +++ b/core/common/CMakeLists.txt @@ -0,0 +1,20 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_library( + substrait_common + Exceptions.cpp) + +target_link_libraries( + substrait_common + fmt) + diff --git a/core/common/Exceptions.cpp b/core/common/Exceptions.cpp new file mode 100644 index 00000000..5eeb608e --- /dev/null +++ b/core/common/Exceptions.cpp @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/Exceptions.h" +#include "fmt/format.h" + +namespace io::substrait::common { + +SubstraitException::SubstraitException( + std::string exceptionCode, + std::string& exceptionMessage, + Type exceptionType, + std::string exceptionName) + : msg_(fmt::format( + "Exception: {}\nError Code: {}\nType: {}\nReason: {}\n" + "Function: {}\nFile: {}\n:Line: {}\n", + exceptionName, + exceptionCode, + exceptionType == Type::kSystem ? 
"system" : "user", + exceptionMessage, + __FUNCTION__, + __FILE__, + std::to_string(__LINE__))) {} + +} // namespace io::substrait::common diff --git a/core/function/CMakeLists.txt b/core/function/CMakeLists.txt new file mode 100644 index 00000000..dce4c4d4 --- /dev/null +++ b/core/function/CMakeLists.txt @@ -0,0 +1,29 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set(FUNCTION_SRCS + Function.cpp + Extension.cpp + ../../include/function/FunctionMapping.h + ../../include/function/FunctionSignature.h + FunctionLookup.cpp) + +add_library(substrait_function ${FUNCTION_SRCS}) + +target_link_libraries( + substrait_function + substrait_type + yaml-cpp) + +if (${BUILD_TESTING}) + add_subdirectory(tests) +endif () \ No newline at end of file diff --git a/core/function/Extension.cpp b/core/function/Extension.cpp new file mode 100644 index 00000000..f7a6a374 --- /dev/null +++ b/core/function/Extension.cpp @@ -0,0 +1,291 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "function/Extension.h" +#include "yaml-cpp/yaml.h" + +bool decodeFunctionVariant( + const YAML::Node& node, + io::substrait::FunctionVariant& function) { + const auto& returnType = node["return"]; + if (returnType && returnType.IsScalar()) { + /// Return type can be an expression. + const auto& returnExpr = returnType.as(); + std::stringstream ss(returnExpr); + + // TODO: currently we only parse the last sentence of type definition, use + // ANTLR in future. + std::string lastReturnType; + while (std::getline(ss, lastReturnType, '\n')) { + } + function.returnType = io::substrait::Type::decode(lastReturnType); + } + const auto& args = node["args"]; + if (args && args.IsSequence()) { + for (auto& arg : args) { + if (arg["options"]) { // enum argument + auto enumArgument = std::make_shared( + arg.as()); + function.arguments.emplace_back(enumArgument); + } else if (arg["value"]) { // value argument + auto valueArgument = std::make_shared( + arg.as()); + function.arguments.emplace_back(valueArgument); + } else { // type argument + auto typeArgument = std::make_shared( + arg.as()); + function.arguments.emplace_back(typeArgument); + } + } + } + + const auto& variadic = node["variadic"]; + if (variadic) { + auto& min = variadic["min"]; + auto& max = variadic["max"]; + if (min) { + function.variadic = std::make_optional( + {min.as(), + max ? 
std::make_optional(max.as()) : std::nullopt}); + } else { + function.variadic = std::nullopt; + } + } else { + function.variadic = std::nullopt; + } + + return true; +} + +template <> +struct YAML::convert { + static bool decode(const Node& node, io::substrait::EnumArgument& argument) { + // 'options' is required property + const auto& options = node["options"]; + if (options && options.IsSequence()) { + auto& required = node["required"]; + argument.required = required && required.as(); + return true; + } else { + return false; + } + } +}; + +template <> +struct YAML::convert { + static bool decode(const Node& node, io::substrait::ValueArgument& argument) { + const auto& value = node["value"]; + if (value && value.IsScalar()) { + auto valueType = value.as(); + argument.type = io::substrait::Type::decode(valueType); + return true; + } + return false; + } +}; + +template <> +struct YAML::convert { + static bool decode( + const YAML::Node& node, + io::substrait::TypeArgument& argument) { + // no properties need to populate for type argument, just return true if + // 'type' element exists. 
+ if (node["type"]) { + return true; + } + return false; + } +}; + +template <> +struct YAML::convert { + static bool decode( + const Node& node, + io::substrait::ScalarFunctionVariant& function) { + return decodeFunctionVariant(node, function); + }; +}; + +template <> +struct YAML::convert { + static bool decode( + const Node& node, + io::substrait::AggregateFunctionVariant& function) { + const auto& res = decodeFunctionVariant(node, function); + if (res) { + const auto& intermediate = node["intermediate"]; + if (intermediate) { + function.intermediate = + io::substrait::ParameterizedType::decode(intermediate.as()); + } + } + return res; + } +}; + +template <> +struct YAML::convert { + static bool decode(const Node& node, io::substrait::TypeVariant& typeAnchor) { + const auto& name = node["name"]; + if (name && name.IsScalar()) { + typeAnchor.name = name.as(); + return true; + } + return false; + } +}; + +namespace io::substrait { + +std::shared_ptr Extension::load(const std::string& basePath) { + static const std::vector extensionFiles{ + "functions_aggregate_approx.yaml", + "functions_aggregate_generic.yaml", + "functions_arithmetic.yaml", + "functions_arithmetic_decimal.yaml", + "functions_boolean.yaml", + "functions_comparison.yaml", + "functions_datetime.yaml", + "functions_logarithmic.yaml", + "functions_rounding.yaml", + "functions_string.yaml", + "functions_set.yaml", + }; + return load(basePath, extensionFiles); +} + +std::shared_ptr Extension::load( + const std::string& basePath, + const std::vector& extensionFiles) { + std::vector yamlExtensionFiles; + yamlExtensionFiles.reserve(extensionFiles.size()); + for (auto& extensionFile : extensionFiles) { + auto const pos = basePath.find_last_of('/'); + const auto& extensionUri = basePath.substr(0, pos) + "/" + extensionFile; + yamlExtensionFiles.emplace_back(extensionUri); + } + return load(yamlExtensionFiles); +} + +std::shared_ptr Extension::load( + const std::vector& extensionFiles) { + auto extension = 
std::make_shared(); + for (const auto& extensionUri : extensionFiles) { + const auto& node = YAML::LoadFile(extensionUri); + + const auto& scalarFunctions = node["scalar_functions"]; + if (scalarFunctions && scalarFunctions.IsSequence()) { + for (auto& scalarFunctionNode : scalarFunctions) { + const auto functionName = scalarFunctionNode["name"].as(); + for (auto& scalaFunctionVariantNode : scalarFunctionNode["impls"]) { + auto scalarFunctionVariant = + scalaFunctionVariantNode.as(); + scalarFunctionVariant.name = functionName; + scalarFunctionVariant.uri = extensionUri; + extension->addScalarFunctionVariant( + std::make_shared(scalarFunctionVariant)); + } + } + } + + const auto& aggregateFunctions = node["aggregate_functions"]; + if (aggregateFunctions && aggregateFunctions.IsSequence()) { + for (auto& aggregateFunctionNode : aggregateFunctions) { + const auto functionName = + aggregateFunctionNode["name"].as(); + for (auto& aggregateFunctionVariantNode : + aggregateFunctionNode["impls"]) { + auto aggregateFunctionVariant = + aggregateFunctionVariantNode.as(); + aggregateFunctionVariant.name = functionName; + aggregateFunctionVariant.uri = extensionUri; + extension->addAggregateFunctionVariant( + std::make_shared( + aggregateFunctionVariant)); + } + } + } + + const auto& types = node["types"]; + if (types && types.IsSequence()) { + for (auto& type : types) { + auto typeAnchor = type.as(); + typeAnchor.uri = extensionUri; + extension->addTypeVariant(std::make_shared(typeAnchor)); + } + } + } + return extension; +} + +void Extension::addWindowFunctionVariant( + const FunctionVariantPtr& functionVariant) { + const auto& functionVariants = + windowFunctionVariantMap_.find(functionVariant->name); + if (functionVariants != windowFunctionVariantMap_.end()) { + auto& variants = functionVariants->second; + variants.emplace_back(functionVariant); + } else { + std::vector variants; + variants.emplace_back(functionVariant); + windowFunctionVariantMap_.insert( + 
{functionVariant->name, std::move(variants)}); + } +} + +void Extension::addTypeVariant(const TypeVariantPtr& functionVariant) { + typeVariantMap_.insert({functionVariant->name, functionVariant}); +} + +TypeVariantPtr Extension::lookupType(const std::string& typeName) const { + auto typeVariantIter = typeVariantMap_.find(typeName); + if (typeVariantIter != typeVariantMap_.end()) { + return typeVariantIter->second; + } + return nullptr; +} + +void Extension::addScalarFunctionVariant( + const FunctionVariantPtr& functionVariant) { + const auto& functionVariants = + scalarFunctionVariantMap_.find(functionVariant->name); + if (functionVariants != scalarFunctionVariantMap_.end()) { + auto& variants = functionVariants->second; + variants.emplace_back(functionVariant); + } else { + std::vector variants; + variants.emplace_back(functionVariant); + scalarFunctionVariantMap_.insert( + {functionVariant->name, std::move(variants)}); + } +} + +void Extension::addAggregateFunctionVariant( + const FunctionVariantPtr& functionVariant) { + const auto& functionVariants = + aggregateFunctionVariantMap_.find(functionVariant->name); + if (functionVariants != aggregateFunctionVariantMap_.end()) { + auto& variants = functionVariants->second; + variants.emplace_back(functionVariant); + } else { + std::vector variants; + variants.emplace_back(functionVariant); + aggregateFunctionVariantMap_.insert( + {functionVariant->name, std::move(variants)}); + } +} + +} // namespace io::substrait diff --git a/core/function/Function.cpp b/core/function/Function.cpp new file mode 100644 index 00000000..c831c521 --- /dev/null +++ b/core/function/Function.cpp @@ -0,0 +1,100 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "function/Function.h" +#include + +namespace io::substrait { + +std::string FunctionVariant::signature( + const std::string& name, + const std::vector& arguments) { + std::stringstream ss; + ss << name; + if (!arguments.empty()) { + ss << ":"; + for (auto it = arguments.begin(); it != arguments.end(); ++it) { + const auto& typeSign = (*it)->toTypeString(); + if (it == arguments.end() - 1) { + ss << typeSign; + } else { + ss << typeSign << "_"; + } + } + } + + return ss.str(); +} + +bool FunctionVariant::tryMatch(const FunctionSignature& signature) { + const auto& actualTypes = signature.arguments; + if (variadic.has_value()) { + // return false if actual types length less than min of variadic + const auto max = variadic->max; + if ((actualTypes.size() < variadic->min) || + (max.has_value() && actualTypes.size() > max.value())) { + return false; + } + + const auto& variadicArgument = arguments[0]; + // actual type must same as the variadicArgument + if (const auto& variadicValueArgument = + std::dynamic_pointer_cast(variadicArgument)) { + for (auto& actualType : actualTypes) { + if (!variadicValueArgument->type->isMatch(actualType)) { + return false; + } + } + } + } else { + std::vector> valueArguments; + for (const auto& argument : arguments) { + if (const auto& variadicValueArgument = + std::dynamic_pointer_cast(argument)) { + valueArguments.emplace_back(variadicValueArgument); + } + } + // return false if size of actual types not equal to size of value + // arguments. 
+ if (valueArguments.size() != actualTypes.size()) { + return false; + } + + for (auto i = 0; i < actualTypes.size(); i++) { + const auto& valueArgument = valueArguments[i]; + if (!valueArgument->type->isMatch(actualTypes[i])) { + return false; + } + } + } + const auto& sigReturnType = signature.returnType; + if (this->returnType && sigReturnType) { + return returnType->isMatch(sigReturnType); + } else { + return true; + } +} + +bool AggregateFunctionVariant::tryMatch(const FunctionSignature& signature) { + bool matched = FunctionVariant::tryMatch(signature); + if (!matched && intermediate) { + const auto& actualTypes = signature.arguments; + if (actualTypes.size() == 1) { + return intermediate->isMatch(actualTypes[0]); + } + } + return matched; +} + +} // namespace io::substrait diff --git a/core/function/FunctionLookup.cpp b/core/function/FunctionLookup.cpp new file mode 100644 index 00000000..f85a0c02 --- /dev/null +++ b/core/function/FunctionLookup.cpp @@ -0,0 +1,40 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "function/FunctionLookup.h" + +namespace io::substrait { + +FunctionVariantPtr FunctionLookup::lookupFunction( + const FunctionSignature& signature) const { + const auto& functionMappings = getFunctionMap(); + + const auto& substraitFunctionName = + functionMappings.find(signature.name) != functionMappings.end() + ? 
functionMappings.at(signature.name) + : signature.name; + + const auto& functionVariants = getFunctionVariants(); + auto functionVariantIter = functionVariants.find(substraitFunctionName); + if (functionVariantIter != functionVariants.end()) { + for (const auto& candidateFunctionVariant : functionVariantIter->second) { + if (candidateFunctionVariant->tryMatch(signature)) { + return candidateFunctionVariant; + } + } + } + return nullptr; +} + +} // namespace io::substrait diff --git a/core/function/tests/CMakeLists.txt b/core/function/tests/CMakeLists.txt new file mode 100644 index 00000000..209176a0 --- /dev/null +++ b/core/function/tests/CMakeLists.txt @@ -0,0 +1,25 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_executable( + substrait_function_test + FunctionLookupTest.cpp) + +add_test( + substrait_function_test + substrait_function_test) + +target_link_libraries( + substrait_function_test + substrait_function + gtest + gtest_main) diff --git a/core/function/tests/FunctionLookupTest.cpp b/core/function/tests/FunctionLookupTest.cpp new file mode 100644 index 00000000..217f7217 --- /dev/null +++ b/core/function/tests/FunctionLookupTest.cpp @@ -0,0 +1,144 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "function/FunctionLookup.h" +#include +#include + +using namespace io::substrait; + +class VeloxFunctionMappings : public FunctionMapping { + public: + /// scalar function names in difference between velox and Substrait. + const FunctionMap& scalaMapping() const override { + static const FunctionMap scalarMappings{ + {"plus", "add"}, + {"minus", "subtract"}, + {"mod", "modulus"}, + {"eq", "equal"}, + {"neq", "not_equal"}, + {"substr", "substring"}, + }; + return scalarMappings; + }; +}; + +class FunctionLookupTest : public ::testing::Test { + protected: + std::string getExtensionAbsolutePath() { + const std::string absolute_path = __FILE__; + auto const pos = absolute_path.find_last_of('/'); + return absolute_path.substr(0, pos) + + "/../../../third_party/substrait/extensions/"; + } + + void SetUp() override { + ExtensionPtr extension_ = Extension::load(getExtensionAbsolutePath()); + FunctionMappingPtr mappings_ = + std::make_shared(); + scalarFunctionLookup_ = + std::make_shared(extension_, mappings_); + aggregateFunctionLookup_ = + std::make_shared(extension_, mappings_); + } + + void testScalarFunctionLookup( + const FunctionSignature& inputSignature, + const std::string& outputSignature) { + const auto& functionVariant = + scalarFunctionLookup_->lookupFunction(inputSignature); + + ASSERT_TRUE(functionVariant != nullptr); + ASSERT_EQ(functionVariant->signature(), outputSignature); + } + + void testAggregateFunctionLookup( + const FunctionSignature& inputSignature, + const std::string& outputSignature) { + const auto& 
functionVariant = + aggregateFunctionLookup_->lookupFunction(inputSignature); + + ASSERT_TRUE(functionVariant != nullptr); + ASSERT_EQ(functionVariant->signature(), outputSignature); + } + + private: + FunctionLookupPtr scalarFunctionLookup_; + FunctionLookupPtr aggregateFunctionLookup_; +}; + +TEST_F(FunctionLookupTest, compare_function) { + testScalarFunctionLookup( + {"lt", {TINYINT(), TINYINT()}, BOOL()}, "lt:any1_any1"); + + testScalarFunctionLookup( + {"lt", {SMALLINT(), SMALLINT()}, BOOL()}, "lt:any1_any1"); + + testScalarFunctionLookup( + {"lt", {INTEGER(), INTEGER()}, BOOL()}, "lt:any1_any1"); + + testScalarFunctionLookup( + {"lt", {BIGINT(), BIGINT()}, BOOL()}, "lt:any1_any1"); + + testScalarFunctionLookup({"lt", {FLOAT(), FLOAT()}, BOOL()}, "lt:any1_any1"); + + testScalarFunctionLookup( + {"lt", {DOUBLE(), DOUBLE()}, BOOL()}, "lt:any1_any1"); + testScalarFunctionLookup( + {"between", {TINYINT(), TINYINT(), TINYINT()}, BOOL()}, + "between:any1_any1_any1"); +} + +TEST_F(FunctionLookupTest, arithmetic_function) { + testScalarFunctionLookup( + {"add", {TINYINT(), TINYINT()}, TINYINT()}, "add:opt_i8_i8"); + + testScalarFunctionLookup( + {"plus", {TINYINT(), TINYINT()}, TINYINT()}, "add:opt_i8_i8"); + testScalarFunctionLookup( + {"divide", + { + FLOAT(), + FLOAT(), + }, + FLOAT()}, + "divide:opt_opt_opt_fp32_fp32"); +} + +TEST_F(FunctionLookupTest, aggregate) { + // for intermediate type + testAggregateFunctionLookup( + {"avg", {ROW({DOUBLE(), BIGINT()})}, FLOAT()}, "avg:opt_fp32"); +} + +TEST_F(FunctionLookupTest, logical) { + testScalarFunctionLookup({"and", {}, BOOL()}, "and:bool"); + testScalarFunctionLookup({"and", {BOOL()},BOOL()}, "and:bool"); + testScalarFunctionLookup({"and", {BOOL(), BOOL()},BOOL()}, "and:bool"); + + testScalarFunctionLookup({"or", {BOOL(), BOOL()}, BOOL()}, "or:bool"); + testScalarFunctionLookup({"not", {BOOL()}, BOOL()}, "not:bool"); + testScalarFunctionLookup({"xor", {BOOL(), BOOL()}, BOOL()}, "xor:bool_bool"); +} + 
+TEST_F(FunctionLookupTest, string_function) { + testScalarFunctionLookup( + {"like", {STRING(), STRING()}, BOOL()}, "like:opt_str_str"); + testScalarFunctionLookup( + {"like", {VARCHAR(3), VARCHAR(4)}, BOOL()}, + "like:opt_vchar_vchar"); + testScalarFunctionLookup( + {"substr", {STRING(), INTEGER(), INTEGER()}, STRING()}, + "substring:str_i32_i32"); +} diff --git a/core/type/CMakeLists.txt b/core/type/CMakeLists.txt new file mode 100644 index 00000000..0de6bd5f --- /dev/null +++ b/core/type/CMakeLists.txt @@ -0,0 +1,24 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set(TYPE_SRCS + Type.cpp) + +add_library(substrait_type ${TYPE_SRCS}) + +target_link_libraries( + substrait_type + substrait_common) + +if (${BUILD_TESTING}) + add_subdirectory(tests) +endif () \ No newline at end of file diff --git a/core/type/Type.cpp b/core/type/Type.cpp new file mode 100644 index 00000000..fbbf7db5 --- /dev/null +++ b/core/type/Type.cpp @@ -0,0 +1,529 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "type/Type.h" +#include +#include +#include +#include "common/Exceptions.h" + +namespace io::substrait { + +namespace { + +size_t findNextComma(const std::string& str, size_t start) { + int cnt = 0; + for (auto i = start; i < str.size(); i++) { + if (str[i] == '<') { + cnt++; + } else if (str[i] == '>') { + cnt--; + } else if (cnt == 0 && str[i] == ',') { + return i; + } + } + + return std::string::npos; +} + +} // namespace + +ParameterizedTypePtr ParameterizedType::decode(const std::string& rawType) { + std::string matchingType = rawType; + std::transform( + matchingType.begin(), + matchingType.end(), + matchingType.begin(), + [](unsigned char c) { return std::tolower(c); }); + + const auto& questionMaskPos = matchingType.find_last_of('?'); + + bool nullable = questionMaskPos != std::string::npos; + + const auto& leftAngleBracketPos = matchingType.find('<'); + if (leftAngleBracketPos == std::string::npos) { + // deal with type and with a question mask like "i32?". + const auto& baseType = nullable + ? 
matchingType = matchingType.substr(0, questionMaskPos) + : matchingType; + + if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>( + nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>( + nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>( + nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared>(nullable); + } else if (matchingType.rfind("unknown", 0) == 0) { + return std::make_shared(rawType, nullable); + } else { + return std::make_shared(rawType); + } + } else { + const auto& rightAngleBracketPos = rawType.rfind('>'); + const auto& baseTypePos = nullable + ? 
std::min(leftAngleBracketPos, questionMaskPos) + : leftAngleBracketPos; + + const auto& baseType = matchingType.substr(0, baseTypePos); + + std::vector nestedTypes; + auto prevPos = leftAngleBracketPos + 1; + auto commaPos = findNextComma(rawType, prevPos); + while (commaPos != std::string::npos) { + auto token = rawType.substr(prevPos, commaPos - prevPos); + nestedTypes.emplace_back(decode(token)); + prevPos = commaPos + 1; + commaPos = findNextComma(rawType, prevPos); + } + auto token = rawType.substr(prevPos, rightAngleBracketPos - prevPos); + nestedTypes.emplace_back(decode(token)); + + if (TypeTraits::typeString == baseType) { + return std::make_shared(nestedTypes[0], nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared( + nestedTypes[0], nestedTypes[1], nullable); + } else if (TypeTraits::typeString == baseType) { + return std::make_shared(nestedTypes, nullable); + } else if (TypeTraits::typeString == baseType) { + StringLiteralPtr precision = + std::dynamic_pointer_cast(nestedTypes[0]); + StringLiteralPtr scale = + std::dynamic_pointer_cast(nestedTypes[1]); + return std::make_shared(precision, scale, nullable); + } else if (TypeTraits::typeString == baseType) { + auto length = + std::dynamic_pointer_cast(nestedTypes[0]); + return std::make_shared(length, nullable); + } else if (TypeTraits::typeString == baseType) { + auto length = + std::dynamic_pointer_cast(nestedTypes[0]); + return std::make_shared(length, nullable); + } else if (TypeTraits::typeString == baseType) { + auto length = + std::dynamic_pointer_cast(nestedTypes[0]); + return std::make_shared(length, nullable); + } else { + SUBSTRAIT_UNSUPPORTED("Unsupported type: " + rawType); + } + } +} + +std::string Decimal::signature() const { + std::stringstream sign; + sign << TypeBase::signature(); + sign << "<" << precision_ << "," << scale_ << ">"; + return sign.str(); +} + +bool Decimal::isMatch( + const std::shared_ptr& type) const { + if (auto decimalType = 
std::dynamic_pointer_cast(type)) { + return TypeBase::isMatch(type) && precision_ == decimalType->precision() && + scale_ == decimalType->scale(); + } + + return false; +} + +std::string FixedBinary::signature() const { + std::stringstream sign; + sign << TypeBase::signature(); + sign << "<" << length() << ">"; + return sign.str(); +} +bool FixedBinary::isMatch( + const std::shared_ptr& type) const { + if (auto fBinaryType = std::dynamic_pointer_cast(type)) { + return TypeBase::isMatch(type) && length_ == fBinaryType->length(); + } + + return false; +} + +std::string FixedChar::signature() const { + std::stringstream sign; + sign << TypeBase::signature(); + sign << "<" << length() << ">"; + return sign.str(); +} + +bool FixedChar::isMatch( + const std::shared_ptr& type) const { + if (auto fBinaryType = std::dynamic_pointer_cast(type)) { + return TypeBase::isMatch(type) && length_ == fBinaryType->length(); + } + return false; +} + +std::string Varchar::signature() const { + std::stringstream sign; + sign << TypeBase::signature(); + sign << "<" << length() << ">"; + return sign.str(); +} + +bool Varchar::isMatch( + const std::shared_ptr& type) const { + if (auto varcharType = std::dynamic_pointer_cast(type)) { + return TypeBase::isMatch(type) && length_ == varcharType->length(); + } + return false; +} + +std::string List::signature() const { + std::stringstream sign; + sign << TypeBase::signature(); + sign << "<" << elementType_->signature() << ">"; + return sign.str(); +} + +bool List::isMatch(const std::shared_ptr& type) const { + if (auto listType = std::dynamic_pointer_cast(type)) { + return TypeBase::isMatch(type) && + elementType()->isMatch(listType->elementType()); + } + return false; +} + +std::string Struct::signature() const { + std::stringstream sign; + sign << TypeBase::signature(); + sign << "<"; + for (auto it = children_.begin(); it != children_.end(); ++it) { + const auto& typeSign = (*it)->signature(); + if (it == children_.end() - 1) { + sign << 
typeSign; + } else { + sign << typeSign << ","; + } + } + sign << ">"; + return sign.str(); +} +bool Struct::isMatch( + const std::shared_ptr& type) const { + if (auto structType = std::dynamic_pointer_cast(type)) { + bool sameSize = structType->children_.size() == children_.size(); + if (sameSize) { + for (int i = 0; i < children_.size(); i++) { + if (!children_[i]->isMatch(structType->children_[i])) { + return false; + } + } + return true; + } + } + return false; +} + +std::string Map::signature() const { + std::stringstream sign; + sign << TypeBase::signature(); + sign << "<"; + sign << keyType()->signature(); + sign << ","; + sign << valueType()->signature(); + sign << ">"; + return sign.str(); +} + +bool Map::isMatch(const std::shared_ptr& type) const { + if (auto mapType = std::dynamic_pointer_cast(type)) { + return TypeBase::isMatch(type) && keyType()->isMatch(mapType->keyType()) && + valueType()->isMatch(mapType->valueType()); + } + return false; +} + +std::string ParameterizedFixedBinary::signature() const { + std::stringstream sign; + sign << TypeTraits::signature; + sign << "<" << length_->value() << ">"; + return sign.str(); +} + +bool ParameterizedFixedBinary::isMatch( + const std::shared_ptr& type) const { + if (auto parameterizedFixedBinary = + std::dynamic_pointer_cast(type)) { + return length()->isMatch(parameterizedFixedBinary->length()) && + nullMatch(type); + } + + return false; +} + +std::string ParameterizedDecimal::signature() const { + std::stringstream sign; + sign << TypeTraits::signature; + sign << "<" << precision_->value() << "," << scale_->value() << ">"; + return sign.str(); +} + +bool ParameterizedDecimal::isMatch( + const std::shared_ptr& type) const { + if (auto decimal = std::dynamic_pointer_cast(type)) { + return nullMatch(type); + } + + return false; +} + +std::string ParameterizedFixedChar::signature() const { + std::stringstream sign; + sign << TypeTraits::signature; + sign << "<" << length_->value() << ">"; + return 
sign.str(); +} +bool ParameterizedFixedChar::isMatch( + const std::shared_ptr& type) const { + if (auto fixedChar = std::dynamic_pointer_cast(type)) { + return nullMatch(type); + } + + return false; +} + +std::string ParameterizedVarchar::signature() const { + std::stringstream sign; + sign << TypeTraits::signature; + sign << "<" << length_->value() << ">"; + return sign.str(); +} + +bool ParameterizedVarchar::isMatch( + const std::shared_ptr& type) const { + if (auto varchar = std::dynamic_pointer_cast(type)) { + return nullMatch(type); + } + + return false; +} + +std::string ParameterizedList::signature() const { + std::stringstream sign; + sign << TypeTraits::signature; + sign << "<" << elementType()->signature() << ">"; + return sign.str(); +} + +bool ParameterizedList::isMatch( + const std::shared_ptr& type) const { + if (auto list = std::dynamic_pointer_cast(type)) { + return elementType()->isMatch(list->elementType()) && nullMatch(type); + } + + return false; +} + +std::string ParameterizedStruct::signature() const { + std::stringstream sign; + sign << TypeTraits::signature; + sign << "<"; + for (auto it = children_.begin(); it != children_.end(); ++it) { + const auto& typeSign = (*it)->signature(); + if (it == children_.end() - 1) { + sign << typeSign; + } else { + sign << typeSign << ","; + } + } + sign << ">"; + return sign.str(); +} + +bool ParameterizedStruct::isMatch( + const std::shared_ptr& type) const { + if (auto structType = std::dynamic_pointer_cast(type)) { + bool sameSize = structType->children().size() == children_.size(); + if (sameSize) { + for (int i = 0; i < children_.size(); i++) { + if (!children_[i]->isMatch(structType->children()[i])) { + return false; + } + } + return nullMatch(type); + } + } + return false; +} + +std::string ParameterizedMap::signature() const { + std::stringstream sign; + sign << TypeTraits::signature; + sign << "<"; + sign << keyType()->signature(); + sign << ","; + sign << valueType()->signature(); + sign << ">"; 
+ return sign.str(); +} + +bool ParameterizedMap::isMatch( + const std::shared_ptr& type) const { + if (auto mapType = std::dynamic_pointer_cast(type)) { + return keyType()->isMatch(mapType->keyType()) && + valueType()->isMatch(mapType->valueType()) && nullMatch(type); + } + return false; +} + +std::shared_ptr> BOOL() { + return std::make_shared>(false); +} + +std::shared_ptr> TINYINT() { + return std::make_shared>(false); +} + +std::shared_ptr> SMALLINT() { + return std::make_shared>(false); +} + +std::shared_ptr> INTEGER() { + return std::make_shared>(false); +} + +std::shared_ptr> BIGINT() { + return std::make_shared>(false); +} +std::shared_ptr> FLOAT() { + return std::make_shared>(false); +} + +std::shared_ptr> DOUBLE() { + return std::make_shared>(false); +} + +std::shared_ptr> STRING() { + return std::make_shared>(false); +} + +std::shared_ptr> BINARY() { + return std::make_shared>(false); +} + +std::shared_ptr> TIMESTAMP() { + return std::make_shared>(false); +} + +std::shared_ptr> DATE() { + return std::make_shared>(false); +} + +std::shared_ptr> TIME() { + return std::make_shared>(false); +} + +std::shared_ptr> INTERVAL_YEAR() { + return std::make_shared>(false); +} + +std::shared_ptr> INTERVAL_DAY() { + return std::make_shared>(false); +} + +std::shared_ptr> TIMESTAMP_TZ() { + return std::make_shared>(false); +} + +std::shared_ptr> UUID() { + return std::make_shared>(false); +} + +std::shared_ptr DECIMAL(int precision, int scale) { + return std::make_shared(precision, scale, false); +} + +std::shared_ptr VARCHAR(int len) { + return std::make_shared(len, false); +} +std::shared_ptr FCHAR(int len) { + return std::make_shared(len, false); +} + +std::shared_ptr FBinary(int len) { + return std::make_shared(len, false); +} + +std::shared_ptr LIST(const TypePtr& elementType) { + return std::make_shared(elementType, false); +} + +std::shared_ptr MAP( + const TypePtr& keyType, + const TypePtr& valueType) { + return std::make_shared(keyType, valueType, false); +} 
+ +std::shared_ptr ROW(const std::vector& children) { + return std::make_shared(children, false); +} + +std::shared_ptr FChar(int len) { + return std::make_shared(len); +} + +bool StringLiteral::isMatch( + const std::shared_ptr& type) const { + if (isWildcard()) { + return true; + } else { + if (auto stringLiteral = + std::dynamic_pointer_cast(type)) { + return value_ == stringLiteral->value_; + } + return false; + } +} + +bool UsedDefinedType::isMatch( + const std::shared_ptr& type) const { + if (auto udt = std::dynamic_pointer_cast(type)) { + return value_ == udt->value_ && nullable() == udt->nullable(); + } + return true; +} + +} // namespace io::substrait diff --git a/core/type/tests/CMakeLists.txt b/core/type/tests/CMakeLists.txt new file mode 100644 index 00000000..6b7119d2 --- /dev/null +++ b/core/type/tests/CMakeLists.txt @@ -0,0 +1,25 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_executable( + substrait_type_test + TypeTest.cpp) + +add_test( + substrait_type_test + substrait_type_test) + +target_link_libraries( + substrait_type_test + substrait_type + gtest + gtest_main) diff --git a/core/type/tests/TypeTest.cpp b/core/type/tests/TypeTest.cpp new file mode 100644 index 00000000..b9fe2cfe --- /dev/null +++ b/core/type/tests/TypeTest.cpp @@ -0,0 +1,175 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "type/Type.h" +#include + +using namespace io::substrait; + +class TypeTest : public ::testing::Test { + protected: + template + void testDecode(const std::string& rawType, const std::string& signature) { + const auto& type = ParameterizedType::decode(rawType); + ASSERT_TRUE(type->kind() == kind); + ASSERT_EQ(type->signature(), signature); + } + + void testType( + const ParameterizedTypePtr& type, + TypeKind kind, + const std::string& signature) { + ASSERT_EQ(type->kind(), kind); + ASSERT_EQ(type->signature(), signature); + } + + template + void testDecode( + const std::string& rawType, + const std::function&)>& + typeCallBack) { + const auto& type = ParameterizedType::decode(rawType); + if (typeCallBack) { + typeCallBack(std::dynamic_pointer_cast(type)); + } + } +}; + +TEST_F(TypeTest, typeCreator) { + testType(BOOL(), TypeKind::kBool, "bool"); + testType(TINYINT(), TypeKind::kI8, "i8"); + testType(SMALLINT(), TypeKind::kI16, "i16"); + testType(INTEGER(), TypeKind::kI32, "i32"); + testType(BIGINT(), TypeKind::kI64, "i64"); + testType(FLOAT(), TypeKind::kFp32, "fp32"); + testType(DOUBLE(), TypeKind::kFp64, "fp64"); + testType(BINARY(), TypeKind::kBinary, "vbin"); + testType(TIMESTAMP(), TypeKind::kTimestamp, "ts"); + testType(STRING(), TypeKind::kString, "str"); + testType(TIMESTAMP_TZ(), TypeKind::kTimestampTz, "tstz"); + testType(DATE(), TypeKind::kDate, "date"); + testType(TIME(), TypeKind::kTime, "time"); + testType(INTERVAL_DAY(), TypeKind::kIntervalDay, "iday"); + testType(INTERVAL_YEAR(), TypeKind::kIntervalYear, "iyear"); + 
testType(UUID(), TypeKind::kUuid, "uuid"); + testType(FChar(12), TypeKind::kFixedChar, "fchar<12>"); + testType(FBinary(12), TypeKind::kFixedBinary, "fbin<12>"); + testType(VARCHAR(12), TypeKind::kVarchar, "vchar<12>"); + testType(DECIMAL(12,23), TypeKind::kDecimal, "dec<12,23>"); + testType(LIST(FLOAT()), TypeKind::kList, "list"); + testType(MAP(STRING(),FLOAT()), TypeKind::kMap, "map"); + testType(ROW({STRING(),FLOAT()}), TypeKind::kStruct, "struct"); +} + +TEST_F(TypeTest, decodeTest) { + testDecode("i32?", "i32"); + testDecode("BOOLEAN", "bool"); + testDecode("boolean", "bool"); + testDecode("i8", "i8"); + testDecode("i16", "i16"); + testDecode("i32", "i32"); + testDecode("i64", "i64"); + testDecode("fp32", "fp32"); + testDecode("fp64", "fp64"); + testDecode("binary", "vbin"); + testDecode("timestamp", "ts"); + testDecode("string", "str"); + testDecode("timestamp_tz", "tstz"); + testDecode("date", "date"); + testDecode("time", "time"); + testDecode("interval_day", "iday"); + testDecode("interval_year", "iyear"); + testDecode("uuid", "uuid"); + + testDecode( + "fixedchar", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->length()->value(), "L1"); + ASSERT_EQ(typePtr->signature(), "fchar"); + }); + + testDecode( + "fixedbinary", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->length()->value(), "L1"); + ASSERT_EQ(typePtr->signature(), "fbin"); + }); + + testDecode( + "varchar", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "vchar"); + ASSERT_EQ(typePtr->length()->value(), "L1"); + }); + + testDecode( + "decimal", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "dec"); + ASSERT_EQ(typePtr->precision()->value(), "P"); + ASSERT_EQ(typePtr->scale()->value(), "S"); + }); + + testDecode( + "struct", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "struct"); + }); + + testDecode( + "struct>", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), 
"struct>"); + }); + + testDecode( + "list", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "list"); + }); + testDecode( + "LIST?", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "list"); + }); + + testDecode( + "map", + [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "map"); + }); + + testDecode( + "any1", [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "any1"); + ASSERT_TRUE(typePtr->isWildcard()); + }); + + testDecode( + "any", [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "any"); + ASSERT_TRUE(typePtr->isWildcard()); + }); + + testDecode( + "T", [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "T"); + ASSERT_TRUE(typePtr->isWildcard()); + }); + + testDecode( + "unknown", [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "u!name"); + }); +} diff --git a/include/common/Exceptions.h b/include/common/Exceptions.h new file mode 100644 index 00000000..df1f5720 --- /dev/null +++ b/include/common/Exceptions.h @@ -0,0 +1,140 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include "fmt/format.h" + +namespace io::substrait::common { +namespace error_code { + +//====================== User Error Codes ======================: + +// An error raised when an argument verification fails +inline constexpr auto kInvalidArgument = "INVALID_ARGUMENT"; + +// An error raised when a requested operation is not supported. +inline constexpr auto kUnsupported = "UNSUPPORTED"; + +//====================== Runtime Error Codes ======================: + +// An error raised when the current state of a component is invalid. +inline constexpr auto kInvalidState = "INVALID_STATE"; + +// An error raised when unreachable code point was executed. +inline constexpr auto kUnreachableCode = "UNREACHABLE_CODE"; + +// An error raised when a requested operation is not yet supported. +inline constexpr auto kNotImplemented = "NOT_IMPLEMENTED"; + +// An error raised when a method has been passed an illegal or inappropriate +// argument. +inline constexpr auto kIllegalArgument = "ILLEGAL_ARGUMENT"; + +} // namespace error_code + +class SubstraitException : public std::exception { + public: + enum class Type { kUser = 0, kSystem = 1 }; + + SubstraitException( + std::string exceptionCode, + std::string& exceptionMessage, + Type exceptionType = Type::kSystem, + std::string exceptionName = "SubstraitException"); + + // Inherited + const char* what() const noexcept override { + return msg_.c_str(); + } + + private: + const std::string msg_; +}; + +class SubstraitUserError : public SubstraitException { + public: + SubstraitUserError( + std::string exceptionCode, + std::string& exceptionMessage, + std::string exceptionName = "SubstraitUserError") + : SubstraitException( + exceptionCode, + exceptionMessage, + Type::kUser, + exceptionName) {} +}; + +class SubstraitRuntimeError final : public SubstraitException { + public: + SubstraitRuntimeError( + std::string exceptionCode, + std::string& exceptionMessage, + std::string exceptionName = 
"SubstraitRuntimeError") + : SubstraitException( + exceptionCode, + exceptionMessage, + Type::kSystem, + exceptionName) {} +}; + +template +std::string errorMessage(fmt::string_view fmt, const Args&... args) { + return fmt::vformat(fmt, fmt::make_format_args(args...)); +} + +#define _SUBSTRAIT_THROW(exception, errorCode, ...) \ + { \ + auto message = io::substrait::common::errorMessage(__VA_ARGS__); \ + throw exception(errorCode, message); \ + } + +#define SUBSTRAIT_UNSUPPORTED(...) \ + _SUBSTRAIT_THROW( \ + ::io::substrait::common::SubstraitUserError, \ + ::io::substrait::common::error_code::kUnsupported, \ + ##__VA_ARGS__) + +#define SUBSTRAIT_UNREACHABLE(...) \ + _SUBSTRAIT_THROW( \ + ::io::substrait::common::SubstraitRuntimeError, \ + ::io::substrait::common::error_code::kUnreachableCode, \ + ##__VA_ARGS__) + +#define SUBSTRAIT_FAIL(...) \ + _SUBSTRAIT_THROW( \ + ::io::substrait::common::SubstraitRuntimeError, \ + ::io::substrait::common::error_code::kInvalidState, \ + ##__VA_ARGS__) + +#define SUBSTRAIT_USER_FAIL(...) \ + _SUBSTRAIT_THROW( \ + ::io::substrait::common::SubstraitUserError, \ + ::io::substrait::common::error_code::kInvalidState, \ + ##__VA_ARGS__) + +#define SUBSTRAIT_NYI(...) \ + _SUBSTRAIT_THROW( \ + ::io::substrait::common::SubstraitRuntimeError, \ + ::io::substrait::common::error_code::kNotImplemented, \ + ##__VA_ARGS__) + +#define SUBSTRAIT_ILLEGAL_ARGUMENT(...) \ + _SUBSTRAIT_THROW( \ + ::io::substrait::common::SubstraitUserError, \ + ::io::substrait::common::error_code::kIllegalArgument, \ + ##__VA_ARGS__) + +} // namespace io::substrait::common diff --git a/include/function/Extension.h b/include/function/Extension.h new file mode 100644 index 00000000..e6f05ea6 --- /dev/null +++ b/include/function/Extension.h @@ -0,0 +1,90 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "function/FunctionSignature.h" +#include "function/Function.h" +#include "type/Type.h" + +namespace io::substrait { + +struct TypeVariant { + std::string name; + std::string uri; +}; + +using TypeVariantPtr = std::shared_ptr; + +using FunctionVariantMap = + std::unordered_map>; + +using TypeVariantMap = std::unordered_map; + +class Extension { + public: + /// Deserialize default substrait extension by given basePath + /// @throws exception if file not found + static std::shared_ptr load(const std::string& basePath); + + /// Deserialize substrait extension by given basePath and extensionFiles. + static std::shared_ptr load( + const std::string& basePath, + const std::vector& extensionFiles); + + /// Deserialize substrait extension by given extensionFiles. + static std::shared_ptr load( + const std::vector& extensionFiles); + + /// Add a scalar function variant. + void addScalarFunctionVariant(const FunctionVariantPtr& functionVariant); + + /// Add a aggregate function variant. + void addAggregateFunctionVariant(const FunctionVariantPtr& functionVariant); + + /// Add a window function variant. + void addWindowFunctionVariant(const FunctionVariantPtr& functionVariant); + + /// Add a type variant. + void addTypeVariant(const TypeVariantPtr& functionVariant); + + /// Lookup type variant by given type name. 
+ /// @return matched type variant + TypeVariantPtr lookupType(const std::string& typeName) const; + + const FunctionVariantMap& scalaFunctionVariantMap() const { + return scalarFunctionVariantMap_; + } + + const FunctionVariantMap& windowFunctionVariantMap() const { + return windowFunctionVariantMap_; + } + + const FunctionVariantMap& aggregateFunctionVariantMap() const { + return aggregateFunctionVariantMap_; + } + + private: + FunctionVariantMap scalarFunctionVariantMap_; + + FunctionVariantMap aggregateFunctionVariantMap_; + + FunctionVariantMap windowFunctionVariantMap_; + + TypeVariantMap typeVariantMap_; +}; + +using ExtensionPtr = std::shared_ptr; + +} // namespace io::substrait diff --git a/include/function/Function.h b/include/function/Function.h new file mode 100644 index 00000000..43a0ac32 --- /dev/null +++ b/include/function/Function.h @@ -0,0 +1,120 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "function/FunctionSignature.h" +#include "type/Type.h" + +namespace io::substrait { + +struct FunctionArgument { + virtual bool isRequired() const = 0; + + /// Convert argument type to short type string based on + /// https://substrait.io/extensions/#function-signature-compound-names + virtual std::string toTypeString() const = 0; + + virtual bool isWildcardType() const { + return false; + }; + + virtual bool isValueArgument() const { + return false; + } +}; + +using FunctionArgumentPtr = std::shared_ptr; + +struct EnumArgument : public FunctionArgument { + bool required; + + bool isRequired() const override { + return required; + } + + std::string toTypeString() const override { + return required ? "req" : "opt"; + } +}; + +struct TypeArgument : public FunctionArgument { + std::string toTypeString() const override { + return "type"; + } + + bool isRequired() const override { + return true; + } +}; + +struct ValueArgument : public FunctionArgument { + ParameterizedTypePtr type; + + std::string toTypeString() const override { + return type->signature(); + } + + bool isRequired() const override { + return true; + } + + bool isWildcardType() const override { + return type->isWildcard(); + } + + bool isValueArgument() const override { + return true; + } +}; + +struct FunctionVariadic { + int min; + std::optional max; +}; + +struct FunctionVariant { + std::string name; + std::string uri; + std::vector arguments; + ParameterizedTypePtr returnType; + std::optional variadic; + + /// Test if the actual types matched with this function variant. + virtual bool tryMatch(const FunctionSignature& signature); + + /// Create function signature by given function name and arguments. + static std::string signature( + const std::string& name, + const std::vector& arguments); + + /// Create function signature by function name and arguments. 
+ const std::string signature() const { + return signature(name, arguments); + } +}; + +using FunctionVariantPtr = std::shared_ptr; + +struct ScalarFunctionVariant : public FunctionVariant {}; + +struct AggregateFunctionVariant : public FunctionVariant { + ParameterizedTypePtr intermediate; + bool deterministic; + + + bool tryMatch(const FunctionSignature& signature) override; +}; + +} // namespace io::substrait diff --git a/include/function/FunctionLookup.h b/include/function/FunctionLookup.h new file mode 100644 index 00000000..27948125 --- /dev/null +++ b/include/function/FunctionLookup.h @@ -0,0 +1,98 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "function/Extension.h" +#include "function/FunctionMapping.h" +#include "function/FunctionSignature.h" + +namespace io::substrait { + +class FunctionLookup { + public: + FunctionLookup( + const ExtensionPtr& extension, + const FunctionMappingPtr& functionMapping) + : extension_(extension), functionMapping_(functionMapping) {} + + virtual FunctionVariantPtr lookupFunction( + const FunctionSignature& signature) const; + + virtual ~FunctionLookup() {} + + protected: + virtual FunctionMap getFunctionMap() const = 0; + + virtual FunctionVariantMap getFunctionVariants() const = 0; + + const FunctionMappingPtr functionMapping_; + + ExtensionPtr extension_; +}; + +using FunctionLookupPtr = std::shared_ptr; + +class ScalarFunctionLookup : public FunctionLookup { + public: + ScalarFunctionLookup( + const ExtensionPtr& extension, + const FunctionMappingPtr& functionMapping) + : FunctionLookup(extension, functionMapping) {} + + protected: + FunctionMap getFunctionMap() const override { + return functionMapping_->scalaMapping(); + } + + FunctionVariantMap getFunctionVariants() const override { + return extension_->scalaFunctionVariantMap(); + } +}; + +class AggregateFunctionLookup : public FunctionLookup { + public: + AggregateFunctionLookup( + const ExtensionPtr& extension, + const FunctionMappingPtr& functionMapping) + : FunctionLookup(extension, functionMapping) {} + + protected: + FunctionMap getFunctionMap() const override { + return functionMapping_->aggregateMapping(); + } + + FunctionVariantMap getFunctionVariants() const override { + return extension_->aggregateFunctionVariantMap(); + } +}; + +class WindowFunctionLookup : public FunctionLookup { + public: + WindowFunctionLookup( + const ExtensionPtr& extension, + const FunctionMappingPtr& functionMapping) + : FunctionLookup(extension, functionMapping) {} + + protected: + FunctionMap getFunctionMap() const override { + return functionMapping_->windowMapping(); + } + + FunctionVariantMap 
getFunctionVariants() const override { + return extension_->windowFunctionVariantMap(); + } +}; + +} // namespace io::substrait diff --git a/include/function/FunctionMapping.h b/include/function/FunctionMapping.h new file mode 100644 index 00000000..0e1bb8ab --- /dev/null +++ b/include/function/FunctionMapping.h @@ -0,0 +1,48 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace io::substrait { + +using FunctionMap = std::unordered_map; + +/// An interface describe the function names in difference between engine-own +/// and substrait system. +class FunctionMapping { + public: + /// Scalar function names in difference between engine own and substrait. + virtual const FunctionMap& scalaMapping() const { + static const FunctionMap scalaFunctionMap{}; + return scalaFunctionMap; + } + + /// Scalar function names in difference between engine own and substrait. + virtual const FunctionMap& aggregateMapping() const { + static const FunctionMap aggregateFunctionMap{}; + return aggregateFunctionMap; + } + + /// Window function names in difference between engine own and substrait. 
+ virtual const FunctionMap& windowMapping() const { + static const FunctionMap windowFunctionMap{}; + return windowFunctionMap; + } +}; + +using FunctionMappingPtr = std::shared_ptr; +} // namespace io::substrait diff --git a/include/function/FunctionSignature.h b/include/function/FunctionSignature.h new file mode 100644 index 00000000..4a3c78a4 --- /dev/null +++ b/include/function/FunctionSignature.h @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include "type/Type.h" + +namespace io::substrait { + +struct FunctionSignature { + std::string name; + std::vector arguments; + TypePtr returnType; +}; + +} // namespace io::substrait diff --git a/include/type/Type.h b/include/type/Type.h new file mode 100644 index 00000000..0c3cc6c0 --- /dev/null +++ b/include/type/Type.h @@ -0,0 +1,693 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace io::substrait { + +enum class TypeKind : int8_t { + kBool = 1, + kI8 = 2, + kI16 = 3, + kI32 = 5, + kI64 = 7, + kFp32 = 10, + kFp64 = 11, + kString = 12, + kBinary = 13, + kTimestamp = 14, + kDate = 16, + kTime = 17, + kIntervalYear = 19, + kIntervalDay = 20, + kTimestampTz = 29, + kUuid = 32, + kFixedChar = 21, + kVarchar = 22, + kFixedBinary = 23, + kDecimal = 24, + kStruct = 25, + kList = 27, + kMap = 28, + kUserDefined = 30, + KIND_NOT_SET = 0, +}; + +template +struct TypeTraits {}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "bool"; + static constexpr const char* typeString = "boolean"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "i8"; + static constexpr const char* typeString = "i8"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "i16"; + static constexpr const char* typeString = "i16"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "i32"; + static constexpr const char* typeString = "i32"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "i64"; + static constexpr const char* typeString = "i64"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "fp32"; + static constexpr const char* typeString = "fp32"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "fp64"; + static constexpr const char* typeString = "fp64"; +}; + 
+template <> +struct TypeTraits { + static constexpr const char* signature = "str"; + static constexpr const char* typeString = "string"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "vbin"; + static constexpr const char* typeString = "binary"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "ts"; + static constexpr const char* typeString = "timestamp"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "tstz"; + static constexpr const char* typeString = "timestamp_tz"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "date"; + static constexpr const char* typeString = "date"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "time"; + static constexpr const char* typeString = "time"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "iyear"; + static constexpr const char* typeString = "interval_year"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "iday"; + static constexpr const char* typeString = "interval_day"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "uuid"; + static constexpr const char* typeString = "uuid"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "fchar"; + static constexpr const char* typeString = "fixedchar"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "vchar"; + static constexpr const char* typeString = "varchar"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "fbin"; + static constexpr const char* typeString = "fixedbinary"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "dec"; + static constexpr const char* typeString = "decimal"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "struct"; + 
static constexpr const char* typeString = "struct"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "list"; + static constexpr const char* typeString = "list"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "map"; + static constexpr const char* typeString = "map"; +}; + +template <> +struct TypeTraits { + static constexpr const char* signature = "u!name"; + static constexpr const char* typeString = "user defined type"; +}; + +class ParameterizedType { + public: + ParameterizedType(bool nullable = false) : nullable_(nullable) {} + + virtual std::string signature() const = 0; + + virtual TypeKind kind() const = 0; + + /// Deserialize substrait raw type string into Substrait extension type. + /// @param rawType - substrait extension raw string type + static std::shared_ptr decode( + const std::string& rawType); + + const bool& nullable() const { + return nullable_; + } + + bool nullMatch(const std::shared_ptr& type) const { + return nullable() || nullable() == type->nullable(); + } + /// Test type is a Wildcard type or not. + virtual bool isWildcard() const { + return false; + } + + virtual bool isMatch( + const std::shared_ptr& type) const = 0; + + private: + const bool nullable_; +}; + +using ParameterizedTypePtr = std::shared_ptr; + +class Type : public ParameterizedType { + public: + Type(bool nullable = false) : ParameterizedType(nullable) {} +}; + +using TypePtr = std::shared_ptr; + +/// Types used in function argument declarations. 
+template +class TypeBase : public Type { + public: + TypeBase(bool nullable = false) : Type(nullable) {} + + std::string signature() const override { + return TypeTraits::signature; + } + + virtual TypeKind kind() const override { + return Kind; + } + + bool isMatch( + const std::shared_ptr& type) const override { + return kind() == type->kind() && nullMatch(type); + } +}; + +template +class ScalarType : public TypeBase { + public: + ScalarType(bool nullable) : TypeBase(nullable) {} +}; + +class Decimal : public TypeBase { + public: + Decimal(int precision, int scale, bool nullable = false) + : TypeBase(nullable), + precision_(precision), + scale_(scale) {} + + std::string signature() const override; + + const int& precision() const { + return precision_; + } + + const int& scale() const { + return scale_; + } + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const int precision_; + const int scale_; +}; + +class FixedBinary : public TypeBase { + public: + FixedBinary(int length, bool nullable = false) + : TypeBase(nullable), length_(length) {} + + const int& length() const { + return length_; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const int length_; +}; + +class FixedChar : public TypeBase { + public: + FixedChar(int length, bool nullable = false) + : TypeBase(nullable), length_(length){}; + + const int& length() const { + return length_; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const int length_; +}; + +class Varchar : public TypeBase { + public: + Varchar(int length, bool nullable = false) + : TypeBase(nullable), length_(length){}; + + const int& length() const { + return length_; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const int length_; +}; + +class List : public 
TypeBase { + public: + List(TypePtr elementType, bool nullable = false) + : TypeBase(nullable), + elementType_(std::move(elementType)){}; + + const TypePtr& elementType() const { + return elementType_; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const TypePtr elementType_; +}; + +class Struct : public TypeBase { + public: + Struct(std::vector types, bool nullable = false) + : TypeBase(nullable), children_(std::move(types)) {} + + std::string signature() const override; + + const std::vector& children() const { + return children_; + } + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const std::vector children_; +}; + +class Map : public TypeBase { + public: + Map(TypePtr keyType, TypePtr valueType, bool nullable = false) + : TypeBase(nullable), + keyType_(std::move(keyType)), + valueType_(std::move(valueType)) {} + + const TypePtr& keyType() const { + return keyType_; + } + + const TypePtr& valueType() const { + return valueType_; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const TypePtr keyType_; + const TypePtr valueType_; +}; + +/// ParameterizedType represent a type in +class ParameterizedTypeBase : public ParameterizedType { + public: + ParameterizedTypeBase(bool nullable = false) : ParameterizedType(nullable) {} +}; + +class UsedDefinedType : public ParameterizedTypeBase { + public: + UsedDefinedType(std::string value, bool nullable) + : ParameterizedTypeBase(nullable), value_(std::move(value)) {} + + const std::string& value() const { + return value_; + } + + TypeKind kind() const override { + return TypeKind::kUserDefined; + } + + std::string signature() const override { + return TypeTraits::signature; + } + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + /// raw string of wildcard type. 
+ const std::string value_; +}; + +/// A string literal type can present the 'any1'. +class StringLiteral : public ParameterizedTypeBase { + public: + StringLiteral(std::string value) + : ParameterizedTypeBase(false), value_(std::move(value)) {} + + std::string signature() const override { + return value_; + } + + TypeKind kind() const override { + return TypeKind::KIND_NOT_SET; + } + + const std::string& value() const { + return value_; + } + + bool isWildcard() const override { + return value_.find("any") == 0 || value_ == "T"; + } + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const std::string value_; +}; + +using StringLiteralPtr = std::shared_ptr; + +class ParameterizedDecimal : public ParameterizedTypeBase { + public: + ParameterizedDecimal( + StringLiteralPtr precision, + StringLiteralPtr scale, + bool nullable = false) + : ParameterizedTypeBase(nullable), + precision_(std::move(precision)), + scale_(std::move(scale)) {} + + std::string signature() const override; + + const StringLiteralPtr& precision() const { + return precision_; + } + + TypeKind kind() const override { + return TypeKind::kDecimal; + } + + const StringLiteralPtr& scale() const { + return scale_; + } + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + StringLiteralPtr precision_; + StringLiteralPtr scale_; +}; + +class ParameterizedFixedBinary : public ParameterizedTypeBase { + public: + ParameterizedFixedBinary(StringLiteralPtr length, bool nullable = false) + : ParameterizedTypeBase(nullable), length_(std::move(length)) {} + + const StringLiteralPtr& length() const { + return length_; + } + + TypeKind kind() const override { + return TypeKind::kFixedBinary; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const StringLiteralPtr length_; +}; + +class ParameterizedFixedChar : public ParameterizedTypeBase { + public: + 
ParameterizedFixedChar(StringLiteralPtr length, bool nullable = false) + : ParameterizedTypeBase(nullable), length_(std::move(length)) {} + + const StringLiteralPtr& length() const { + return length_; + } + + TypeKind kind() const override { + return TypeKind::kFixedChar; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const StringLiteralPtr length_; +}; + +class ParameterizedVarchar : public ParameterizedTypeBase { + public: + ParameterizedVarchar(const StringLiteralPtr& length, bool nullable = false) + : ParameterizedTypeBase(nullable), length_(length) {} + + const StringLiteralPtr& length() const { + return length_; + } + + TypeKind kind() const override { + return TypeKind::kVarchar; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const StringLiteralPtr length_; +}; + +class ParameterizedList : public ParameterizedTypeBase { + public: + ParameterizedList(ParameterizedTypePtr elementType, bool nullable = false) + : ParameterizedTypeBase(nullable), elementType_(std::move(elementType)){}; + + const ParameterizedTypePtr& elementType() const { + return elementType_; + } + + TypeKind kind() const override { + return TypeKind::kList; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const ParameterizedTypePtr elementType_; +}; + +class ParameterizedStruct : public ParameterizedTypeBase { + public: + ParameterizedStruct( + std::vector types, + bool nullable = false) + : ParameterizedTypeBase(nullable), children_(std::move(types)) {} + + std::string signature() const override; + + const std::vector& children() const { + return children_; + } + + TypeKind kind() const override { + return TypeKind::kStruct; + } + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const std::vector children_; +}; + +class 
ParameterizedMap : public ParameterizedTypeBase { + public: + ParameterizedMap( + ParameterizedTypePtr keyType, + ParameterizedTypePtr valueType, + bool nullable = false) + : ParameterizedTypeBase(nullable), + keyType_(std::move(keyType)), + valueType_(std::move(valueType)) {} + + const ParameterizedTypePtr& keyType() const { + return keyType_; + } + + TypeKind kind() const override { + return TypeKind::kMap; + } + const ParameterizedTypePtr& valueType() const { + return valueType_; + } + + std::string signature() const override; + + bool isMatch( + const std::shared_ptr& type) const override; + + private: + const ParameterizedTypePtr keyType_; + const ParameterizedTypePtr valueType_; +}; + +std::shared_ptr> BOOL(); + +std::shared_ptr> TINYINT(); + +std::shared_ptr> SMALLINT(); + +std::shared_ptr> INTEGER(); + +std::shared_ptr> BIGINT(); + +std::shared_ptr> FLOAT(); + +std::shared_ptr> DOUBLE(); + +std::shared_ptr> STRING(); + +std::shared_ptr> BINARY(); + +std::shared_ptr> TIMESTAMP(); + +std::shared_ptr> TIMESTAMP_TZ(); + +std::shared_ptr> DATE(); + +std::shared_ptr> TIME(); + +std::shared_ptr> INTERVAL_YEAR(); + +std::shared_ptr> INTERVAL_DAY(); + +std::shared_ptr> UUID(); + +std::shared_ptr DECIMAL(int precision, int scale); + +std::shared_ptr VARCHAR(int len); + +std::shared_ptr FChar(int len); + +std::shared_ptr FBinary(int len); + +std::shared_ptr LIST(const TypePtr& elementType); + +std::shared_ptr MAP( + const TypePtr& keyType, + const TypePtr& valueType); + +std::shared_ptr ROW(const std::vector& children); + +} // namespace io::substrait diff --git a/scripts/setup-helper-functions.sh b/scripts/setup-helper-functions.sh new file mode 100755 index 00000000..b6238d1d --- /dev/null +++ b/scripts/setup-helper-functions.sh @@ -0,0 +1,139 @@ +#!/bin/bash +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# github_checkout $REPO $VERSION $GIT_CLONE_PARAMS clones or re-uses an existing clone of the +# specified repo, checking out the requested version. +function github_checkout { + local REPO=$1 + shift + local VERSION=$1 + shift + local GIT_CLONE_PARAMS=$@ + local DIRNAME=$(basename $REPO) + cd "${DEPENDENCY_DIR}" + if [ -z "${DIRNAME}" ]; then + echo "Failed to get repo name from ${REPO}" + exit 1 + fi + if [ -d "${DIRNAME}" ] && prompt "${DIRNAME} already exists. Delete?"; then + rm -rf "${DIRNAME}" + fi + if [ ! -d "${DIRNAME}" ]; then + git clone -q -b $VERSION $GIT_CLONE_PARAMS "https://github.com/${REPO}.git" + fi + cd "${DIRNAME}" +} + + +# get_cxx_flags [$CPU_ARCH] +# Sets and exports the variable VELOX_CXX_FLAGS with appropriate compiler flags. +# If $CPU_ARCH is set then we use that else we determine best possible set of flags +# to use based on current cpu architecture. +# The goal of this function is to consolidate all architecture specific flags to one +# location. +# The values that CPU_ARCH can take are as follows: +# arm64 : Target Apple silicon. +# aarch64: Target general 64 bit arm cpus. +# avx: Target Intel CPUs with AVX. +# sse: Target Intel CPUs with sse. 
+# Echo's the appropriate compiler flags which can be captured as so +# CXX_FLAGS=$(get_cxx_flags) or +# CXX_FLAGS=$(get_cxx_flags "avx") + +function get_cxx_flags { + local CPU_ARCH=$1 + + local OS + OS=$(uname) + local MACHINE + MACHINE=$(uname -m) + + if [ -z "$CPU_ARCH" ]; then + + if [ "$OS" = "Darwin" ]; then + + if [ "$MACHINE" = "x86_64" ]; then + local CPU_CAPABILITIES + CPU_CAPABILITIES=$(sysctl -a | grep machdep.cpu.features | awk '{print tolower($0)}') + + if [[ $CPU_CAPABILITIES =~ "avx" ]]; then + CPU_ARCH="avx" + else + CPU_ARCH="sse" + fi + + elif [[ $(sysctl -a | grep machdep.cpu.brand_string) =~ "Apple" ]]; then + # Apple silicon. + CPU_ARCH="arm64" + fi + else [ "$OS" = "Linux" ]; + + local CPU_CAPABILITIES + CPU_CAPABILITIES=$(cat /proc/cpuinfo | grep flags | head -n 1| awk '{print tolower($0)}') + + if [[ "$CPU_CAPABILITIES" =~ "avx" ]]; then + CPU_ARCH="avx" + elif [[ "$CPU_CAPABILITIES" =~ "sse" ]]; then + CPU_ARCH="sse" + elif [ "$MACHINE" = "aarch64" ]; then + CPU_ARCH="aarch64" + fi + fi + fi + + case $CPU_ARCH in + + "arm64") + echo -n "-mcpu=apple-m1+crc -std=c++17" + ;; + + "avx") + echo -n "-mavx2 -mfma -mavx -mf16c -mlzcnt -std=c++17" + ;; + + "sse") + echo -n "-msse4.2 -std=c++17" + ;; + + "aarch64") + echo -n "-mcpu=neoverse-n1 -std=c++17" + ;; + *) + echo -n "Architecture not supported!" 
+ esac + +} + +function cmake_install { + local NAME=$(basename "$(pwd)") + local BINARY_DIR=_build + if [ -d "${BINARY_DIR}" ] && prompt "Do you want to rebuild ${NAME}?"; then + rm -rf "${BINARY_DIR}" + fi + mkdir -p "${BINARY_DIR}" + CPU_TARGET="${CPU_TARGET:-avx}" + COMPILER_FLAGS=$(get_cxx_flags $CPU_TARGET) + + # CMAKE_POSITION_INDEPENDENT_CODE is required so that Velox can be built into dynamic libraries \ + cmake -Wno-dev -B"${BINARY_DIR}" \ + -GNinja \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_CXX_STANDARD=17 \ + "${INSTALL_PREFIX+-DCMAKE_PREFIX_PATH=}${INSTALL_PREFIX-}" \ + "${INSTALL_PREFIX+-DCMAKE_INSTALL_PREFIX=}${INSTALL_PREFIX-}" \ + -DCMAKE_CXX_FLAGS="$COMPILER_FLAGS" \ + -DBUILD_TESTING=OFF \ + "$@" + ninja -C "${BINARY_DIR}" install +} + diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh new file mode 100755 index 00000000..dc0fe5f2 --- /dev/null +++ b/scripts/setup-ubuntu.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Minimal setup for Ubuntu 20.04. +set -eufx -o pipefail +SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}") +source $SCRIPTDIR/setup-helper-functions.sh + +CPU_TARGET="${CPU_TARGET:-avx}" +export COMPILER_FLAGS=$(get_cxx_flags $CPU_TARGET) +NPROC=$(getconf _NPROCESSORS_ONLN) +DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)} + +# Install all dependencies. 
+sudo --preserve-env apt install -y \ + g++ \ + cmake \ + ccache \ + ninja-build \ + checkinstall \ + git + +function run_and_time { + time "$@" + { echo "+ Finished running $*"; } 2> /dev/null +} + +function prompt { + ( + while true; do + local input="${PROMPT_ALWAYS_RESPOND:-}" + echo -n "$(tput bold)$* [Y, n]$(tput sgr0) " + [[ -z "${input}" ]] && read input + if [[ "${input}" == "Y" || "${input}" == "y" || "${input}" == "" ]]; then + return 0 + elif [[ "${input}" == "N" || "${input}" == "n" ]]; then + return 1 + fi + done + ) 2> /dev/null +} + +function install_protobuf { + wget https://github.com/protocolbuffers/protobuf/releases/download/v21.4/protobuf-all-21.4.tar.gz + tar -xzf protobuf-all-21.4.tar.gz + cd protobuf-21.4 + ./configure --prefix=/usr + make "-j$(nproc)" + make install + ldconfig +} + +function install_deps { + run_and_time install_protobuf +} + +(return 2> /dev/null) && return # If script was sourced, don't run commands. + +( + if [[ $# -ne 0 ]]; then + for cmd in "$@"; do + run_and_time "${cmd}" + done + else + install_deps + fi +) + +echo "All deps installed! Now try \"make\"" diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt new file mode 100644 index 00000000..59405e6a --- /dev/null +++ b/third_party/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_subdirectory(fmt) +include_directories(fmt/include) + +add_subdirectory(googletest) + +set(YAML_CPP_BUILD_TESTS OFF CACHE BOOL "Enable testing") +include_directories(yaml-cpp/include) +add_subdirectory(yaml-cpp) diff --git a/third_party/fmt b/third_party/fmt new file mode 160000 index 00000000..80f8d344 --- /dev/null +++ b/third_party/fmt @@ -0,0 +1 @@ +Subproject commit 80f8d34427d40ec5e7ce3b10ededc46bd4bd5759 diff --git a/third_party/googletest b/third_party/googletest new file mode 160000 index 00000000..3026483a --- /dev/null +++ b/third_party/googletest @@ -0,0 +1 @@ +Subproject commit 3026483ae575e2de942db5e760cf95e973308dd5 diff --git a/third_party/substrait b/third_party/substrait new file mode 160000 index 00000000..f3f6bdc9 --- /dev/null +++ b/third_party/substrait @@ -0,0 +1 @@ +Subproject commit f3f6bdc947e689e800279666ff33f118e42d2146 diff --git a/third_party/yaml-cpp b/third_party/yaml-cpp new file mode 160000 index 00000000..c90c08cc --- /dev/null +++ b/third_party/yaml-cpp @@ -0,0 +1 @@ +Subproject commit c90c08ccc9a08abcca609064fb9a856dfdbbb7b4 From 1add99038cf1d638e1d73d5e1036faa799e89585 Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Sat, 29 Oct 2022 14:29:19 +0800 Subject: [PATCH 02/23] Update Cmakelist --- core/function/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/function/CMakeLists.txt b/core/function/CMakeLists.txt index dce4c4d4..75763f85 100644 --- a/core/function/CMakeLists.txt +++ b/core/function/CMakeLists.txt @@ -13,8 +13,6 @@ set(FUNCTION_SRCS Function.cpp Extension.cpp - ../../include/function/FunctionMapping.h - ../../include/function/FunctionSignature.h FunctionLookup.cpp) add_library(substrait_function ${FUNCTION_SRCS}) From 59d955d7ae630d250a372e7172a4b7b0da728895 Mon Sep 17 00:00:00 2001 From: chaojun-zhang Date: Sun, 6 Nov 2022 15:55:26 +0800 Subject: [PATCH 03/23] refactor code structure --- CMakeLists.txt | 2 +- include/{ => substrait}/common/Exceptions.h | 35 ++-- include/{ => 
substrait}/function/Extension.h | 6 +- include/{ => substrait}/function/Function.h | 31 ++- .../{ => substrait}/function/FunctionLookup.h | 37 ++-- .../function/FunctionMapping.h | 6 +- .../function/FunctionSignature.h | 2 +- include/{ => substrait}/type/Type.h | 184 +++++++++--------- {core => substrait}/CMakeLists.txt | 0 {core => substrait}/common/CMakeLists.txt | 4 +- {core => substrait}/common/Exceptions.cpp | 8 +- {core => substrait}/function/CMakeLists.txt | 0 {core => substrait}/function/Extension.cpp | 2 +- {core => substrait}/function/Function.cpp | 2 +- .../function/FunctionLookup.cpp | 2 +- .../function/tests/CMakeLists.txt | 0 .../function/tests/FunctionLookupTest.cpp | 10 +- {core => substrait}/type/CMakeLists.txt | 0 {core => substrait}/type/Type.cpp | 4 +- {core => substrait}/type/tests/CMakeLists.txt | 0 {core => substrait}/type/tests/TypeTest.cpp | 2 +- third_party/CMakeLists.txt | 6 +- third_party/fmt | 2 +- third_party/googletest | 2 +- 24 files changed, 179 insertions(+), 168 deletions(-) rename include/{ => substrait}/common/Exceptions.h (80%) rename include/{ => substrait}/function/Extension.h (95%) rename include/{ => substrait}/function/Function.h (75%) rename include/{ => substrait}/function/FunctionLookup.h (65%) rename include/{ => substrait}/function/FunctionMapping.h (87%) rename include/{ => substrait}/function/FunctionSignature.h (96%) rename include/{ => substrait}/type/Type.h (74%) rename {core => substrait}/CMakeLists.txt (100%) rename {core => substrait}/common/CMakeLists.txt (93%) rename {core => substrait}/common/Exceptions.cpp (87%) rename {core => substrait}/function/CMakeLists.txt (100%) rename {core => substrait}/function/Extension.cpp (99%) rename {core => substrait}/function/Function.cpp (98%) rename {core => substrait}/function/FunctionLookup.cpp (96%) rename {core => substrait}/function/tests/CMakeLists.txt (100%) rename {core => substrait}/function/tests/FunctionLookupTest.cpp (93%) rename {core => 
substrait}/type/CMakeLists.txt (100%) rename {core => substrait}/type/Type.cpp (99%) rename {core => substrait}/type/tests/CMakeLists.txt (100%) rename {core => substrait}/type/tests/TypeTest.cpp (99%) diff --git a/CMakeLists.txt b/CMakeLists.txt index f873d78b..42283c40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,4 +30,4 @@ include_directories(${PROTOBUF_INCLUDE_DIRS}) add_subdirectory(third_party) include_directories(include) -add_subdirectory(core) +add_subdirectory(substrait) diff --git a/include/common/Exceptions.h b/include/substrait/common/Exceptions.h similarity index 80% rename from include/common/Exceptions.h rename to include/substrait/common/Exceptions.h index df1f5720..f0c07d66 100644 --- a/include/common/Exceptions.h +++ b/include/substrait/common/Exceptions.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include "fmt/format.h" namespace io::substrait::common { @@ -50,13 +51,13 @@ class SubstraitException : public std::exception { enum class Type { kUser = 0, kSystem = 1 }; SubstraitException( - std::string exceptionCode, - std::string& exceptionMessage, + const std::string& exceptionCode, + const std::string& exceptionMessage, Type exceptionType = Type::kSystem, - std::string exceptionName = "SubstraitException"); + const std::string& exceptionName = "SubstraitException"); // Inherited - const char* what() const noexcept override { + [[nodiscard]] const char* what() const noexcept override { return msg_.c_str(); } @@ -67,9 +68,9 @@ class SubstraitException : public std::exception { class SubstraitUserError : public SubstraitException { public: SubstraitUserError( - std::string exceptionCode, - std::string& exceptionMessage, - std::string exceptionName = "SubstraitUserError") + const std::string& exceptionCode, + const std::string& exceptionMessage, + const std::string& exceptionName = "SubstraitUserError") : SubstraitException( exceptionCode, exceptionMessage, @@ -80,9 +81,9 @@ class SubstraitUserError : public SubstraitException { 
class SubstraitRuntimeError final : public SubstraitException { public: SubstraitRuntimeError( - std::string exceptionCode, - std::string& exceptionMessage, - std::string exceptionName = "SubstraitRuntimeError") + const std::string& exceptionCode, + const std::string& exceptionMessage, + const std::string& exceptionName = "SubstraitRuntimeError") : SubstraitException( exceptionCode, exceptionMessage, @@ -95,44 +96,44 @@ std::string errorMessage(fmt::string_view fmt, const Args&... args) { return fmt::vformat(fmt, fmt::make_format_args(args...)); } -#define _SUBSTRAIT_THROW(exception, errorCode, ...) \ +#define SUBSTRAIT_THROW(exception, errorCode, ...) \ { \ auto message = io::substrait::common::errorMessage(__VA_ARGS__); \ throw exception(errorCode, message); \ } #define SUBSTRAIT_UNSUPPORTED(...) \ - _SUBSTRAIT_THROW( \ + SUBSTRAIT_THROW( \ ::io::substrait::common::SubstraitUserError, \ ::io::substrait::common::error_code::kUnsupported, \ ##__VA_ARGS__) #define SUBSTRAIT_UNREACHABLE(...) \ - _SUBSTRAIT_THROW( \ + SUBSTRAIT_THROW( \ ::io::substrait::common::SubstraitRuntimeError, \ ::io::substrait::common::error_code::kUnreachableCode, \ ##__VA_ARGS__) #define SUBSTRAIT_FAIL(...) \ - _SUBSTRAIT_THROW( \ + SUBSTRAIT_THROW( \ ::io::substrait::common::SubstraitRuntimeError, \ ::io::substrait::common::error_code::kInvalidState, \ ##__VA_ARGS__) #define SUBSTRAIT_USER_FAIL(...) \ - _SUBSTRAIT_THROW( \ + SUBSTRAIT_THROW( \ ::io::substrait::common::SubstraitUserError, \ ::io::substrait::common::error_code::kInvalidState, \ ##__VA_ARGS__) #define SUBSTRAIT_NYI(...) \ - _SUBSTRAIT_THROW( \ + SUBSTRAIT_THROW( \ ::io::substrait::common::SubstraitRuntimeError, \ ::io::substrait::common::error_code::kNotImplemented, \ ##__VA_ARGS__) #define SUBSTRAIT_ILLEGAL_ARGUMENT(...) 
\ - _SUBSTRAIT_THROW( \ + SUBSTRAIT_THROW( \ ::io::substrait::common::SubstraitUserError, \ ::io::substrait::common::error_code::kIllegalArgument, \ ##__VA_ARGS__) diff --git a/include/function/Extension.h b/include/substrait/function/Extension.h similarity index 95% rename from include/function/Extension.h rename to include/substrait/function/Extension.h index e6f05ea6..439f0f0d 100644 --- a/include/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -14,9 +14,9 @@ #pragma once -#include "function/FunctionSignature.h" -#include "function/Function.h" -#include "type/Type.h" +#include "substrait/function/Function.h" +#include "substrait/function/FunctionSignature.h" +#include "substrait/type/Type.h" namespace io::substrait { diff --git a/include/function/Function.h b/include/substrait/function/Function.h similarity index 75% rename from include/function/Function.h rename to include/substrait/function/Function.h index 43a0ac32..a4bcdf36 100644 --- a/include/function/Function.h +++ b/include/substrait/function/Function.h @@ -14,23 +14,23 @@ #pragma once -#include "function/FunctionSignature.h" -#include "type/Type.h" +#include "substrait/function/FunctionSignature.h" +#include "substrait/type/Type.h" namespace io::substrait { struct FunctionArgument { - virtual bool isRequired() const = 0; + [[nodiscard]] virtual bool isRequired() const = 0; /// Convert argument type to short type string based on /// https://substrait.io/extensions/#function-signature-compound-names - virtual std::string toTypeString() const = 0; + [[nodiscard]] virtual std::string toTypeString() const = 0; virtual bool isWildcardType() const { return false; }; - virtual bool isValueArgument() const { + [[nodiscard]] virtual bool isValueArgument() const { return false; } }; @@ -38,23 +38,23 @@ struct FunctionArgument { using FunctionArgumentPtr = std::shared_ptr; struct EnumArgument : public FunctionArgument { - bool required; + bool required{}; - bool isRequired() const override { + 
[[nodiscard]] bool isRequired() const override { return required; } - std::string toTypeString() const override { + [[nodiscard]] std::string toTypeString() const override { return required ? "req" : "opt"; } }; struct TypeArgument : public FunctionArgument { - std::string toTypeString() const override { + [[nodiscard]] std::string toTypeString() const override { return "type"; } - bool isRequired() const override { + [[nodiscard]] bool isRequired() const override { return true; } }; @@ -62,19 +62,19 @@ struct TypeArgument : public FunctionArgument { struct ValueArgument : public FunctionArgument { ParameterizedTypePtr type; - std::string toTypeString() const override { + [[nodiscard]] std::string toTypeString() const override { return type->signature(); } - bool isRequired() const override { + [[nodiscard]] bool isRequired() const override { return true; } - bool isWildcardType() const override { + [[nodiscard]] bool isWildcardType() const override { return type->isWildcard(); } - bool isValueArgument() const override { + [[nodiscard]] bool isValueArgument() const override { return true; } }; @@ -100,7 +100,7 @@ struct FunctionVariant { const std::vector& arguments); /// Create function signature by function name and arguments. 
- const std::string signature() const { + [[nodiscard]] const std::string signature() const { return signature(name, arguments); } }; @@ -113,7 +113,6 @@ struct AggregateFunctionVariant : public FunctionVariant { ParameterizedTypePtr intermediate; bool deterministic; - bool tryMatch(const FunctionSignature& signature) override; }; diff --git a/include/function/FunctionLookup.h b/include/substrait/function/FunctionLookup.h similarity index 65% rename from include/function/FunctionLookup.h rename to include/substrait/function/FunctionLookup.h index 27948125..9e6aa9c7 100644 --- a/include/function/FunctionLookup.h +++ b/include/substrait/function/FunctionLookup.h @@ -14,32 +14,33 @@ #pragma once -#include "function/Extension.h" -#include "function/FunctionMapping.h" -#include "function/FunctionSignature.h" +#include + +#include "substrait/function/Extension.h" +#include "substrait/function/FunctionMapping.h" +#include "substrait/function/FunctionSignature.h" namespace io::substrait { class FunctionLookup { public: - FunctionLookup( - const ExtensionPtr& extension, - const FunctionMappingPtr& functionMapping) - : extension_(extension), functionMapping_(functionMapping) {} + FunctionLookup(ExtensionPtr extension, FunctionMappingPtr functionMapping) + : extension_(std::move(extension)), + functionMapping_(std::move(functionMapping)) {} - virtual FunctionVariantPtr lookupFunction( + [[nodiscard]] virtual FunctionVariantPtr lookupFunction( const FunctionSignature& signature) const; - virtual ~FunctionLookup() {} + virtual ~FunctionLookup() = default; protected: - virtual FunctionMap getFunctionMap() const = 0; + [[nodiscard]] virtual FunctionMap getFunctionMap() const = 0; - virtual FunctionVariantMap getFunctionVariants() const = 0; + [[nodiscard]] virtual FunctionVariantMap getFunctionVariants() const = 0; const FunctionMappingPtr functionMapping_; - ExtensionPtr extension_; + ExtensionPtr extension_{}; }; using FunctionLookupPtr = std::shared_ptr; @@ -52,11 +53,11 @@ 
class ScalarFunctionLookup : public FunctionLookup { : FunctionLookup(extension, functionMapping) {} protected: - FunctionMap getFunctionMap() const override { + [[nodiscard]] FunctionMap getFunctionMap() const override { return functionMapping_->scalaMapping(); } - FunctionVariantMap getFunctionVariants() const override { + [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { return extension_->scalaFunctionVariantMap(); } }; @@ -69,11 +70,11 @@ class AggregateFunctionLookup : public FunctionLookup { : FunctionLookup(extension, functionMapping) {} protected: - FunctionMap getFunctionMap() const override { + [[nodiscard]] FunctionMap getFunctionMap() const override { return functionMapping_->aggregateMapping(); } - FunctionVariantMap getFunctionVariants() const override { + [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { return extension_->aggregateFunctionVariantMap(); } }; @@ -86,11 +87,11 @@ class WindowFunctionLookup : public FunctionLookup { : FunctionLookup(extension, functionMapping) {} protected: - FunctionMap getFunctionMap() const override { + [[nodiscard]] FunctionMap getFunctionMap() const override { return functionMapping_->windowMapping(); } - FunctionVariantMap getFunctionVariants() const override { + [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { return extension_->windowFunctionVariantMap(); } }; diff --git a/include/function/FunctionMapping.h b/include/substrait/function/FunctionMapping.h similarity index 87% rename from include/function/FunctionMapping.h rename to include/substrait/function/FunctionMapping.h index 0e1bb8ab..8be5b9b6 100644 --- a/include/function/FunctionMapping.h +++ b/include/substrait/function/FunctionMapping.h @@ -26,19 +26,19 @@ using FunctionMap = std::unordered_map; class FunctionMapping { public: /// Scalar function names in difference between engine own and substrait. 
- virtual const FunctionMap& scalaMapping() const { + [[nodiscard]] virtual const FunctionMap& scalaMapping() const { static const FunctionMap scalaFunctionMap{}; return scalaFunctionMap; } /// Scalar function names in difference between engine own and substrait. - virtual const FunctionMap& aggregateMapping() const { + [[nodiscard]] virtual const FunctionMap& aggregateMapping() const { static const FunctionMap aggregateFunctionMap{}; return aggregateFunctionMap; } /// Window function names in difference between engine own and substrait. - virtual const FunctionMap& windowMapping() const { + [[nodiscard]] virtual const FunctionMap& windowMapping() const { static const FunctionMap windowFunctionMap{}; return windowFunctionMap; } diff --git a/include/function/FunctionSignature.h b/include/substrait/function/FunctionSignature.h similarity index 96% rename from include/function/FunctionSignature.h rename to include/substrait/function/FunctionSignature.h index 4a3c78a4..66bce17a 100644 --- a/include/function/FunctionSignature.h +++ b/include/substrait/function/FunctionSignature.h @@ -19,7 +19,7 @@ #pragma once -#include "type/Type.h" +#include "substrait/type/Type.h" namespace io::substrait { diff --git a/include/type/Type.h b/include/substrait/type/Type.h similarity index 74% rename from include/type/Type.h rename to include/substrait/type/Type.h index 0c3cc6c0..868f74d3 100644 --- a/include/type/Type.h +++ b/include/substrait/type/Type.h @@ -199,30 +199,31 @@ struct TypeTraits { class ParameterizedType { public: - ParameterizedType(bool nullable = false) : nullable_(nullable) {} + explicit ParameterizedType(bool nullable = false) : nullable_(nullable) {} - virtual std::string signature() const = 0; + [[nodiscard]] virtual std::string signature() const = 0; - virtual TypeKind kind() const = 0; + [[nodiscard]] virtual TypeKind kind() const = 0; /// Deserialize substrait raw type string into Substrait extension type. 
/// @param rawType - substrait extension raw string type static std::shared_ptr decode( const std::string& rawType); - const bool& nullable() const { + [[nodiscard]] const bool& nullable() const { return nullable_; } - bool nullMatch(const std::shared_ptr& type) const { + [[nodiscard]] bool nullMatch( + const std::shared_ptr& type) const { return nullable() || nullable() == type->nullable(); } /// Test type is a Wildcard type or not. - virtual bool isWildcard() const { + [[nodiscard]] virtual bool isWildcard() const { return false; } - virtual bool isMatch( + [[nodiscard]] virtual bool isMatch( const std::shared_ptr& type) const = 0; private: @@ -233,7 +234,7 @@ using ParameterizedTypePtr = std::shared_ptr; class Type : public ParameterizedType { public: - Type(bool nullable = false) : ParameterizedType(nullable) {} + explicit Type(bool nullable = false) : ParameterizedType(nullable) {} }; using TypePtr = std::shared_ptr; @@ -242,17 +243,17 @@ using TypePtr = std::shared_ptr; template class TypeBase : public Type { public: - TypeBase(bool nullable = false) : Type(nullable) {} + explicit TypeBase(bool nullable = false) : Type(nullable) {} - std::string signature() const override { + [[nodiscard]] std::string signature() const override { return TypeTraits::signature; } - virtual TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return Kind; } - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override { return kind() == type->kind() && nullMatch(type); } @@ -261,7 +262,7 @@ class TypeBase : public Type { template class ScalarType : public TypeBase { public: - ScalarType(bool nullable) : TypeBase(nullable) {} + explicit ScalarType(bool nullable) : TypeBase(nullable) {} }; class Decimal : public TypeBase { @@ -271,17 +272,17 @@ class Decimal : public TypeBase { precision_(precision), scale_(scale) {} - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - const 
int& precision() const { + [[nodiscard]] const int& precision() const { return precision_; } - const int& scale() const { + [[nodiscard]] const int& scale() const { return scale_; } - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -291,16 +292,16 @@ class Decimal : public TypeBase { class FixedBinary : public TypeBase { public: - FixedBinary(int length, bool nullable = false) + explicit FixedBinary(int length, bool nullable = false) : TypeBase(nullable), length_(length) {} - const int& length() const { + [[nodiscard]] const int& length() const { return length_; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -309,16 +310,16 @@ class FixedBinary : public TypeBase { class FixedChar : public TypeBase { public: - FixedChar(int length, bool nullable = false) + explicit FixedChar(int length, bool nullable = false) : TypeBase(nullable), length_(length){}; - const int& length() const { + [[nodiscard]] const int& length() const { return length_; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -327,16 +328,16 @@ class FixedChar : public TypeBase { class Varchar : public TypeBase { public: - Varchar(int length, bool nullable = false) + explicit Varchar(int length, bool nullable = false) : TypeBase(nullable), length_(length){}; - const int& length() const { + [[nodiscard]] const int& length() const { return length_; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -345,17 +346,17 @@ class Varchar : public TypeBase { class List : public TypeBase { public: - List(TypePtr 
elementType, bool nullable = false) + explicit List(TypePtr elementType, bool nullable = false) : TypeBase(nullable), elementType_(std::move(elementType)){}; - const TypePtr& elementType() const { + [[nodiscard]] const TypePtr& elementType() const { return elementType_; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -364,16 +365,16 @@ class List : public TypeBase { class Struct : public TypeBase { public: - Struct(std::vector types, bool nullable = false) + explicit Struct(std::vector types, bool nullable = false) : TypeBase(nullable), children_(std::move(types)) {} - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - const std::vector& children() const { + [[nodiscard]] const std::vector& children() const { return children_; } - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -387,17 +388,17 @@ class Map : public TypeBase { keyType_(std::move(keyType)), valueType_(std::move(valueType)) {} - const TypePtr& keyType() const { + [[nodiscard]] const TypePtr& keyType() const { return keyType_; } - const TypePtr& valueType() const { + [[nodiscard]] const TypePtr& valueType() const { return valueType_; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -408,7 +409,8 @@ class Map : public TypeBase { /// ParameterizedType represent a type in class ParameterizedTypeBase : public ParameterizedType { public: - ParameterizedTypeBase(bool nullable = false) : ParameterizedType(nullable) {} + explicit ParameterizedTypeBase(bool nullable = false) + : ParameterizedType(nullable) {} }; class UsedDefinedType : public ParameterizedTypeBase { @@ -416,19 +418,19 @@ class UsedDefinedType 
: public ParameterizedTypeBase { UsedDefinedType(std::string value, bool nullable) : ParameterizedTypeBase(nullable), value_(std::move(value)) {} - const std::string& value() const { + [[nodiscard]] const std::string& value() const { return value_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kUserDefined; } - std::string signature() const override { + [[nodiscard]] std::string signature() const override { return TypeTraits::signature; } - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -439,26 +441,26 @@ class UsedDefinedType : public ParameterizedTypeBase { /// A string literal type can present the 'any1'. class StringLiteral : public ParameterizedTypeBase { public: - StringLiteral(std::string value) + explicit StringLiteral(std::string value) : ParameterizedTypeBase(false), value_(std::move(value)) {} - std::string signature() const override { + [[nodiscard]] std::string signature() const override { return value_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::KIND_NOT_SET; } - const std::string& value() const { + [[nodiscard]] const std::string& value() const { return value_; } - bool isWildcard() const override { + [[nodiscard]] bool isWildcard() const override { return value_.find("any") == 0 || value_ == "T"; } - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -477,21 +479,21 @@ class ParameterizedDecimal : public ParameterizedTypeBase { precision_(std::move(precision)), scale_(std::move(scale)) {} - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - const StringLiteralPtr& precision() const { + [[nodiscard]] const StringLiteralPtr& precision() const { return precision_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kDecimal; } - const 
StringLiteralPtr& scale() const { + [[nodiscard]] const StringLiteralPtr& scale() const { return scale_; } - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -501,20 +503,22 @@ class ParameterizedDecimal : public ParameterizedTypeBase { class ParameterizedFixedBinary : public ParameterizedTypeBase { public: - ParameterizedFixedBinary(StringLiteralPtr length, bool nullable = false) + explicit ParameterizedFixedBinary( + StringLiteralPtr length, + bool nullable = false) : ParameterizedTypeBase(nullable), length_(std::move(length)) {} - const StringLiteralPtr& length() const { + [[nodiscard]] const StringLiteralPtr& length() const { return length_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kFixedBinary; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -523,20 +527,22 @@ class ParameterizedFixedBinary : public ParameterizedTypeBase { class ParameterizedFixedChar : public ParameterizedTypeBase { public: - ParameterizedFixedChar(StringLiteralPtr length, bool nullable = false) + explicit ParameterizedFixedChar( + StringLiteralPtr length, + bool nullable = false) : ParameterizedTypeBase(nullable), length_(std::move(length)) {} - const StringLiteralPtr& length() const { + [[nodiscard]] const StringLiteralPtr& length() const { return length_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kFixedChar; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -545,20 +551,20 @@ class ParameterizedFixedChar : public ParameterizedTypeBase { class ParameterizedVarchar : public ParameterizedTypeBase { public: - 
ParameterizedVarchar(const StringLiteralPtr& length, bool nullable = false) - : ParameterizedTypeBase(nullable), length_(length) {} + explicit ParameterizedVarchar(StringLiteralPtr length, bool nullable = false) + : ParameterizedTypeBase(nullable), length_(std::move(length)) {} - const StringLiteralPtr& length() const { + [[nodiscard]] const StringLiteralPtr& length() const { return length_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kVarchar; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -567,20 +573,22 @@ class ParameterizedVarchar : public ParameterizedTypeBase { class ParameterizedList : public ParameterizedTypeBase { public: - ParameterizedList(ParameterizedTypePtr elementType, bool nullable = false) + explicit ParameterizedList( + ParameterizedTypePtr elementType, + bool nullable = false) : ParameterizedTypeBase(nullable), elementType_(std::move(elementType)){}; - const ParameterizedTypePtr& elementType() const { + [[nodiscard]] const ParameterizedTypePtr& elementType() const { return elementType_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kList; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -589,22 +597,22 @@ class ParameterizedList : public ParameterizedTypeBase { class ParameterizedStruct : public ParameterizedTypeBase { public: - ParameterizedStruct( + explicit ParameterizedStruct( std::vector types, bool nullable = false) : ParameterizedTypeBase(nullable), children_(std::move(types)) {} - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - const std::vector& children() const { + 
[[nodiscard]] const std::vector& children() const { return children_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kStruct; } - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: @@ -621,20 +629,20 @@ class ParameterizedMap : public ParameterizedTypeBase { keyType_(std::move(keyType)), valueType_(std::move(valueType)) {} - const ParameterizedTypePtr& keyType() const { + [[nodiscard]] const ParameterizedTypePtr& keyType() const { return keyType_; } - TypeKind kind() const override { + [[nodiscard]] TypeKind kind() const override { return TypeKind::kMap; } - const ParameterizedTypePtr& valueType() const { + [[nodiscard]] const ParameterizedTypePtr& valueType() const { return valueType_; } - std::string signature() const override; + [[nodiscard]] std::string signature() const override; - bool isMatch( + [[nodiscard]] bool isMatch( const std::shared_ptr& type) const override; private: diff --git a/core/CMakeLists.txt b/substrait/CMakeLists.txt similarity index 100% rename from core/CMakeLists.txt rename to substrait/CMakeLists.txt diff --git a/core/common/CMakeLists.txt b/substrait/common/CMakeLists.txt similarity index 93% rename from core/common/CMakeLists.txt rename to substrait/common/CMakeLists.txt index 97ab6836..bdedf591 100644 --- a/core/common/CMakeLists.txt +++ b/substrait/common/CMakeLists.txt @@ -10,11 +10,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+find_package(fmt) + add_library( substrait_common Exceptions.cpp) target_link_libraries( substrait_common - fmt) + fmt::fmt-header-only) diff --git a/core/common/Exceptions.cpp b/substrait/common/Exceptions.cpp similarity index 87% rename from core/common/Exceptions.cpp rename to substrait/common/Exceptions.cpp index 5eeb608e..b55ad429 100644 --- a/core/common/Exceptions.cpp +++ b/substrait/common/Exceptions.cpp @@ -12,16 +12,16 @@ * limitations under the License. */ -#include "common/Exceptions.h" +#include "substrait/common/Exceptions.h" #include "fmt/format.h" namespace io::substrait::common { SubstraitException::SubstraitException( - std::string exceptionCode, - std::string& exceptionMessage, + const std::string& exceptionCode, + const std::string& exceptionMessage, Type exceptionType, - std::string exceptionName) + const std::string& exceptionName) : msg_(fmt::format( "Exception: {}\nError Code: {}\nType: {}\nReason: {}\n" "Function: {}\nFile: {}\n:Line: {}\n", diff --git a/core/function/CMakeLists.txt b/substrait/function/CMakeLists.txt similarity index 100% rename from core/function/CMakeLists.txt rename to substrait/function/CMakeLists.txt diff --git a/core/function/Extension.cpp b/substrait/function/Extension.cpp similarity index 99% rename from core/function/Extension.cpp rename to substrait/function/Extension.cpp index f7a6a374..9d4dd5c7 100644 --- a/core/function/Extension.cpp +++ b/substrait/function/Extension.cpp @@ -12,7 +12,7 @@ * limitations under the License. */ -#include "function/Extension.h" +#include "substrait/function/Extension.h" #include "yaml-cpp/yaml.h" bool decodeFunctionVariant( diff --git a/core/function/Function.cpp b/substrait/function/Function.cpp similarity index 98% rename from core/function/Function.cpp rename to substrait/function/Function.cpp index c831c521..add718b2 100644 --- a/core/function/Function.cpp +++ b/substrait/function/Function.cpp @@ -12,7 +12,7 @@ * limitations under the License. 
*/ -#include "function/Function.h" +#include "substrait/function/Function.h" #include namespace io::substrait { diff --git a/core/function/FunctionLookup.cpp b/substrait/function/FunctionLookup.cpp similarity index 96% rename from core/function/FunctionLookup.cpp rename to substrait/function/FunctionLookup.cpp index f85a0c02..c594f568 100644 --- a/core/function/FunctionLookup.cpp +++ b/substrait/function/FunctionLookup.cpp @@ -12,7 +12,7 @@ * limitations under the License. */ -#include "function/FunctionLookup.h" +#include "substrait/function/FunctionLookup.h" namespace io::substrait { diff --git a/core/function/tests/CMakeLists.txt b/substrait/function/tests/CMakeLists.txt similarity index 100% rename from core/function/tests/CMakeLists.txt rename to substrait/function/tests/CMakeLists.txt diff --git a/core/function/tests/FunctionLookupTest.cpp b/substrait/function/tests/FunctionLookupTest.cpp similarity index 93% rename from core/function/tests/FunctionLookupTest.cpp rename to substrait/function/tests/FunctionLookupTest.cpp index 217f7217..1809461b 100644 --- a/core/function/tests/FunctionLookupTest.cpp +++ b/substrait/function/tests/FunctionLookupTest.cpp @@ -12,7 +12,7 @@ * limitations under the License. */ -#include "function/FunctionLookup.h" +#include "substrait/function/FunctionLookup.h" #include #include @@ -21,7 +21,7 @@ using namespace io::substrait; class VeloxFunctionMappings : public FunctionMapping { public: /// scalar function names in difference between velox and Substrait. 
- const FunctionMap& scalaMapping() const override { + [[nodiscard]] const FunctionMap& scalaMapping() const override { static const FunctionMap scalarMappings{ {"plus", "add"}, {"minus", "subtract"}, @@ -36,7 +36,7 @@ class VeloxFunctionMappings : public FunctionMapping { class FunctionLookupTest : public ::testing::Test { protected: - std::string getExtensionAbsolutePath() { + static std::string getExtensionAbsolutePath() { const std::string absolute_path = __FILE__; auto const pos = absolute_path.find_last_of('/'); return absolute_path.substr(0, pos) + @@ -124,8 +124,8 @@ TEST_F(FunctionLookupTest, aggregate) { TEST_F(FunctionLookupTest, logical) { testScalarFunctionLookup({"and", {}, BOOL()}, "and:bool"); - testScalarFunctionLookup({"and", {BOOL()},BOOL()}, "and:bool"); - testScalarFunctionLookup({"and", {BOOL(), BOOL()},BOOL()}, "and:bool"); + testScalarFunctionLookup({"and", {BOOL()}, BOOL()}, "and:bool"); + testScalarFunctionLookup({"and", {BOOL(), BOOL()}, BOOL()}, "and:bool"); testScalarFunctionLookup({"or", {BOOL(), BOOL()}, BOOL()}, "or:bool"); testScalarFunctionLookup({"not", {BOOL()}, BOOL()}, "not:bool"); diff --git a/core/type/CMakeLists.txt b/substrait/type/CMakeLists.txt similarity index 100% rename from core/type/CMakeLists.txt rename to substrait/type/CMakeLists.txt diff --git a/core/type/Type.cpp b/substrait/type/Type.cpp similarity index 99% rename from core/type/Type.cpp rename to substrait/type/Type.cpp index fbbf7db5..719bc55a 100644 --- a/core/type/Type.cpp +++ b/substrait/type/Type.cpp @@ -12,11 +12,11 @@ * limitations under the License. 
*/ -#include "type/Type.h" +#include "substrait/type/Type.h" #include #include #include -#include "common/Exceptions.h" +#include "substrait/common/Exceptions.h" namespace io::substrait { diff --git a/core/type/tests/CMakeLists.txt b/substrait/type/tests/CMakeLists.txt similarity index 100% rename from core/type/tests/CMakeLists.txt rename to substrait/type/tests/CMakeLists.txt diff --git a/core/type/tests/TypeTest.cpp b/substrait/type/tests/TypeTest.cpp similarity index 99% rename from core/type/tests/TypeTest.cpp rename to substrait/type/tests/TypeTest.cpp index b9fe2cfe..c8e0c79a 100644 --- a/core/type/tests/TypeTest.cpp +++ b/substrait/type/tests/TypeTest.cpp @@ -12,7 +12,7 @@ * limitations under the License. */ -#include "type/Type.h" +#include "substrait/type/Type.h" #include using namespace io::substrait; diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 59405e6a..32202afe 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -add_subdirectory(fmt) -include_directories(fmt/include) +# +#add_subdirectory(fmt) +#include_directories(fmt/include) add_subdirectory(googletest) diff --git a/third_party/fmt b/third_party/fmt index 80f8d344..9e8b86fd 160000 --- a/third_party/fmt +++ b/third_party/fmt @@ -1 +1 @@ -Subproject commit 80f8d34427d40ec5e7ce3b10ededc46bd4bd5759 +Subproject commit 9e8b86fd2d9806672cc73133d21780dd182bfd24 diff --git a/third_party/googletest b/third_party/googletest index 3026483a..d1a0039b 160000 --- a/third_party/googletest +++ b/third_party/googletest @@ -1 +1 @@ -Subproject commit 3026483ae575e2de942db5e760cf95e973308dd5 +Subproject commit d1a0039b97291dd1dc14f123b906bb7622ffe07c From a3cce71302b7ea10f6b00291b13c92e3fb50f5e0 Mon Sep 17 00:00:00 2001 From: ChaoJun Zhang Date: Tue, 8 Nov 2022 13:50:11 +0800 Subject: [PATCH 04/23] Update README.md Co-authored-by: Weston Pace --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 65fbdfaa..bc583cb6 100644 --- a/README.md +++ b/README.md @@ -33,4 +33,4 @@ The main communication channel with the substrait through the ## License substrait-cpp is licensed under the Apache 2.0 License. A copy of the license -[can be found here.](LICENSE) \ No newline at end of file +[can be found here.](https://www.apache.org/licenses/LICENSE-2.0.html) \ No newline at end of file From cc3bf4f2ccc2d292b8e115219c54b77eaf7c889e Mon Sep 17 00:00:00 2001 From: ChaoJun Zhang Date: Tue, 8 Nov 2022 13:50:36 +0800 Subject: [PATCH 05/23] Update README.md Co-authored-by: Weston Pace --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index bc583cb6..87090451 100644 --- a/README.md +++ b/README.md @@ -24,12 +24,6 @@ $ ./scripts/setup-ubuntu.sh $ make ``` -## Community - -The main communication channel with the substrait through the -[substrait chanel](http://substrait.slack.com). - - ## License substrait-cpp is licensed under the Apache 2.0 License. 
A copy of the license From f5e9d6fde85a9b6bbc2d3b30f593f42647870ab5 Mon Sep 17 00:00:00 2001 From: ChaoJun Zhang Date: Tue, 8 Nov 2022 13:50:48 +0800 Subject: [PATCH 06/23] Update include/substrait/common/Exceptions.h Co-authored-by: Weston Pace --- include/substrait/common/Exceptions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/substrait/common/Exceptions.h b/include/substrait/common/Exceptions.h index f0c07d66..8aa5c11c 100644 --- a/include/substrait/common/Exceptions.h +++ b/include/substrait/common/Exceptions.h @@ -16,7 +16,7 @@ #include #include -#include "fmt/format.h" +#include namespace io::substrait::common { namespace error_code { From ed29e4426f3db6ff72ba19ed677bba080a555f40 Mon Sep 17 00:00:00 2001 From: ChaoJun Zhang Date: Tue, 8 Nov 2022 13:51:09 +0800 Subject: [PATCH 07/23] Update substrait/common/Exceptions.cpp Co-authored-by: Weston Pace --- substrait/common/Exceptions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/substrait/common/Exceptions.cpp b/substrait/common/Exceptions.cpp index b55ad429..39768bc9 100644 --- a/substrait/common/Exceptions.cpp +++ b/substrait/common/Exceptions.cpp @@ -13,7 +13,7 @@ */ #include "substrait/common/Exceptions.h" -#include "fmt/format.h" +#include namespace io::substrait::common { From a78a513160229b393d3f5210d88b1582630049e3 Mon Sep 17 00:00:00 2001 From: ChaoJun Zhang Date: Tue, 8 Nov 2022 13:51:21 +0800 Subject: [PATCH 08/23] Update include/substrait/function/FunctionLookup.h Co-authored-by: Weston Pace --- include/substrait/function/FunctionLookup.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/substrait/function/FunctionLookup.h b/include/substrait/function/FunctionLookup.h index 9e6aa9c7..5ffab5fb 100644 --- a/include/substrait/function/FunctionLookup.h +++ b/include/substrait/function/FunctionLookup.h @@ -14,7 +14,6 @@ #pragma once -#include #include "substrait/function/Extension.h" #include "substrait/function/FunctionMapping.h" From 
ae7299ce574349253f18aa8cff119fb6aa051e69 Mon Sep 17 00:00:00 2001 From: ChaoJun Zhang Date: Tue, 8 Nov 2022 13:51:43 +0800 Subject: [PATCH 09/23] Update include/substrait/function/Extension.h Co-authored-by: Weston Pace --- include/substrait/function/Extension.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/substrait/function/Extension.h b/include/substrait/function/Extension.h index 439f0f0d..d6f07f29 100644 --- a/include/substrait/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -14,6 +14,11 @@ #pragma once +#include +#include +#include +#include + #include "substrait/function/Function.h" #include "substrait/function/FunctionSignature.h" #include "substrait/type/Type.h" From db6a20286c3aac840a86f1cd69d9c2b22801ec62 Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Tue, 8 Nov 2022 14:20:19 +0800 Subject: [PATCH 10/23] fix issues --- include/substrait/common/Exceptions.h | 30 +++++------ include/substrait/function/Extension.h | 4 +- include/substrait/function/Function.h | 10 ++-- include/substrait/function/FunctionLookup.h | 45 ++++------------ include/substrait/function/FunctionMapping.h | 48 ----------------- .../substrait/function/FunctionSignature.h | 4 +- include/substrait/type/Type.h | 4 +- substrait/common/Exceptions.cpp | 6 +-- substrait/function/Extension.cpp | 52 +++++++++---------- substrait/function/Function.cpp | 6 +-- substrait/function/FunctionLookup.cpp | 12 ++--- .../function/tests/FunctionLookupTest.cpp | 32 +++--------- substrait/type/Type.cpp | 6 +-- substrait/type/tests/TypeTest.cpp | 6 +-- 14 files changed, 83 insertions(+), 182 deletions(-) delete mode 100644 include/substrait/function/FunctionMapping.h diff --git a/include/substrait/common/Exceptions.h b/include/substrait/common/Exceptions.h index 8aa5c11c..a5bb072a 100644 --- a/include/substrait/common/Exceptions.h +++ b/include/substrait/common/Exceptions.h @@ -18,7 +18,7 @@ #include #include -namespace io::substrait::common { +namespace 
substrait::common { namespace error_code { //====================== User Error Codes ======================: @@ -98,44 +98,44 @@ std::string errorMessage(fmt::string_view fmt, const Args&... args) { #define SUBSTRAIT_THROW(exception, errorCode, ...) \ { \ - auto message = io::substrait::common::errorMessage(__VA_ARGS__); \ + auto message = substrait::common::errorMessage(__VA_ARGS__); \ throw exception(errorCode, message); \ } #define SUBSTRAIT_UNSUPPORTED(...) \ SUBSTRAIT_THROW( \ - ::io::substrait::common::SubstraitUserError, \ - ::io::substrait::common::error_code::kUnsupported, \ + substrait::common::SubstraitUserError, \ + substrait::common::error_code::kUnsupported, \ ##__VA_ARGS__) #define SUBSTRAIT_UNREACHABLE(...) \ SUBSTRAIT_THROW( \ - ::io::substrait::common::SubstraitRuntimeError, \ - ::io::substrait::common::error_code::kUnreachableCode, \ + substrait::common::SubstraitRuntimeError, \ + substrait::common::error_code::kUnreachableCode, \ ##__VA_ARGS__) #define SUBSTRAIT_FAIL(...) \ SUBSTRAIT_THROW( \ - ::io::substrait::common::SubstraitRuntimeError, \ - ::io::substrait::common::error_code::kInvalidState, \ + ::substrait::common::SubstraitRuntimeError, \ + ::substrait::common::error_code::kInvalidState, \ ##__VA_ARGS__) #define SUBSTRAIT_USER_FAIL(...) \ SUBSTRAIT_THROW( \ - ::io::substrait::common::SubstraitUserError, \ - ::io::substrait::common::error_code::kInvalidState, \ + substrait::common::SubstraitUserError, \ + substrait::common::error_code::kInvalidState, \ ##__VA_ARGS__) #define SUBSTRAIT_NYI(...) \ SUBSTRAIT_THROW( \ - ::io::substrait::common::SubstraitRuntimeError, \ - ::io::substrait::common::error_code::kNotImplemented, \ + substrait::common::SubstraitRuntimeError, \ + substrait::common::error_code::kNotImplemented, \ ##__VA_ARGS__) #define SUBSTRAIT_ILLEGAL_ARGUMENT(...) 
\ SUBSTRAIT_THROW( \ - ::io::substrait::common::SubstraitUserError, \ - ::io::substrait::common::error_code::kIllegalArgument, \ + substrait::common::SubstraitUserError, \ + substrait::common::error_code::kIllegalArgument, \ ##__VA_ARGS__) -} // namespace io::substrait::common +} // namespace substrait::common diff --git a/include/substrait/function/Extension.h b/include/substrait/function/Extension.h index d6f07f29..fa2c1289 100644 --- a/include/substrait/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -23,7 +23,7 @@ #include "substrait/function/FunctionSignature.h" #include "substrait/type/Type.h" -namespace io::substrait { +namespace substrait { struct TypeVariant { std::string name; @@ -92,4 +92,4 @@ class Extension { using ExtensionPtr = std::shared_ptr; -} // namespace io::substrait +} // namespace substrait diff --git a/include/substrait/function/Function.h b/include/substrait/function/Function.h index a4bcdf36..8bbb5695 100644 --- a/include/substrait/function/Function.h +++ b/include/substrait/function/Function.h @@ -14,10 +14,10 @@ #pragma once -#include "substrait/function/FunctionSignature.h" #include "substrait/type/Type.h" +#include "substrait/function/FunctionSignature.h" -namespace io::substrait { +namespace substrait { struct FunctionArgument { [[nodiscard]] virtual bool isRequired() const = 0; @@ -26,7 +26,7 @@ struct FunctionArgument { /// https://substrait.io/extensions/#function-signature-compound-names [[nodiscard]] virtual std::string toTypeString() const = 0; - virtual bool isWildcardType() const { + [[nodiscard]] virtual bool isWildcardType() const { return false; }; @@ -100,7 +100,7 @@ struct FunctionVariant { const std::vector& arguments); /// Create function signature by function name and arguments. 
- [[nodiscard]] const std::string signature() const { + [[nodiscard]] std::string signature() const { return signature(name, arguments); } }; @@ -116,4 +116,4 @@ struct AggregateFunctionVariant : public FunctionVariant { bool tryMatch(const FunctionSignature& signature) override; }; -} // namespace io::substrait +} // namespace substrait diff --git a/include/substrait/function/FunctionLookup.h b/include/substrait/function/FunctionLookup.h index 5ffab5fb..910ac36d 100644 --- a/include/substrait/function/FunctionLookup.h +++ b/include/substrait/function/FunctionLookup.h @@ -14,18 +14,15 @@ #pragma once - #include "substrait/function/Extension.h" -#include "substrait/function/FunctionMapping.h" #include "substrait/function/FunctionSignature.h" -namespace io::substrait { +namespace substrait { class FunctionLookup { public: - FunctionLookup(ExtensionPtr extension, FunctionMappingPtr functionMapping) - : extension_(std::move(extension)), - functionMapping_(std::move(functionMapping)) {} + explicit FunctionLookup(ExtensionPtr extension) + : extension_(std::move(extension)) {} [[nodiscard]] virtual FunctionVariantPtr lookupFunction( const FunctionSignature& signature) const; @@ -33,12 +30,8 @@ class FunctionLookup { virtual ~FunctionLookup() = default; protected: - [[nodiscard]] virtual FunctionMap getFunctionMap() const = 0; - [[nodiscard]] virtual FunctionVariantMap getFunctionVariants() const = 0; - const FunctionMappingPtr functionMapping_; - ExtensionPtr extension_{}; }; @@ -46,16 +39,10 @@ using FunctionLookupPtr = std::shared_ptr; class ScalarFunctionLookup : public FunctionLookup { public: - ScalarFunctionLookup( - const ExtensionPtr& extension, - const FunctionMappingPtr& functionMapping) - : FunctionLookup(extension, functionMapping) {} + ScalarFunctionLookup(const ExtensionPtr& extension) + : FunctionLookup(extension) {} protected: - [[nodiscard]] FunctionMap getFunctionMap() const override { - return functionMapping_->scalaMapping(); - } - [[nodiscard]] 
FunctionVariantMap getFunctionVariants() const override { return extension_->scalaFunctionVariantMap(); } @@ -63,16 +50,10 @@ class ScalarFunctionLookup : public FunctionLookup { class AggregateFunctionLookup : public FunctionLookup { public: - AggregateFunctionLookup( - const ExtensionPtr& extension, - const FunctionMappingPtr& functionMapping) - : FunctionLookup(extension, functionMapping) {} + explicit AggregateFunctionLookup(const ExtensionPtr& extension) + : FunctionLookup(extension) {} protected: - [[nodiscard]] FunctionMap getFunctionMap() const override { - return functionMapping_->aggregateMapping(); - } - [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { return extension_->aggregateFunctionVariantMap(); } @@ -80,19 +61,13 @@ class AggregateFunctionLookup : public FunctionLookup { class WindowFunctionLookup : public FunctionLookup { public: - WindowFunctionLookup( - const ExtensionPtr& extension, - const FunctionMappingPtr& functionMapping) - : FunctionLookup(extension, functionMapping) {} + explicit WindowFunctionLookup(const ExtensionPtr& extension) + : FunctionLookup(extension) {} protected: - [[nodiscard]] FunctionMap getFunctionMap() const override { - return functionMapping_->windowMapping(); - } - [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { return extension_->windowFunctionVariantMap(); } }; -} // namespace io::substrait +} // namespace substrait diff --git a/include/substrait/function/FunctionMapping.h b/include/substrait/function/FunctionMapping.h deleted file mode 100644 index 8be5b9b6..00000000 --- a/include/substrait/function/FunctionMapping.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace io::substrait { - -using FunctionMap = std::unordered_map; - -/// An interface describe the function names in difference between engine-own -/// and substrait system. -class FunctionMapping { - public: - /// Scalar function names in difference between engine own and substrait. - [[nodiscard]] virtual const FunctionMap& scalaMapping() const { - static const FunctionMap scalaFunctionMap{}; - return scalaFunctionMap; - } - - /// Scalar function names in difference between engine own and substrait. - [[nodiscard]] virtual const FunctionMap& aggregateMapping() const { - static const FunctionMap aggregateFunctionMap{}; - return aggregateFunctionMap; - } - - /// Window function names in difference between engine own and substrait. 
- [[nodiscard]] virtual const FunctionMap& windowMapping() const { - static const FunctionMap windowFunctionMap{}; - return windowFunctionMap; - } -}; - -using FunctionMappingPtr = std::shared_ptr; -} // namespace io::substrait diff --git a/include/substrait/function/FunctionSignature.h b/include/substrait/function/FunctionSignature.h index 66bce17a..e6a63c6f 100644 --- a/include/substrait/function/FunctionSignature.h +++ b/include/substrait/function/FunctionSignature.h @@ -21,7 +21,7 @@ #include "substrait/type/Type.h" -namespace io::substrait { +namespace substrait { struct FunctionSignature { std::string name; @@ -29,4 +29,4 @@ struct FunctionSignature { TypePtr returnType; }; -} // namespace io::substrait +} // namespace substrait diff --git a/include/substrait/type/Type.h b/include/substrait/type/Type.h index 868f74d3..aa595ef0 100644 --- a/include/substrait/type/Type.h +++ b/include/substrait/type/Type.h @@ -20,7 +20,7 @@ #include #include -namespace io::substrait { +namespace substrait { enum class TypeKind : int8_t { kBool = 1, @@ -698,4 +698,4 @@ std::shared_ptr MAP( std::shared_ptr ROW(const std::vector& children); -} // namespace io::substrait +} // namespace substrait diff --git a/substrait/common/Exceptions.cpp b/substrait/common/Exceptions.cpp index 39768bc9..3286ecd2 100644 --- a/substrait/common/Exceptions.cpp +++ b/substrait/common/Exceptions.cpp @@ -12,10 +12,10 @@ * limitations under the License. 
*/ -#include "substrait/common/Exceptions.h" #include +#include "substrait/common/Exceptions.h" -namespace io::substrait::common { +namespace substrait::common { SubstraitException::SubstraitException( const std::string& exceptionCode, @@ -33,4 +33,4 @@ SubstraitException::SubstraitException( __FILE__, std::to_string(__LINE__))) {} -} // namespace io::substrait::common +} // namespace substrait::common diff --git a/substrait/function/Extension.cpp b/substrait/function/Extension.cpp index 9d4dd5c7..eab4d3fd 100644 --- a/substrait/function/Extension.cpp +++ b/substrait/function/Extension.cpp @@ -12,12 +12,12 @@ * limitations under the License. */ +#include #include "substrait/function/Extension.h" -#include "yaml-cpp/yaml.h" bool decodeFunctionVariant( const YAML::Node& node, - io::substrait::FunctionVariant& function) { + substrait::FunctionVariant& function) { const auto& returnType = node["return"]; if (returnType && returnType.IsScalar()) { /// Return type can be an expression. @@ -29,22 +29,22 @@ bool decodeFunctionVariant( std::string lastReturnType; while (std::getline(ss, lastReturnType, '\n')) { } - function.returnType = io::substrait::Type::decode(lastReturnType); + function.returnType = substrait::Type::decode(lastReturnType); } const auto& args = node["args"]; if (args && args.IsSequence()) { for (auto& arg : args) { if (arg["options"]) { // enum argument - auto enumArgument = std::make_shared( - arg.as()); + auto enumArgument = std::make_shared( + arg.as()); function.arguments.emplace_back(enumArgument); } else if (arg["value"]) { // value argument - auto valueArgument = std::make_shared( - arg.as()); + auto valueArgument = std::make_shared( + arg.as()); function.arguments.emplace_back(valueArgument); } else { // type argument - auto typeArgument = std::make_shared( - arg.as()); + auto typeArgument = std::make_shared( + arg.as()); function.arguments.emplace_back(typeArgument); } } @@ -55,7 +55,7 @@ bool decodeFunctionVariant( auto& min = variadic["min"]; 
auto& max = variadic["max"]; if (min) { - function.variadic = std::make_optional( + function.variadic = std::make_optional( {min.as(), max ? std::make_optional(max.as()) : std::nullopt}); } else { @@ -69,8 +69,8 @@ bool decodeFunctionVariant( } template <> -struct YAML::convert { - static bool decode(const Node& node, io::substrait::EnumArgument& argument) { +struct YAML::convert { + static bool decode(const Node& node, substrait::EnumArgument& argument) { // 'options' is required property const auto& options = node["options"]; if (options && options.IsSequence()) { @@ -84,12 +84,12 @@ struct YAML::convert { }; template <> -struct YAML::convert { - static bool decode(const Node& node, io::substrait::ValueArgument& argument) { +struct YAML::convert { + static bool decode(const Node& node, substrait::ValueArgument& argument) { const auto& value = node["value"]; if (value && value.IsScalar()) { auto valueType = value.as(); - argument.type = io::substrait::Type::decode(valueType); + argument.type = substrait::Type::decode(valueType); return true; } return false; @@ -97,10 +97,10 @@ struct YAML::convert { }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const YAML::Node& node, - io::substrait::TypeArgument& argument) { + substrait::TypeArgument& argument) { // no properties need to populate for type argument, just return true if // 'type' element exists. 
if (node["type"]) { @@ -111,25 +111,25 @@ struct YAML::convert { }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const Node& node, - io::substrait::ScalarFunctionVariant& function) { + substrait::ScalarFunctionVariant& function) { return decodeFunctionVariant(node, function); }; }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const Node& node, - io::substrait::AggregateFunctionVariant& function) { + substrait::AggregateFunctionVariant& function) { const auto& res = decodeFunctionVariant(node, function); if (res) { const auto& intermediate = node["intermediate"]; if (intermediate) { function.intermediate = - io::substrait::ParameterizedType::decode(intermediate.as()); + substrait::ParameterizedType::decode(intermediate.as()); } } return res; @@ -137,8 +137,8 @@ struct YAML::convert { }; template <> -struct YAML::convert { - static bool decode(const Node& node, io::substrait::TypeVariant& typeAnchor) { +struct YAML::convert { + static bool decode(const Node& node, substrait::TypeVariant& typeAnchor) { const auto& name = node["name"]; if (name && name.IsScalar()) { typeAnchor.name = name.as(); @@ -148,7 +148,7 @@ struct YAML::convert { } }; -namespace io::substrait { +namespace substrait { std::shared_ptr Extension::load(const std::string& basePath) { static const std::vector extensionFiles{ @@ -288,4 +288,4 @@ void Extension::addAggregateFunctionVariant( } } -} // namespace io::substrait +} // namespace substrait diff --git a/substrait/function/Function.cpp b/substrait/function/Function.cpp index add718b2..c4574db1 100644 --- a/substrait/function/Function.cpp +++ b/substrait/function/Function.cpp @@ -12,10 +12,10 @@ * limitations under the License. 
*/ -#include "substrait/function/Function.h" #include +#include "substrait/function/Function.h" -namespace io::substrait { +namespace substrait { std::string FunctionVariant::signature( const std::string& name, @@ -97,4 +97,4 @@ bool AggregateFunctionVariant::tryMatch(const FunctionSignature& signature) { return matched; } -} // namespace io::substrait +} // namespace substrait diff --git a/substrait/function/FunctionLookup.cpp b/substrait/function/FunctionLookup.cpp index c594f568..e2323fcc 100644 --- a/substrait/function/FunctionLookup.cpp +++ b/substrait/function/FunctionLookup.cpp @@ -14,19 +14,13 @@ #include "substrait/function/FunctionLookup.h" -namespace io::substrait { +namespace substrait { FunctionVariantPtr FunctionLookup::lookupFunction( const FunctionSignature& signature) const { - const auto& functionMappings = getFunctionMap(); - - const auto& substraitFunctionName = - functionMappings.find(signature.name) != functionMappings.end() - ? functionMappings.at(signature.name) - : signature.name; const auto& functionVariants = getFunctionVariants(); - auto functionVariantIter = functionVariants.find(substraitFunctionName); + auto functionVariantIter = functionVariants.find(signature.name); if (functionVariantIter != functionVariants.end()) { for (const auto& candidateFunctionVariant : functionVariantIter->second) { if (candidateFunctionVariant->tryMatch(signature)) { @@ -37,4 +31,4 @@ FunctionVariantPtr FunctionLookup::lookupFunction( return nullptr; } -} // namespace io::substrait +} // namespace substrait diff --git a/substrait/function/tests/FunctionLookupTest.cpp b/substrait/function/tests/FunctionLookupTest.cpp index 1809461b..7b0fc15e 100644 --- a/substrait/function/tests/FunctionLookupTest.cpp +++ b/substrait/function/tests/FunctionLookupTest.cpp @@ -12,27 +12,11 @@ * limitations under the License. 
*/ -#include "substrait/function/FunctionLookup.h" -#include #include +#include +#include "substrait/function/FunctionLookup.h" -using namespace io::substrait; - -class VeloxFunctionMappings : public FunctionMapping { - public: - /// scalar function names in difference between velox and Substrait. - [[nodiscard]] const FunctionMap& scalaMapping() const override { - static const FunctionMap scalarMappings{ - {"plus", "add"}, - {"minus", "subtract"}, - {"mod", "modulus"}, - {"eq", "equal"}, - {"neq", "not_equal"}, - {"substr", "substring"}, - }; - return scalarMappings; - }; -}; +using namespace substrait; class FunctionLookupTest : public ::testing::Test { protected: @@ -45,12 +29,10 @@ class FunctionLookupTest : public ::testing::Test { void SetUp() override { ExtensionPtr extension_ = Extension::load(getExtensionAbsolutePath()); - FunctionMappingPtr mappings_ = - std::make_shared(); scalarFunctionLookup_ = - std::make_shared(extension_, mappings_); + std::make_shared(extension_); aggregateFunctionLookup_ = - std::make_shared(extension_, mappings_); + std::make_shared(extension_); } void testScalarFunctionLookup( @@ -104,8 +86,6 @@ TEST_F(FunctionLookupTest, arithmetic_function) { testScalarFunctionLookup( {"add", {TINYINT(), TINYINT()}, TINYINT()}, "add:opt_i8_i8"); - testScalarFunctionLookup( - {"plus", {TINYINT(), TINYINT()}, TINYINT()}, "add:opt_i8_i8"); testScalarFunctionLookup( {"divide", { @@ -139,6 +119,6 @@ TEST_F(FunctionLookupTest, string_function) { {"like", {VARCHAR(3), VARCHAR(4)}, BOOL()}, "like:opt_vchar_vchar"); testScalarFunctionLookup( - {"substr", {STRING(), INTEGER(), INTEGER()}, STRING()}, + {"substring", {STRING(), INTEGER(), INTEGER()}, STRING()}, "substring:str_i32_i32"); } diff --git a/substrait/type/Type.cpp b/substrait/type/Type.cpp index 719bc55a..90890179 100644 --- a/substrait/type/Type.cpp +++ b/substrait/type/Type.cpp @@ -12,13 +12,13 @@ * limitations under the License. 
*/ -#include "substrait/type/Type.h" #include #include #include +#include "substrait/type/Type.h" #include "substrait/common/Exceptions.h" -namespace io::substrait { +namespace substrait { namespace { @@ -526,4 +526,4 @@ bool UsedDefinedType::isMatch( return true; } -} // namespace io::substrait +} // namespace substrait diff --git a/substrait/type/tests/TypeTest.cpp b/substrait/type/tests/TypeTest.cpp index c8e0c79a..4e938d2b 100644 --- a/substrait/type/tests/TypeTest.cpp +++ b/substrait/type/tests/TypeTest.cpp @@ -12,10 +12,10 @@ * limitations under the License. */ -#include "substrait/type/Type.h" #include +#include "substrait/type/Type.h" -using namespace io::substrait; +using namespace substrait; class TypeTest : public ::testing::Test { protected: @@ -26,7 +26,7 @@ class TypeTest : public ::testing::Test { ASSERT_EQ(type->signature(), signature); } - void testType( + static void testType( const ParameterizedTypePtr& type, TypeKind kind, const std::string& signature) { From 2568fc1c9721972aa1b5f47c195efffd1df1ad0f Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Tue, 8 Nov 2022 15:04:48 +0800 Subject: [PATCH 11/23] fix issues --- include/substrait/common/Exceptions.h | 23 +++++++++----- include/substrait/function/Extension.h | 8 ++--- include/substrait/function/Function.h | 33 +++++++++++++-------- include/substrait/function/FunctionLookup.h | 2 +- substrait/function/Extension.cpp | 30 +++++++++---------- substrait/function/Function.cpp | 15 +++++++--- substrait/function/FunctionLookup.cpp | 2 +- 7 files changed, 68 insertions(+), 45 deletions(-) diff --git a/include/substrait/common/Exceptions.h b/include/substrait/common/Exceptions.h index a5bb072a..0ea27234 100644 --- a/include/substrait/common/Exceptions.h +++ b/include/substrait/common/Exceptions.h @@ -37,18 +37,25 @@ inline constexpr auto kInvalidState = "INVALID_STATE"; // An error raised when unreachable code point was executed. 
inline constexpr auto kUnreachableCode = "UNREACHABLE_CODE"; -// An error raised when a requested operation is not yet supported. +// An error raised when a requested operation is not implemented. inline constexpr auto kNotImplemented = "NOT_IMPLEMENTED"; -// An error raised when a method has been passed an illegal or inappropriate -// argument. -inline constexpr auto kIllegalArgument = "ILLEGAL_ARGUMENT"; - } // namespace error_code class SubstraitException : public std::exception { public: - enum class Type { kUser = 0, kSystem = 1 }; + + enum class Type { + // Errors where the root cause of the problem is either because of bad input + // or an unsupported pattern of use are classified with USER. Examples + // of errors in this category include syntax errors, unavailable names or + // objects. + kUser = 0, + + // Errors where the root cause of the problem is some unreliable aspect of the + // system are classified with SYSTEM. + kSystem = 1 + }; SubstraitException( const std::string& exceptionCode, @@ -132,10 +139,10 @@ std::string errorMessage(fmt::string_view fmt, const Args&... args) { substrait::common::error_code::kNotImplemented, \ ##__VA_ARGS__) -#define SUBSTRAIT_ILLEGAL_ARGUMENT(...) \ +#define SUBSTRAIT_IVALID_ARGUMENT(...) \ SUBSTRAIT_THROW( \ substrait::common::SubstraitUserError, \ - substrait::common::error_code::kIllegalArgument, \ + substrait::common::error_code::kInvalidArgument, \ ##__VA_ARGS__) } // namespace substrait::common diff --git a/include/substrait/function/Extension.h b/include/substrait/function/Extension.h index fa2c1289..520d4eda 100644 --- a/include/substrait/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -33,7 +33,7 @@ struct TypeVariant { using TypeVariantPtr = std::shared_ptr; using FunctionVariantMap = - std::unordered_map>; + std::unordered_map>; using TypeVariantMap = std::unordered_map; @@ -53,13 +53,13 @@ class Extension { const std::vector& extensionFiles); /// Add a scalar function variant. 
- void addScalarFunctionVariant(const FunctionVariantPtr& functionVariant); + void addScalarFunctionVariant(const FunctionImplementationPtr& functionVariant); /// Add a aggregate function variant. - void addAggregateFunctionVariant(const FunctionVariantPtr& functionVariant); + void addAggregateFunctionVariant(const FunctionImplementationPtr& functionVariant); /// Add a window function variant. - void addWindowFunctionVariant(const FunctionVariantPtr& functionVariant); + void addWindowFunctionVariant(const FunctionImplementationPtr& functionVariant); /// Add a type variant. void addTypeVariant(const TypeVariantPtr& functionVariant); diff --git a/include/substrait/function/Function.h b/include/substrait/function/Function.h index 8bbb5695..30438c1f 100644 --- a/include/substrait/function/Function.h +++ b/include/substrait/function/Function.h @@ -33,6 +33,14 @@ struct FunctionArgument { [[nodiscard]] virtual bool isValueArgument() const { return false; } + + [[nodiscard]] virtual bool isEnumArgument() const { + return false; + } + + [[nodiscard]] virtual bool isTypeArgument() const { + return false; + } }; using FunctionArgumentPtr = std::shared_ptr; @@ -47,6 +55,10 @@ struct EnumArgument : public FunctionArgument { [[nodiscard]] std::string toTypeString() const override { return required ? 
"req" : "opt"; } + + [[nodiscard]] bool isEnumArgument() const override { + return true; + } }; struct TypeArgument : public FunctionArgument { @@ -57,6 +69,10 @@ struct TypeArgument : public FunctionArgument { [[nodiscard]] bool isRequired() const override { return true; } + + [[nodiscard]] bool isTypeArgument() const override { + return true; + } }; struct ValueArgument : public FunctionArgument { @@ -84,7 +100,7 @@ struct FunctionVariadic { std::optional max; }; -struct FunctionVariant { +struct FunctionImplementation { std::string name; std::string uri; std::vector arguments; @@ -94,22 +110,15 @@ struct FunctionVariant { /// Test if the actual types matched with this function variant. virtual bool tryMatch(const FunctionSignature& signature); - /// Create function signature by given function name and arguments. - static std::string signature( - const std::string& name, - const std::vector& arguments); - /// Create function signature by function name and arguments. - [[nodiscard]] std::string signature() const { - return signature(name, arguments); - } + [[nodiscard]] std::string signature() const; }; -using FunctionVariantPtr = std::shared_ptr; +using FunctionImplementationPtr = std::shared_ptr; -struct ScalarFunctionVariant : public FunctionVariant {}; +struct ScalarFunctionImplementation : public FunctionImplementation {}; -struct AggregateFunctionVariant : public FunctionVariant { +struct AggregateFunctionImplementation : public FunctionImplementation { ParameterizedTypePtr intermediate; bool deterministic; diff --git a/include/substrait/function/FunctionLookup.h b/include/substrait/function/FunctionLookup.h index 910ac36d..61c55715 100644 --- a/include/substrait/function/FunctionLookup.h +++ b/include/substrait/function/FunctionLookup.h @@ -24,7 +24,7 @@ class FunctionLookup { explicit FunctionLookup(ExtensionPtr extension) : extension_(std::move(extension)) {} - [[nodiscard]] virtual FunctionVariantPtr lookupFunction( + [[nodiscard]] virtual 
FunctionImplementationPtr lookupFunction( const FunctionSignature& signature) const; virtual ~FunctionLookup() = default; diff --git a/substrait/function/Extension.cpp b/substrait/function/Extension.cpp index eab4d3fd..8312f47c 100644 --- a/substrait/function/Extension.cpp +++ b/substrait/function/Extension.cpp @@ -17,7 +17,7 @@ bool decodeFunctionVariant( const YAML::Node& node, - substrait::FunctionVariant& function) { + substrait::FunctionImplementation& function) { const auto& returnType = node["return"]; if (returnType && returnType.IsScalar()) { /// Return type can be an expression. @@ -111,19 +111,19 @@ struct YAML::convert { }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const Node& node, - substrait::ScalarFunctionVariant& function) { + substrait::ScalarFunctionImplementation& function) { return decodeFunctionVariant(node, function); }; }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const Node& node, - substrait::AggregateFunctionVariant& function) { + substrait::AggregateFunctionImplementation& function) { const auto& res = decodeFunctionVariant(node, function); if (res) { const auto& intermediate = node["intermediate"]; @@ -192,11 +192,11 @@ std::shared_ptr Extension::load( const auto functionName = scalarFunctionNode["name"].as(); for (auto& scalaFunctionVariantNode : scalarFunctionNode["impls"]) { auto scalarFunctionVariant = - scalaFunctionVariantNode.as(); + scalaFunctionVariantNode.as(); scalarFunctionVariant.name = functionName; scalarFunctionVariant.uri = extensionUri; extension->addScalarFunctionVariant( - std::make_shared(scalarFunctionVariant)); + std::make_shared(scalarFunctionVariant)); } } } @@ -209,11 +209,11 @@ std::shared_ptr Extension::load( for (auto& aggregateFunctionVariantNode : aggregateFunctionNode["impls"]) { auto aggregateFunctionVariant = - aggregateFunctionVariantNode.as(); + aggregateFunctionVariantNode.as(); aggregateFunctionVariant.name = functionName; 
aggregateFunctionVariant.uri = extensionUri; extension->addAggregateFunctionVariant( - std::make_shared( + std::make_shared( aggregateFunctionVariant)); } } @@ -232,14 +232,14 @@ std::shared_ptr Extension::load( } void Extension::addWindowFunctionVariant( - const FunctionVariantPtr& functionVariant) { + const FunctionImplementationPtr& functionVariant) { const auto& functionVariants = windowFunctionVariantMap_.find(functionVariant->name); if (functionVariants != windowFunctionVariantMap_.end()) { auto& variants = functionVariants->second; variants.emplace_back(functionVariant); } else { - std::vector variants; + std::vector variants; variants.emplace_back(functionVariant); windowFunctionVariantMap_.insert( {functionVariant->name, std::move(variants)}); @@ -259,14 +259,14 @@ TypeVariantPtr Extension::lookupType(const std::string& typeName) const { } void Extension::addScalarFunctionVariant( - const FunctionVariantPtr& functionVariant) { + const FunctionImplementationPtr& functionVariant) { const auto& functionVariants = scalarFunctionVariantMap_.find(functionVariant->name); if (functionVariants != scalarFunctionVariantMap_.end()) { auto& variants = functionVariants->second; variants.emplace_back(functionVariant); } else { - std::vector variants; + std::vector variants; variants.emplace_back(functionVariant); scalarFunctionVariantMap_.insert( {functionVariant->name, std::move(variants)}); @@ -274,14 +274,14 @@ void Extension::addScalarFunctionVariant( } void Extension::addAggregateFunctionVariant( - const FunctionVariantPtr& functionVariant) { + const FunctionImplementationPtr& functionVariant) { const auto& functionVariants = aggregateFunctionVariantMap_.find(functionVariant->name); if (functionVariants != aggregateFunctionVariantMap_.end()) { auto& variants = functionVariants->second; variants.emplace_back(functionVariant); } else { - std::vector variants; + std::vector variants; variants.emplace_back(functionVariant); aggregateFunctionVariantMap_.insert( 
{functionVariant->name, std::move(variants)}); diff --git a/substrait/function/Function.cpp b/substrait/function/Function.cpp index c4574db1..5d8a8c5c 100644 --- a/substrait/function/Function.cpp +++ b/substrait/function/Function.cpp @@ -17,7 +17,8 @@ namespace substrait { -std::string FunctionVariant::signature( +namespace { +std::string signatureFor( const std::string& name, const std::vector& arguments) { std::stringstream ss; @@ -36,8 +37,10 @@ std::string FunctionVariant::signature( return ss.str(); } +} + -bool FunctionVariant::tryMatch(const FunctionSignature& signature) { +bool FunctionImplementation::tryMatch(const FunctionSignature& signature) { const auto& actualTypes = signature.arguments; if (variadic.has_value()) { // return false if actual types length less than min of variadic @@ -86,8 +89,12 @@ bool FunctionVariant::tryMatch(const FunctionSignature& signature) { } } -bool AggregateFunctionVariant::tryMatch(const FunctionSignature& signature) { - bool matched = FunctionVariant::tryMatch(signature); +std::string FunctionImplementation::signature() const { + return signatureFor(name, arguments); +} + +bool AggregateFunctionImplementation::tryMatch(const FunctionSignature& signature) { + bool matched = FunctionImplementation::tryMatch(signature); if (!matched && intermediate) { const auto& actualTypes = signature.arguments; if (actualTypes.size() == 1) { diff --git a/substrait/function/FunctionLookup.cpp b/substrait/function/FunctionLookup.cpp index e2323fcc..2179bd83 100644 --- a/substrait/function/FunctionLookup.cpp +++ b/substrait/function/FunctionLookup.cpp @@ -16,7 +16,7 @@ namespace substrait { -FunctionVariantPtr FunctionLookup::lookupFunction( +FunctionImplementationPtr FunctionLookup::lookupFunction( const FunctionSignature& signature) const { const auto& functionVariants = getFunctionVariants(); From 70c7a31b470b24b228f914309f042456cf55686a Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Tue, 8 Nov 2022 15:07:13 +0800 Subject: [PATCH 
12/23] rename functionVariant to functionImplementation --- substrait/function/FunctionLookup.cpp | 8 ++++---- substrait/function/tests/FunctionLookupTest.cpp | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/substrait/function/FunctionLookup.cpp b/substrait/function/FunctionLookup.cpp index 2179bd83..4bfb1e9d 100644 --- a/substrait/function/FunctionLookup.cpp +++ b/substrait/function/FunctionLookup.cpp @@ -19,10 +19,10 @@ namespace substrait { FunctionImplementationPtr FunctionLookup::lookupFunction( const FunctionSignature& signature) const { - const auto& functionVariants = getFunctionVariants(); - auto functionVariantIter = functionVariants.find(signature.name); - if (functionVariantIter != functionVariants.end()) { - for (const auto& candidateFunctionVariant : functionVariantIter->second) { + const auto& functionImpls = getFunctionVariants(); + auto functionImplsIter = functionImpls.find(signature.name); + if (functionImplsIter != functionImpls.end()) { + for (const auto& candidateFunctionVariant : functionImplsIter->second) { if (candidateFunctionVariant->tryMatch(signature)) { return candidateFunctionVariant; } diff --git a/substrait/function/tests/FunctionLookupTest.cpp b/substrait/function/tests/FunctionLookupTest.cpp index 7b0fc15e..cf788ef2 100644 --- a/substrait/function/tests/FunctionLookupTest.cpp +++ b/substrait/function/tests/FunctionLookupTest.cpp @@ -38,21 +38,21 @@ class FunctionLookupTest : public ::testing::Test { void testScalarFunctionLookup( const FunctionSignature& inputSignature, const std::string& outputSignature) { - const auto& functionVariant = + const auto& functionImpl = scalarFunctionLookup_->lookupFunction(inputSignature); - ASSERT_TRUE(functionVariant != nullptr); - ASSERT_EQ(functionVariant->signature(), outputSignature); + ASSERT_TRUE(functionImpl != nullptr); + ASSERT_EQ(functionImpl->signature(), outputSignature); } void testAggregateFunctionLookup( const FunctionSignature& inputSignature, const 
std::string& outputSignature) { - const auto& functionVariant = + const auto& functionImpl = aggregateFunctionLookup_->lookupFunction(inputSignature); - ASSERT_TRUE(functionVariant != nullptr); - ASSERT_EQ(functionVariant->signature(), outputSignature); + ASSERT_TRUE(functionImpl != nullptr); + ASSERT_EQ(functionImpl->signature(), outputSignature); } private: From 3c5acae2930fed42fb8cfd1b2786ad9e9b2704d6 Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Tue, 8 Nov 2022 15:22:36 +0800 Subject: [PATCH 13/23] use spdx license header --- CMakeLists.txt | 13 +- LICENSE | 201 ------------------ Makefile | 17 +- include/substrait/common/Exceptions.h | 14 +- include/substrait/function/Extension.h | 14 +- include/substrait/function/Function.h | 14 +- include/substrait/function/FunctionLookup.h | 14 +- .../substrait/function/FunctionSignature.h | 19 +- include/substrait/type/Type.h | 14 +- scripts/setup-helper-functions.sh | 12 +- scripts/setup-ubuntu.sh | 12 +- substrait/CMakeLists.txt | 13 +- substrait/common/CMakeLists.txt | 12 +- substrait/common/Exceptions.cpp | 14 +- substrait/function/CMakeLists.txt | 12 +- substrait/function/Extension.cpp | 14 +- substrait/function/Function.cpp | 14 +- substrait/function/FunctionLookup.cpp | 14 +- substrait/function/tests/CMakeLists.txt | 12 +- .../function/tests/FunctionLookupTest.cpp | 14 +- substrait/type/CMakeLists.txt | 12 +- substrait/type/Type.cpp | 14 +- substrait/type/tests/CMakeLists.txt | 12 +- substrait/type/tests/TypeTest.cpp | 14 +- third_party/CMakeLists.txt | 17 +- 25 files changed, 25 insertions(+), 507 deletions(-) delete mode 100644 LICENSE diff --git a/CMakeLists.txt b/CMakeLists.txt index 42283c40..ab8efc31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,14 +1,5 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 + cmake_minimum_required(VERSION 3.10) # set the project name diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 261eeb9e..00000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/Makefile b/Makefile index 6b59d481..dd855043 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,4 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. +# SPDX-License-Identifier: Apache-2.0 .PHONY: all clean build debug release diff --git a/include/substrait/common/Exceptions.h b/include/substrait/common/Exceptions.h index 0ea27234..e08466aa 100644 --- a/include/substrait/common/Exceptions.h +++ b/include/substrait/common/Exceptions.h @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/include/substrait/function/Extension.h b/include/substrait/function/Extension.h index 520d4eda..9feb66b6 100644 --- a/include/substrait/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/include/substrait/function/Function.h b/include/substrait/function/Function.h index 30438c1f..8de8434b 100644 --- a/include/substrait/function/Function.h +++ b/include/substrait/function/Function.h @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/include/substrait/function/FunctionLookup.h b/include/substrait/function/FunctionLookup.h index 61c55715..8985a1ae 100644 --- a/include/substrait/function/FunctionLookup.h +++ b/include/substrait/function/FunctionLookup.h @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/include/substrait/function/FunctionSignature.h b/include/substrait/function/FunctionSignature.h index e6a63c6f..62983d70 100644 --- a/include/substrait/function/FunctionSignature.h +++ b/include/substrait/function/FunctionSignature.h @@ -1,21 +1,4 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/include/substrait/type/Type.h b/include/substrait/type/Type.h index aa595ef0..1fcd9c13 100644 --- a/include/substrait/type/Type.h +++ b/include/substrait/type/Type.h @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/scripts/setup-helper-functions.sh b/scripts/setup-helper-functions.sh index b6238d1d..8c2c4c7a 100755 --- a/scripts/setup-helper-functions.sh +++ b/scripts/setup-helper-functions.sh @@ -1,15 +1,5 @@ #!/bin/bash -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 # github_checkout $REPO $VERSION $GIT_CLONE_PARAMS clones or re-uses an existing clone of the # specified repo, checking out the requested version. diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index dc0fe5f2..377f6708 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -1,15 +1,5 @@ #!/bin/bash -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 # Minimal setup for Ubuntu 20.04. set -eufx -o pipefail diff --git a/substrait/CMakeLists.txt b/substrait/CMakeLists.txt index ae435aad..2d18d0b2 100644 --- a/substrait/CMakeLists.txt +++ b/substrait/CMakeLists.txt @@ -1,15 +1,4 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +# SPDX-License-Identifier: Apache-2.0 add_subdirectory(common) add_subdirectory(type) diff --git a/substrait/common/CMakeLists.txt b/substrait/common/CMakeLists.txt index bdedf591..8c6936ca 100644 --- a/substrait/common/CMakeLists.txt +++ b/substrait/common/CMakeLists.txt @@ -1,14 +1,4 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 find_package(fmt) diff --git a/substrait/common/Exceptions.cpp b/substrait/common/Exceptions.cpp index 3286ecd2..7f5d9c73 100644 --- a/substrait/common/Exceptions.cpp +++ b/substrait/common/Exceptions.cpp @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #include #include "substrait/common/Exceptions.h" diff --git a/substrait/function/CMakeLists.txt b/substrait/function/CMakeLists.txt index 75763f85..fd124c60 100644 --- a/substrait/function/CMakeLists.txt +++ b/substrait/function/CMakeLists.txt @@ -1,14 +1,4 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 set(FUNCTION_SRCS Function.cpp diff --git a/substrait/function/Extension.cpp b/substrait/function/Extension.cpp index 8312f47c..da3c323d 100644 --- a/substrait/function/Extension.cpp +++ b/substrait/function/Extension.cpp @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ +/* SPDX-License-Identifier: Apache-2.0 */ #include #include "substrait/function/Extension.h" diff --git a/substrait/function/Function.cpp b/substrait/function/Function.cpp index 5d8a8c5c..15c2531b 100644 --- a/substrait/function/Function.cpp +++ b/substrait/function/Function.cpp @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #include #include "substrait/function/Function.h" diff --git a/substrait/function/FunctionLookup.cpp b/substrait/function/FunctionLookup.cpp index 4bfb1e9d..b918aa26 100644 --- a/substrait/function/FunctionLookup.cpp +++ b/substrait/function/FunctionLookup.cpp @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ +/* SPDX-License-Identifier: Apache-2.0 */ #include "substrait/function/FunctionLookup.h" diff --git a/substrait/function/tests/CMakeLists.txt b/substrait/function/tests/CMakeLists.txt index 209176a0..94835efe 100644 --- a/substrait/function/tests/CMakeLists.txt +++ b/substrait/function/tests/CMakeLists.txt @@ -1,14 +1,4 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 add_executable( substrait_function_test diff --git a/substrait/function/tests/FunctionLookupTest.cpp b/substrait/function/tests/FunctionLookupTest.cpp index cf788ef2..2193d0c2 100644 --- a/substrait/function/tests/FunctionLookupTest.cpp +++ b/substrait/function/tests/FunctionLookupTest.cpp @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ +/* SPDX-License-Identifier: Apache-2.0 */ #include #include diff --git a/substrait/type/CMakeLists.txt b/substrait/type/CMakeLists.txt index 0de6bd5f..3aeb3e96 100644 --- a/substrait/type/CMakeLists.txt +++ b/substrait/type/CMakeLists.txt @@ -1,14 +1,4 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 set(TYPE_SRCS Type.cpp) diff --git a/substrait/type/Type.cpp b/substrait/type/Type.cpp index 90890179..adb7b587 100644 --- a/substrait/type/Type.cpp +++ b/substrait/type/Type.cpp @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ +/* SPDX-License-Identifier: Apache-2.0 */ #include #include diff --git a/substrait/type/tests/CMakeLists.txt b/substrait/type/tests/CMakeLists.txt index 6b7119d2..781cd5cd 100644 --- a/substrait/type/tests/CMakeLists.txt +++ b/substrait/type/tests/CMakeLists.txt @@ -1,14 +1,4 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-License-Identifier: Apache-2.0 add_executable( substrait_type_test diff --git a/substrait/type/tests/TypeTest.cpp b/substrait/type/tests/TypeTest.cpp index 4e938d2b..1b3419ae 100644 --- a/substrait/type/tests/TypeTest.cpp +++ b/substrait/type/tests/TypeTest.cpp @@ -1,16 +1,4 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* SPDX-License-Identifier: Apache-2.0 */ #include #include "substrait/type/Type.h" diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 32202afe..cf3b0980 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -1,19 +1,4 @@ -# Copyright (c) Facebook, Inc. 
and its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -#add_subdirectory(fmt) -#include_directories(fmt/include) +# SPDX-License-Identifier: Apache-2.0 add_subdirectory(googletest) From 893a974d175660ab094a027dcea05c2eb1260dd9 Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Tue, 8 Nov 2022 15:42:45 +0800 Subject: [PATCH 14/23] fix issues --- include/substrait/type/Type.h | 1 - substrait/function/Function.cpp | 39 +++++++++++++-------------------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/include/substrait/type/Type.h b/include/substrait/type/Type.h index 1fcd9c13..e87c8746 100644 --- a/include/substrait/type/Type.h +++ b/include/substrait/type/Type.h @@ -394,7 +394,6 @@ class Map : public TypeBase { const TypePtr valueType_; }; -/// ParameterizedType represent a type in class ParameterizedTypeBase : public ParameterizedType { public: explicit ParameterizedTypeBase(bool nullable = false) diff --git a/substrait/function/Function.cpp b/substrait/function/Function.cpp index 15c2531b..83d597be 100644 --- a/substrait/function/Function.cpp +++ b/substrait/function/Function.cpp @@ -5,29 +5,6 @@ namespace substrait { -namespace { -std::string signatureFor( - const std::string& name, - const std::vector& arguments) { - std::stringstream ss; - ss << name; - if (!arguments.empty()) { - ss << ":"; - for (auto it = arguments.begin(); it != arguments.end(); ++it) { - const auto& typeSign = (*it)->toTypeString(); - if (it == arguments.end() 
- 1) { - ss << typeSign; - } else { - ss << typeSign << "_"; - } - } - } - - return ss.str(); -} -} - - bool FunctionImplementation::tryMatch(const FunctionSignature& signature) { const auto& actualTypes = signature.arguments; if (variadic.has_value()) { @@ -78,7 +55,21 @@ bool FunctionImplementation::tryMatch(const FunctionSignature& signature) { } std::string FunctionImplementation::signature() const { - return signatureFor(name, arguments); + std::stringstream ss; + ss << name; + if (!arguments.empty()) { + ss << ":"; + for (auto it = arguments.begin(); it != arguments.end(); ++it) { + const auto& typeSign = (*it)->toTypeString(); + if (it == arguments.end() - 1) { + ss << typeSign; + } else { + ss << typeSign << "_"; + } + } + } + + return ss.str(); } bool AggregateFunctionImplementation::tryMatch(const FunctionSignature& signature) { From 8e60d95bc7d719ab77e0ebc46578d05c6ce2c4a2 Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Sun, 27 Nov 2022 16:12:04 +0800 Subject: [PATCH 15/23] fix issues --- include/substrait/common/Exceptions.h | 10 +-- include/substrait/type/Type.h | 78 ++++++------------- .../function/tests/FunctionLookupTest.cpp | 2 +- substrait/type/Type.cpp | 16 +--- substrait/type/tests/TypeTest.cpp | 11 +-- 5 files changed, 35 insertions(+), 82 deletions(-) diff --git a/include/substrait/common/Exceptions.h b/include/substrait/common/Exceptions.h index e08466aa..f944da7d 100644 --- a/include/substrait/common/Exceptions.h +++ b/include/substrait/common/Exceptions.h @@ -12,21 +12,21 @@ namespace error_code { //====================== User Error Codes ======================: // An error raised when an argument verification fails -inline constexpr auto kInvalidArgument = "INVALID_ARGUMENT"; +inline constexpr const char* kInvalidArgument = "INVALID_ARGUMENT"; // An error raised when a requested operation is not supported. 
-inline constexpr auto kUnsupported = "UNSUPPORTED"; +inline constexpr const char* kUnsupported = "UNSUPPORTED"; //====================== Runtime Error Codes ======================: // An error raised when the current state of a component is invalid. -inline constexpr auto kInvalidState = "INVALID_STATE"; +inline constexpr const char* kInvalidState = "INVALID_STATE"; // An error raised when unreachable code point was executed. -inline constexpr auto kUnreachableCode = "UNREACHABLE_CODE"; +inline constexpr const char* kUnreachableCode = "UNREACHABLE_CODE"; // An error raised when a requested operation is not implemented. -inline constexpr auto kNotImplemented = "NOT_IMPLEMENTED"; +inline constexpr const char* kNotImplemented = "NOT_IMPLEMENTED"; } // namespace error_code diff --git a/include/substrait/type/Type.h b/include/substrait/type/Type.h index e87c8746..8124f3d5 100644 --- a/include/substrait/type/Type.h +++ b/include/substrait/type/Type.h @@ -14,27 +14,26 @@ enum class TypeKind : int8_t { kBool = 1, kI8 = 2, kI16 = 3, - kI32 = 5, - kI64 = 7, - kFp32 = 10, - kFp64 = 11, - kString = 12, - kBinary = 13, - kTimestamp = 14, - kDate = 16, - kTime = 17, - kIntervalYear = 19, - kIntervalDay = 20, - kTimestampTz = 29, - kUuid = 32, - kFixedChar = 21, - kVarchar = 22, - kFixedBinary = 23, - kDecimal = 24, - kStruct = 25, - kList = 27, - kMap = 28, - kUserDefined = 30, + kI32 = 4, + kI64 = 5, + kFp32 = 6, + kFp64 = 7, + kString = 8, + kBinary = 9, + kTimestamp = 10, + kDate = 11, + kTime = 12, + kIntervalYear = 13, + kIntervalDay = 14, + kTimestampTz = 15, + kUuid = 16, + kFixedChar = 17, + kVarchar = 18, + kFixedBinary = 19, + kDecimal = 20, + kStruct = 21, + kList = 22, + kMap = 23, KIND_NOT_SET = 0, }; @@ -179,12 +178,6 @@ struct TypeTraits { static constexpr const char* typeString = "map"; }; -template <> -struct TypeTraits { - static constexpr const char* signature = "u!name"; - static constexpr const char* typeString = "user defined type"; -}; - class 
ParameterizedType { public: explicit ParameterizedType(bool nullable = false) : nullable_(nullable) {} @@ -400,31 +393,6 @@ class ParameterizedTypeBase : public ParameterizedType { : ParameterizedType(nullable) {} }; -class UsedDefinedType : public ParameterizedTypeBase { - public: - UsedDefinedType(std::string value, bool nullable) - : ParameterizedTypeBase(nullable), value_(std::move(value)) {} - - [[nodiscard]] const std::string& value() const { - return value_; - } - - [[nodiscard]] TypeKind kind() const override { - return TypeKind::kUserDefined; - } - - [[nodiscard]] std::string signature() const override { - return TypeTraits::signature; - } - - [[nodiscard]] bool isMatch( - const std::shared_ptr& type) const override; - - private: - /// raw string of wildcard type. - const std::string value_; -}; - /// A string literal type can present the 'any1'. class StringLiteral : public ParameterizedTypeBase { public: @@ -673,9 +641,9 @@ std::shared_ptr DECIMAL(int precision, int scale); std::shared_ptr VARCHAR(int len); -std::shared_ptr FChar(int len); +std::shared_ptr FIXED_CHAR(int len); -std::shared_ptr FBinary(int len); +std::shared_ptr FIXED_BINARY(int len); std::shared_ptr LIST(const TypePtr& elementType); @@ -683,6 +651,6 @@ std::shared_ptr MAP( const TypePtr& keyType, const TypePtr& valueType); -std::shared_ptr ROW(const std::vector& children); +std::shared_ptr STRUCT(const std::vector& children); } // namespace substrait diff --git a/substrait/function/tests/FunctionLookupTest.cpp b/substrait/function/tests/FunctionLookupTest.cpp index 2193d0c2..643ac2e9 100644 --- a/substrait/function/tests/FunctionLookupTest.cpp +++ b/substrait/function/tests/FunctionLookupTest.cpp @@ -87,7 +87,7 @@ TEST_F(FunctionLookupTest, arithmetic_function) { TEST_F(FunctionLookupTest, aggregate) { // for intermediate type testAggregateFunctionLookup( - {"avg", {ROW({DOUBLE(), BIGINT()})}, FLOAT()}, "avg:opt_fp32"); + {"avg", {STRUCT({DOUBLE(), BIGINT()})}, FLOAT()}, "avg:opt_fp32"); 
} TEST_F(FunctionLookupTest, logical) { diff --git a/substrait/type/Type.cpp b/substrait/type/Type.cpp index adb7b587..53f603fe 100644 --- a/substrait/type/Type.cpp +++ b/substrait/type/Type.cpp @@ -81,8 +81,6 @@ ParameterizedTypePtr ParameterizedType::decode(const std::string& rawType) { return std::make_shared>(nullable); } else if (TypeTraits::typeString == baseType) { return std::make_shared>(nullable); - } else if (matchingType.rfind("unknown", 0) == 0) { - return std::make_shared(rawType, nullable); } else { return std::make_shared(rawType); } @@ -471,7 +469,7 @@ std::shared_ptr FCHAR(int len) { return std::make_shared(len, false); } -std::shared_ptr FBinary(int len) { +std::shared_ptr FIXED_BINARY(int len) { return std::make_shared(len, false); } @@ -485,11 +483,11 @@ std::shared_ptr MAP( return std::make_shared(keyType, valueType, false); } -std::shared_ptr ROW(const std::vector& children) { +std::shared_ptr STRUCT(const std::vector& children) { return std::make_shared(children, false); } -std::shared_ptr FChar(int len) { +std::shared_ptr FIXED_CHAR(int len) { return std::make_shared(len); } @@ -506,12 +504,4 @@ bool StringLiteral::isMatch( } } -bool UsedDefinedType::isMatch( - const std::shared_ptr& type) const { - if (auto udt = std::dynamic_pointer_cast(type)) { - return value_ == udt->value_ && nullable() == udt->nullable(); - } - return true; -} - } // namespace substrait diff --git a/substrait/type/tests/TypeTest.cpp b/substrait/type/tests/TypeTest.cpp index 1b3419ae..2191966d 100644 --- a/substrait/type/tests/TypeTest.cpp +++ b/substrait/type/tests/TypeTest.cpp @@ -51,13 +51,13 @@ TEST_F(TypeTest, typeCreator) { testType(INTERVAL_DAY(), TypeKind::kIntervalDay, "iday"); testType(INTERVAL_YEAR(), TypeKind::kIntervalYear, "iyear"); testType(UUID(), TypeKind::kUuid, "uuid"); - testType(FChar(12), TypeKind::kFixedChar, "fchar<12>"); - testType(FBinary(12), TypeKind::kFixedBinary, "fbin<12>"); + testType(FIXED_CHAR(12), TypeKind::kFixedChar, "fchar<12>"); 
+ testType(FIXED_BINARY(12), TypeKind::kFixedBinary, "fbin<12>"); testType(VARCHAR(12), TypeKind::kVarchar, "vchar<12>"); testType(DECIMAL(12,23), TypeKind::kDecimal, "dec<12,23>"); testType(LIST(FLOAT()), TypeKind::kList, "list"); testType(MAP(STRING(),FLOAT()), TypeKind::kMap, "map"); - testType(ROW({STRING(),FLOAT()}), TypeKind::kStruct, "struct"); + testType(STRUCT({STRING(), FLOAT()}), TypeKind::kStruct, "struct"); } TEST_F(TypeTest, decodeTest) { @@ -155,9 +155,4 @@ TEST_F(TypeTest, decodeTest) { ASSERT_EQ(typePtr->signature(), "T"); ASSERT_TRUE(typePtr->isWildcard()); }); - - testDecode( - "unknown", [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "u!name"); - }); } From 1404ebd72ac3c4f48aff0250296ec68a11c93c2c Mon Sep 17 00:00:00 2001 From: vibhatha Date: Mon, 26 Dec 2022 12:12:52 +0530 Subject: [PATCH 16/23] feat(docker): adding initial docker setup (wip) --- docker/Dockerfile | 38 ++++++++++++++++++++++++++++++++++++++ scripts/setup-ubuntu.sh | 1 + 2 files changed, 39 insertions(+) create mode 100644 docker/Dockerfile diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..e3371f03 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,38 @@ +FROM ubuntu:20.04 + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +WORKDIR /substrait + +RUN DEBIAN_FRONTEND=noninteractive TZ=America/New_York apt-get update -y && apt-get upgrade -y \ + && apt-get install -y sudo apt-utils tzdata +RUN dpkg-reconfigure tzdata + +RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && apt-get install -y git build-essential cmake + +RUN echo "Cloning Substrait-CPP" + +RUN git clone https://github.com/substrait-io/substrait-cpp.git \ + && cd substrait-cpp \ + && git submodule sync --recursive \ + && git submodule update --init --recursive + +RUN echo $(pwd) + +RUN cd substrait-cpp && echo $(ls) + +RUN echo $(pwd) + +#RUN useradd -ms /bin/bash substrait && adduser substrait sudo + +#RUN chown substrait /substrait/substrait-cpp + +#USER 
substrait +RUN apt-get install wget + +RUN cd substrait-cpp && ./scripts/setup-ubuntu.sh + +ENTRYPOINT ["/bin/bash"] + + + diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index 377f6708..d36982be 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -13,6 +13,7 @@ DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)} # Install all dependencies. sudo --preserve-env apt install -y \ + wget \ g++ \ cmake \ ccache \ From 0885a8c38de6dc9aa5783326f2bd7803c43fe712 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Mon, 26 Dec 2022 12:14:08 +0530 Subject: [PATCH 17/23] fix(library): added wget to dependencies in installation --- scripts/setup-ubuntu.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index 377f6708..b2e9dabc 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -18,7 +18,8 @@ sudo --preserve-env apt install -y \ ccache \ ninja-build \ checkinstall \ - git + git \ + wget function run_and_time { time "$@" From acb7c596d78abfe0083f39b2e1daa523a1078c09 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Tue, 27 Dec 2022 17:36:11 +0530 Subject: [PATCH 18/23] fix(build): adding fmt and cleanup docker image --- docker/Dockerfile | 21 +++------------------ docker/README.md | 23 +++++++++++++++++++++++ third_party/CMakeLists.txt | 1 + 3 files changed, 27 insertions(+), 18 deletions(-) create mode 100644 docker/README.md diff --git a/docker/Dockerfile b/docker/Dockerfile index e3371f03..fe1505ae 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: Apache-2.0 FROM ubuntu:20.04 SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -10,29 +11,13 @@ RUN dpkg-reconfigure tzdata RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && apt-get install -y git build-essential cmake -RUN echo "Cloning Substrait-CPP" - RUN git clone https://github.com/substrait-io/substrait-cpp.git \ && cd substrait-cpp \ && git submodule sync --recursive \ && git 
submodule update --init --recursive -RUN echo $(pwd) - -RUN cd substrait-cpp && echo $(ls) - -RUN echo $(pwd) - -#RUN useradd -ms /bin/bash substrait && adduser substrait sudo - -#RUN chown substrait /substrait/substrait-cpp - -#USER substrait -RUN apt-get install wget - RUN cd substrait-cpp && ./scripts/setup-ubuntu.sh -ENTRYPOINT ["/bin/bash"] - - +RUN cd substrait-cpp && make +ENTRYPOINT ["/bin/bash"] diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 00000000..b59be517 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,23 @@ +# Setup Docker Container + +## Build + +```bash +docker build -t substrait-cpp . +``` + +## Run + +```bash +docker run -it substrait-cpp +``` + +## Evaluate + +Run function tests + +```bash +./build-Debug/substrait/function/tests/substrait_function_test +``` + + diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index cf3b0980..5780a71d 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +add_subdirectory(fmt) add_subdirectory(googletest) set(YAML_CPP_BUILD_TESTS OFF CACHE BOOL "Enable testing") From dfc1dd9ef90a735ead435ba69960b10c1357d572 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Tue, 27 Dec 2022 17:43:03 +0530 Subject: [PATCH 19/23] fix(cleanup): remove unnecessary newlines --- docker/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/README.md b/docker/README.md index b59be517..0f30717f 100644 --- a/docker/README.md +++ b/docker/README.md @@ -19,5 +19,3 @@ Run function tests ```bash ./build-Debug/substrait/function/tests/substrait_function_test ``` - - From 04f183e664bb0ca50a8af1a1990f8c24a6958b3d Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Tue, 3 Jan 2023 15:42:59 +0800 Subject: [PATCH 20/23] fix(naming): Rename namespace 'substrait' to 'io::substrait' --- include/substrait/common/Exceptions.h | 4 +- include/substrait/function/Extension.h | 4 +- include/substrait/function/Function.h | 4 +- 
include/substrait/function/FunctionLookup.h | 4 +- .../substrait/function/FunctionSignature.h | 4 +- include/substrait/type/Type.h | 4 +- substrait/common/Exceptions.cpp | 4 +- substrait/function/Extension.cpp | 50 +++++++++---------- substrait/function/Function.cpp | 4 +- substrait/function/FunctionLookup.cpp | 4 +- .../function/tests/FunctionLookupTest.cpp | 2 +- substrait/type/Type.cpp | 4 +- substrait/type/tests/TypeTest.cpp | 2 +- 13 files changed, 47 insertions(+), 47 deletions(-) diff --git a/include/substrait/common/Exceptions.h b/include/substrait/common/Exceptions.h index f944da7d..91c7768e 100644 --- a/include/substrait/common/Exceptions.h +++ b/include/substrait/common/Exceptions.h @@ -6,7 +6,7 @@ #include #include -namespace substrait::common { +namespace io::substrait::common { namespace error_code { //====================== User Error Codes ======================: @@ -133,4 +133,4 @@ std::string errorMessage(fmt::string_view fmt, const Args&... args) { substrait::common::error_code::kInvalidArgument, \ ##__VA_ARGS__) -} // namespace substrait::common +} // namespace io::substrait::common diff --git a/include/substrait/function/Extension.h b/include/substrait/function/Extension.h index 9feb66b6..7a1b55ce 100644 --- a/include/substrait/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -11,7 +11,7 @@ #include "substrait/function/FunctionSignature.h" #include "substrait/type/Type.h" -namespace substrait { +namespace io::substrait { struct TypeVariant { std::string name; @@ -80,4 +80,4 @@ class Extension { using ExtensionPtr = std::shared_ptr; -} // namespace substrait +} // namespace io::substrait diff --git a/include/substrait/function/Function.h b/include/substrait/function/Function.h index 8de8434b..74e4466f 100644 --- a/include/substrait/function/Function.h +++ b/include/substrait/function/Function.h @@ -5,7 +5,7 @@ #include "substrait/type/Type.h" #include "substrait/function/FunctionSignature.h" -namespace substrait { +namespace 
io::substrait { struct FunctionArgument { [[nodiscard]] virtual bool isRequired() const = 0; @@ -113,4 +113,4 @@ struct AggregateFunctionImplementation : public FunctionImplementation { bool tryMatch(const FunctionSignature& signature) override; }; -} // namespace substrait +} // namespace io::substrait diff --git a/include/substrait/function/FunctionLookup.h b/include/substrait/function/FunctionLookup.h index 8985a1ae..f723ff63 100644 --- a/include/substrait/function/FunctionLookup.h +++ b/include/substrait/function/FunctionLookup.h @@ -5,7 +5,7 @@ #include "substrait/function/Extension.h" #include "substrait/function/FunctionSignature.h" -namespace substrait { +namespace io::substrait { class FunctionLookup { public: @@ -58,4 +58,4 @@ class WindowFunctionLookup : public FunctionLookup { } }; -} // namespace substrait +} // namespace io::substrait diff --git a/include/substrait/function/FunctionSignature.h b/include/substrait/function/FunctionSignature.h index 62983d70..5ebf54f3 100644 --- a/include/substrait/function/FunctionSignature.h +++ b/include/substrait/function/FunctionSignature.h @@ -4,7 +4,7 @@ #include "substrait/type/Type.h" -namespace substrait { +namespace io::substrait { struct FunctionSignature { std::string name; @@ -12,4 +12,4 @@ struct FunctionSignature { TypePtr returnType; }; -} // namespace substrait +} // namespace io::substrait diff --git a/include/substrait/type/Type.h b/include/substrait/type/Type.h index 8124f3d5..93208f9f 100644 --- a/include/substrait/type/Type.h +++ b/include/substrait/type/Type.h @@ -8,7 +8,7 @@ #include #include -namespace substrait { +namespace io::substrait { enum class TypeKind : int8_t { kBool = 1, @@ -653,4 +653,4 @@ std::shared_ptr MAP( std::shared_ptr STRUCT(const std::vector& children); -} // namespace substrait +} // namespace io::substrait diff --git a/substrait/common/Exceptions.cpp b/substrait/common/Exceptions.cpp index 7f5d9c73..15537cf4 100644 --- a/substrait/common/Exceptions.cpp +++ 
b/substrait/common/Exceptions.cpp @@ -3,7 +3,7 @@ #include #include "substrait/common/Exceptions.h" -namespace substrait::common { +namespace io::substrait::common { SubstraitException::SubstraitException( const std::string& exceptionCode, @@ -21,4 +21,4 @@ SubstraitException::SubstraitException( __FILE__, std::to_string(__LINE__))) {} -} // namespace substrait::common +} // namespace io::substrait::common diff --git a/substrait/function/Extension.cpp b/substrait/function/Extension.cpp index da3c323d..4d80713e 100644 --- a/substrait/function/Extension.cpp +++ b/substrait/function/Extension.cpp @@ -5,7 +5,7 @@ bool decodeFunctionVariant( const YAML::Node& node, - substrait::FunctionImplementation& function) { + io::substrait::FunctionImplementation& function) { const auto& returnType = node["return"]; if (returnType && returnType.IsScalar()) { /// Return type can be an expression. @@ -17,22 +17,22 @@ bool decodeFunctionVariant( std::string lastReturnType; while (std::getline(ss, lastReturnType, '\n')) { } - function.returnType = substrait::Type::decode(lastReturnType); + function.returnType = io::substrait::Type::decode(lastReturnType); } const auto& args = node["args"]; if (args && args.IsSequence()) { for (auto& arg : args) { if (arg["options"]) { // enum argument - auto enumArgument = std::make_shared( - arg.as()); + auto enumArgument = std::make_shared( + arg.as()); function.arguments.emplace_back(enumArgument); } else if (arg["value"]) { // value argument - auto valueArgument = std::make_shared( - arg.as()); + auto valueArgument = std::make_shared( + arg.as()); function.arguments.emplace_back(valueArgument); } else { // type argument - auto typeArgument = std::make_shared( - arg.as()); + auto typeArgument = std::make_shared( + arg.as()); function.arguments.emplace_back(typeArgument); } } @@ -43,7 +43,7 @@ bool decodeFunctionVariant( auto& min = variadic["min"]; auto& max = variadic["max"]; if (min) { - function.variadic = std::make_optional( + function.variadic 
= std::make_optional( {min.as(), max ? std::make_optional(max.as()) : std::nullopt}); } else { @@ -57,8 +57,8 @@ bool decodeFunctionVariant( } template <> -struct YAML::convert { - static bool decode(const Node& node, substrait::EnumArgument& argument) { +struct YAML::convert { + static bool decode(const Node& node, io::substrait::EnumArgument& argument) { // 'options' is required property const auto& options = node["options"]; if (options && options.IsSequence()) { @@ -72,12 +72,12 @@ struct YAML::convert { }; template <> -struct YAML::convert { - static bool decode(const Node& node, substrait::ValueArgument& argument) { +struct YAML::convert { + static bool decode(const Node& node, io::substrait::ValueArgument& argument) { const auto& value = node["value"]; if (value && value.IsScalar()) { auto valueType = value.as(); - argument.type = substrait::Type::decode(valueType); + argument.type = io::substrait::Type::decode(valueType); return true; } return false; @@ -85,10 +85,10 @@ struct YAML::convert { }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const YAML::Node& node, - substrait::TypeArgument& argument) { + io::substrait::TypeArgument& argument) { // no properties need to populate for type argument, just return true if // 'type' element exists. 
if (node["type"]) { @@ -99,25 +99,25 @@ struct YAML::convert { }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const Node& node, - substrait::ScalarFunctionImplementation& function) { + io::substrait::ScalarFunctionImplementation& function) { return decodeFunctionVariant(node, function); }; }; template <> -struct YAML::convert { +struct YAML::convert { static bool decode( const Node& node, - substrait::AggregateFunctionImplementation& function) { + io::substrait::AggregateFunctionImplementation& function) { const auto& res = decodeFunctionVariant(node, function); if (res) { const auto& intermediate = node["intermediate"]; if (intermediate) { function.intermediate = - substrait::ParameterizedType::decode(intermediate.as()); + io::substrait::ParameterizedType::decode(intermediate.as()); } } return res; @@ -125,8 +125,8 @@ struct YAML::convert { }; template <> -struct YAML::convert { - static bool decode(const Node& node, substrait::TypeVariant& typeAnchor) { +struct YAML::convert { + static bool decode(const Node& node, io::substrait::TypeVariant& typeAnchor) { const auto& name = node["name"]; if (name && name.IsScalar()) { typeAnchor.name = name.as(); @@ -136,7 +136,7 @@ struct YAML::convert { } }; -namespace substrait { +namespace io::substrait { std::shared_ptr Extension::load(const std::string& basePath) { static const std::vector extensionFiles{ @@ -276,4 +276,4 @@ void Extension::addAggregateFunctionVariant( } } -} // namespace substrait +} // namespace io::substrait diff --git a/substrait/function/Function.cpp b/substrait/function/Function.cpp index 83d597be..d7c0eee7 100644 --- a/substrait/function/Function.cpp +++ b/substrait/function/Function.cpp @@ -3,7 +3,7 @@ #include #include "substrait/function/Function.h" -namespace substrait { +namespace io::substrait { bool FunctionImplementation::tryMatch(const FunctionSignature& signature) { const auto& actualTypes = signature.arguments; @@ -83,4 +83,4 @@ bool 
AggregateFunctionImplementation::tryMatch(const FunctionSignature& signatur return matched; } -} // namespace substrait +} // namespace io::substrait diff --git a/substrait/function/FunctionLookup.cpp b/substrait/function/FunctionLookup.cpp index b918aa26..ca6fc069 100644 --- a/substrait/function/FunctionLookup.cpp +++ b/substrait/function/FunctionLookup.cpp @@ -2,7 +2,7 @@ #include "substrait/function/FunctionLookup.h" -namespace substrait { +namespace io::substrait { FunctionImplementationPtr FunctionLookup::lookupFunction( const FunctionSignature& signature) const { @@ -19,4 +19,4 @@ FunctionImplementationPtr FunctionLookup::lookupFunction( return nullptr; } -} // namespace substrait +} // namespace io::substrait diff --git a/substrait/function/tests/FunctionLookupTest.cpp b/substrait/function/tests/FunctionLookupTest.cpp index 643ac2e9..03232b22 100644 --- a/substrait/function/tests/FunctionLookupTest.cpp +++ b/substrait/function/tests/FunctionLookupTest.cpp @@ -4,7 +4,7 @@ #include #include "substrait/function/FunctionLookup.h" -using namespace substrait; +using namespace io::substrait; class FunctionLookupTest : public ::testing::Test { protected: diff --git a/substrait/type/Type.cpp b/substrait/type/Type.cpp index 53f603fe..f8b676ac 100644 --- a/substrait/type/Type.cpp +++ b/substrait/type/Type.cpp @@ -6,7 +6,7 @@ #include "substrait/type/Type.h" #include "substrait/common/Exceptions.h" -namespace substrait { +namespace io::substrait { namespace { @@ -504,4 +504,4 @@ bool StringLiteral::isMatch( } } -} // namespace substrait +} // namespace io::substrait diff --git a/substrait/type/tests/TypeTest.cpp b/substrait/type/tests/TypeTest.cpp index 2191966d..2e30a14d 100644 --- a/substrait/type/tests/TypeTest.cpp +++ b/substrait/type/tests/TypeTest.cpp @@ -3,7 +3,7 @@ #include #include "substrait/type/Type.h" -using namespace substrait; +using namespace io::substrait; class TypeTest : public ::testing::Test { protected: From 0a67c5b4e417dbd7a6dbdcfb44be4827050cad99 
Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Tue, 20 Dec 2022 17:06:34 +0800 Subject: [PATCH 21/23] fix(naming): rename variant to implementation --- include/substrait/function/Extension.h | 34 +++---- include/substrait/function/Function.h | 2 +- include/substrait/function/FunctionLookup.h | 14 +-- substrait/function/Extension.cpp | 105 ++++++++++---------- substrait/function/FunctionLookup.cpp | 8 +- 5 files changed, 82 insertions(+), 81 deletions(-) diff --git a/include/substrait/function/Extension.h b/include/substrait/function/Extension.h index 7a1b55ce..07bc7adf 100644 --- a/include/substrait/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -20,7 +20,7 @@ struct TypeVariant { using TypeVariantPtr = std::shared_ptr; -using FunctionVariantMap = +using FunctionImplMap = std::unordered_map>; using TypeVariantMap = std::unordered_map; @@ -40,40 +40,40 @@ class Extension { static std::shared_ptr load( const std::vector& extensionFiles); - /// Add a scalar function variant. - void addScalarFunctionVariant(const FunctionImplementationPtr& functionVariant); + /// Add a scalar function implementation. + void addScalarFunctionImpl(const FunctionImplementationPtr& functionImpl); - /// Add a aggregate function variant. - void addAggregateFunctionVariant(const FunctionImplementationPtr& functionVariant); + /// Add a aggregate function implementation. + void addAggregateFunctionImpl(const FunctionImplementationPtr& functionImpl); - /// Add a window function variant. - void addWindowFunctionVariant(const FunctionImplementationPtr& functionVariant); + /// Add a window function implementation. + void addWindowFunctionImpl(const FunctionImplementationPtr& functionImpl); /// Add a type variant. - void addTypeVariant(const TypeVariantPtr& functionVariant); + void addTypeVariant(const TypeVariantPtr& typeVariant); /// Lookup type variant by given type name. 
/// @return matched type variant TypeVariantPtr lookupType(const std::string& typeName) const; - const FunctionVariantMap& scalaFunctionVariantMap() const { - return scalarFunctionVariantMap_; + const FunctionImplMap& scalaFunctionImplMap() const { + return scalarFunctionImplMap_; } - const FunctionVariantMap& windowFunctionVariantMap() const { - return windowFunctionVariantMap_; + const FunctionImplMap& windowFunctionImplMap() const { + return windowFunctionImplMap_; } - const FunctionVariantMap& aggregateFunctionVariantMap() const { - return aggregateFunctionVariantMap_; + const FunctionImplMap& aggregateFunctionImplMap() const { + return aggregateFunctionImplMap_; } private: - FunctionVariantMap scalarFunctionVariantMap_; + FunctionImplMap scalarFunctionImplMap_; - FunctionVariantMap aggregateFunctionVariantMap_; + FunctionImplMap aggregateFunctionImplMap_; - FunctionVariantMap windowFunctionVariantMap_; + FunctionImplMap windowFunctionImplMap_; TypeVariantMap typeVariantMap_; }; diff --git a/include/substrait/function/Function.h b/include/substrait/function/Function.h index 74e4466f..7e8145ce 100644 --- a/include/substrait/function/Function.h +++ b/include/substrait/function/Function.h @@ -95,7 +95,7 @@ struct FunctionImplementation { ParameterizedTypePtr returnType; std::optional variadic; - /// Test if the actual types matched with this function variant. + /// Test if the actual types matched with this function implement. virtual bool tryMatch(const FunctionSignature& signature); /// Create function signature by function name and arguments. 
diff --git a/include/substrait/function/FunctionLookup.h b/include/substrait/function/FunctionLookup.h index f723ff63..90f98426 100644 --- a/include/substrait/function/FunctionLookup.h +++ b/include/substrait/function/FunctionLookup.h @@ -18,7 +18,7 @@ class FunctionLookup { virtual ~FunctionLookup() = default; protected: - [[nodiscard]] virtual FunctionVariantMap getFunctionVariants() const = 0; + [[nodiscard]] virtual FunctionImplMap getFunctionImpls() const = 0; ExtensionPtr extension_{}; }; @@ -31,8 +31,8 @@ class ScalarFunctionLookup : public FunctionLookup { : FunctionLookup(extension) {} protected: - [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { - return extension_->scalaFunctionVariantMap(); + [[nodiscard]] FunctionImplMap getFunctionImpls() const override { + return extension_->scalaFunctionImplMap(); } }; @@ -42,8 +42,8 @@ class AggregateFunctionLookup : public FunctionLookup { : FunctionLookup(extension) {} protected: - [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { - return extension_->aggregateFunctionVariantMap(); + [[nodiscard]] FunctionImplMap getFunctionImpls() const override { + return extension_->aggregateFunctionImplMap(); } }; @@ -53,8 +53,8 @@ class WindowFunctionLookup : public FunctionLookup { : FunctionLookup(extension) {} protected: - [[nodiscard]] FunctionVariantMap getFunctionVariants() const override { - return extension_->windowFunctionVariantMap(); + [[nodiscard]] FunctionImplMap getFunctionImpls() const override { + return extension_->windowFunctionImplMap(); } }; diff --git a/substrait/function/Extension.cpp b/substrait/function/Extension.cpp index 4d80713e..d2778f98 100644 --- a/substrait/function/Extension.cpp +++ b/substrait/function/Extension.cpp @@ -3,7 +3,7 @@ #include #include "substrait/function/Extension.h" -bool decodeFunctionVariant( +bool decodeFunctionImpl( const YAML::Node& node, io::substrait::FunctionImplementation& function) { const auto& returnType = 
node["return"]; @@ -103,7 +103,7 @@ struct YAML::convert { static bool decode( const Node& node, io::substrait::ScalarFunctionImplementation& function) { - return decodeFunctionVariant(node, function); + return decodeFunctionImpl(node, function); }; }; @@ -112,7 +112,7 @@ struct YAML::convert { static bool decode( const Node& node, io::substrait::AggregateFunctionImplementation& function) { - const auto& res = decodeFunctionVariant(node, function); + const auto& res = decodeFunctionImpl(node, function); if (res) { const auto& intermediate = node["intermediate"]; if (intermediate) { @@ -178,13 +178,14 @@ std::shared_ptr Extension::load( if (scalarFunctions && scalarFunctions.IsSequence()) { for (auto& scalarFunctionNode : scalarFunctions) { const auto functionName = scalarFunctionNode["name"].as(); - for (auto& scalaFunctionVariantNode : scalarFunctionNode["impls"]) { - auto scalarFunctionVariant = - scalaFunctionVariantNode.as(); - scalarFunctionVariant.name = functionName; - scalarFunctionVariant.uri = extensionUri; - extension->addScalarFunctionVariant( - std::make_shared(scalarFunctionVariant)); + for (auto& scalaFunctionImplNode : scalarFunctionNode["impls"]) { + auto scalarFunctionImpl = + scalaFunctionImplNode.as(); + scalarFunctionImpl.name = functionName; + scalarFunctionImpl.uri = extensionUri; + extension->addScalarFunctionImpl( + std::make_shared( + scalarFunctionImpl)); } } } @@ -194,15 +195,15 @@ std::shared_ptr Extension::load( for (auto& aggregateFunctionNode : aggregateFunctions) { const auto functionName = aggregateFunctionNode["name"].as(); - for (auto& aggregateFunctionVariantNode : + for (auto& aggregateFunctionImplNode : aggregateFunctionNode["impls"]) { - auto aggregateFunctionVariant = - aggregateFunctionVariantNode.as(); - aggregateFunctionVariant.name = functionName; - aggregateFunctionVariant.uri = extensionUri; - extension->addAggregateFunctionVariant( + auto aggregateFunctionImpl = + aggregateFunctionImplNode.as(); + 
aggregateFunctionImpl.name = functionName; + aggregateFunctionImpl.uri = extensionUri; + extension->addAggregateFunctionImpl( std::make_shared( - aggregateFunctionVariant)); + aggregateFunctionImpl)); } } } @@ -219,23 +220,23 @@ std::shared_ptr Extension::load( return extension; } -void Extension::addWindowFunctionVariant( - const FunctionImplementationPtr& functionVariant) { - const auto& functionVariants = - windowFunctionVariantMap_.find(functionVariant->name); - if (functionVariants != windowFunctionVariantMap_.end()) { - auto& variants = functionVariants->second; - variants.emplace_back(functionVariant); +void Extension::addWindowFunctionImpl( + const FunctionImplementationPtr& functionImpl) { + const auto& functionImpls = + windowFunctionImplMap_.find(functionImpl->name); + if (functionImpls != windowFunctionImplMap_.end()) { + auto& impls = functionImpls->second; + impls.emplace_back(functionImpl); } else { - std::vector variants; - variants.emplace_back(functionVariant); - windowFunctionVariantMap_.insert( - {functionVariant->name, std::move(variants)}); + std::vector impls; + impls.emplace_back(functionImpl); + windowFunctionImplMap_.insert( + {functionImpl->name, std::move(impls)}); } } -void Extension::addTypeVariant(const TypeVariantPtr& functionVariant) { - typeVariantMap_.insert({functionVariant->name, functionVariant}); +void Extension::addTypeVariant(const TypeVariantPtr& typeVariant) { + typeVariantMap_.insert({typeVariant->name, typeVariant}); } TypeVariantPtr Extension::lookupType(const std::string& typeName) const { @@ -246,33 +247,33 @@ TypeVariantPtr Extension::lookupType(const std::string& typeName) const { return nullptr; } -void Extension::addScalarFunctionVariant( - const FunctionImplementationPtr& functionVariant) { - const auto& functionVariants = - scalarFunctionVariantMap_.find(functionVariant->name); - if (functionVariants != scalarFunctionVariantMap_.end()) { - auto& variants = functionVariants->second; - 
variants.emplace_back(functionVariant); +void Extension::addScalarFunctionImpl( + const FunctionImplementationPtr& functionImpl) { + const auto& functionImpls = + scalarFunctionImplMap_.find(functionImpl->name); + if (functionImpls != scalarFunctionImplMap_.end()) { + auto& impls = functionImpls->second; + impls.emplace_back(functionImpl); } else { - std::vector variants; - variants.emplace_back(functionVariant); - scalarFunctionVariantMap_.insert( - {functionVariant->name, std::move(variants)}); + std::vector impls; + impls.emplace_back(functionImpl); + scalarFunctionImplMap_.insert( + {functionImpl->name, std::move(impls)}); } } -void Extension::addAggregateFunctionVariant( - const FunctionImplementationPtr& functionVariant) { - const auto& functionVariants = - aggregateFunctionVariantMap_.find(functionVariant->name); - if (functionVariants != aggregateFunctionVariantMap_.end()) { - auto& variants = functionVariants->second; - variants.emplace_back(functionVariant); +void Extension::addAggregateFunctionImpl( + const FunctionImplementationPtr& functionImpl) { + const auto& functionImpls = + aggregateFunctionImplMap_.find(functionImpl->name); + if (functionImpls != aggregateFunctionImplMap_.end()) { + auto& impls = functionImpls->second; + impls.emplace_back(functionImpl); } else { - std::vector variants; - variants.emplace_back(functionVariant); - aggregateFunctionVariantMap_.insert( - {functionVariant->name, std::move(variants)}); + std::vector impls; + impls.emplace_back(functionImpl); + aggregateFunctionImplMap_.insert( + {functionImpl->name, std::move(impls)}); } } diff --git a/substrait/function/FunctionLookup.cpp b/substrait/function/FunctionLookup.cpp index ca6fc069..7f24c47a 100644 --- a/substrait/function/FunctionLookup.cpp +++ b/substrait/function/FunctionLookup.cpp @@ -7,12 +7,12 @@ namespace io::substrait { FunctionImplementationPtr FunctionLookup::lookupFunction( const FunctionSignature& signature) const { - const auto& functionImpls = 
getFunctionVariants(); + const auto& functionImpls = getFunctionImpls(); auto functionImplsIter = functionImpls.find(signature.name); if (functionImplsIter != functionImpls.end()) { - for (const auto& candidateFunctionVariant : functionImplsIter->second) { - if (candidateFunctionVariant->tryMatch(signature)) { - return candidateFunctionVariant; + for (const auto& candidateFunctionImpl : functionImplsIter->second) { + if (candidateFunctionImpl->tryMatch(signature)) { + return candidateFunctionImpl; } } } From 7a78b4fd58adce57ee9a78bd6e3c5dbaaef3ec37 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 8 Feb 2023 09:37:40 -0800 Subject: [PATCH 22/23] Empty commit to trigger GHA checks From 7c3b0e31acd344cd7e8e3df43e5d183aa5e5ad34 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 8 Feb 2023 09:54:03 -0800 Subject: [PATCH 23/23] Apply suggestions from code review --- include/substrait/function/Extension.h | 2 +- include/substrait/function/Function.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/substrait/function/Extension.h b/include/substrait/function/Extension.h index 07bc7adf..611e5ad9 100644 --- a/include/substrait/function/Extension.h +++ b/include/substrait/function/Extension.h @@ -43,7 +43,7 @@ class Extension { /// Add a scalar function implementation. void addScalarFunctionImpl(const FunctionImplementationPtr& functionImpl); - /// Add a aggregate function implementation. + /// Add an aggregate function implementation. void addAggregateFunctionImpl(const FunctionImplementationPtr& functionImpl); /// Add a window function implementation. diff --git a/include/substrait/function/Function.h b/include/substrait/function/Function.h index 7e8145ce..a198c593 100644 --- a/include/substrait/function/Function.h +++ b/include/substrait/function/Function.h @@ -95,7 +95,7 @@ struct FunctionImplementation { ParameterizedTypePtr returnType; std::optional variadic; - /// Test if the actual types matched with this function implement. 
+ /// Test if the actual types matched with this function's implementation. virtual bool tryMatch(const FunctionSignature& signature); /// Create function signature by function name and arguments.