Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions be/src/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ ADD_BE_BENCHMARK(network-perf-benchmark)
ADD_BE_BENCHMARK(overflow-benchmark)
ADD_BE_BENCHMARK(parse-timestamp-benchmark)
ADD_BE_BENCHMARK(parquet-delta-benchmark)
ADD_BE_BENCHMARK(parquet-byte-stream-split-decoder-benchmark)
ADD_BE_BENCHMARK(process-wide-locks-benchmark)
ADD_BE_BENCHMARK(rle-benchmark)
ADD_BE_BENCHMARK(row-batch-serialize-benchmark)
Expand Down
514 changes: 514 additions & 0 deletions be/src/benchmarks/parquet-byte-stream-split-decoder-benchmark.cc

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions be/src/codegen/impala-ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "exprs/ai-functions-ir.cc"
#include "exprs/bit-byte-functions-ir.cc"
#include "exprs/cast-functions-ir.cc"
#include "exprs/collection-functions-ir.cc"
#include "exprs/compound-predicates-ir.cc"
#include "exprs/conditional-functions-ir.cc"
#include "exprs/datasketches-functions-ir.cc"
Expand Down
1 change: 1 addition & 0 deletions be/src/exprs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_library(ExprsIr
ai-functions-ir.cc
bit-byte-functions-ir.cc
cast-functions-ir.cc
collection-functions-ir.cc
compound-predicates-ir.cc
conditional-functions-ir.cc
datasketches-functions-ir.cc
Expand Down
32 changes: 32 additions & 0 deletions be/src/exprs/anyval-util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,22 @@ FunctionContext::TypeDesc AnyValUtil::ColumnTypeToTypeDesc(const ColumnType& typ
case TYPE_DATE:
out.type = FunctionContext::TYPE_DATE;
break;
case TYPE_ARRAY:
out.type = FunctionContext::TYPE_ARRAY;
DCHECK_EQ(type.children.size(), 1);
out.children.push_back(ColumnTypeToTypeDesc(type.children[0]));
break;
case TYPE_MAP:
out.type = FunctionContext::TYPE_MAP;
DCHECK_EQ(type.children.size(), 2);
out.children.push_back(ColumnTypeToTypeDesc(type.children[0]));
out.children.push_back(ColumnTypeToTypeDesc(type.children[1]));
break;
case TYPE_STRUCT:
out.type = FunctionContext::TYPE_STRUCT;
for (const ColumnType& child : type.children) {
out.children.push_back(ColumnTypeToTypeDesc(child));
}
break;
default:
DCHECK(false) << "Unknown type: " << type;
Expand Down Expand Up @@ -137,6 +151,24 @@ ColumnType AnyValUtil::TypeDescToColumnType(const FunctionContext::TypeDesc& typ
return ColumnType::CreateVarcharType(type.len);
case FunctionContext::TYPE_DATE:
return ColumnType(TYPE_DATE);
case FunctionContext::TYPE_ARRAY: {
DCHECK_EQ(type.children.size(), 1);
ColumnType element_type = TypeDescToColumnType(type.children[0]);
return ColumnType::CreateArrayType(element_type);
}
case FunctionContext::TYPE_MAP: {
DCHECK_EQ(type.children.size(), 2);
ColumnType key_type = TypeDescToColumnType(type.children[0]);
ColumnType value_type = TypeDescToColumnType(type.children[1]);
return ColumnType::CreateMapType(key_type, value_type);
}
case FunctionContext::TYPE_STRUCT: {
vector<ColumnType> children;
for (const FunctionContext::TypeDesc& child : type.children) {
children.push_back(TypeDescToColumnType(child));
}
return ColumnType::CreateStructType(children);
}
default:
DCHECK(false) << "Unknown type: " << type.type;
return ColumnType(INVALID_TYPE);
Expand Down
12 changes: 12 additions & 0 deletions be/src/exprs/anyval-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <algorithm>

#include "runtime/collection-value.h"
#include "runtime/date-value.h"
#include "runtime/runtime-state.h"
#include "runtime/string-value.inline.h"
Expand All @@ -42,6 +43,7 @@ using impala_udf::TimestampVal;
using impala_udf::StringVal;
using impala_udf::DecimalVal;
using impala_udf::DateVal;
using impala_udf::CollectionVal;

class ObjectPool;

Expand Down Expand Up @@ -203,6 +205,8 @@ class AnyValUtil {
case TYPE_TIMESTAMP: return sizeof(TimestampVal);
case TYPE_DECIMAL: return sizeof(DecimalVal);
case TYPE_DATE: return sizeof(DateVal);
case TYPE_ARRAY:
case TYPE_MAP: return sizeof(CollectionVal);
default:
DCHECK(false) << t;
return 0;
Expand All @@ -227,6 +231,8 @@ class AnyValUtil {
case TYPE_TIMESTAMP: return alignof(TimestampVal);
case TYPE_DECIMAL: return alignof(DecimalVal);
case TYPE_DATE: return alignof(DateVal);
case TYPE_ARRAY:
case TYPE_MAP: return alignof(CollectionVal);
default:
DCHECK(false) << t;
return 0;
Expand Down Expand Up @@ -337,6 +343,12 @@ class AnyValUtil {
*reinterpret_cast<DateVal*>(dst) =
reinterpret_cast<const DateValue*>(slot)->ToDateVal();
return;
case TYPE_ARRAY:
case TYPE_MAP: {
*reinterpret_cast<CollectionVal*>(dst) =
reinterpret_cast<const CollectionValue*>(slot)->ToCollectionVal();
return;
}
default:
DCHECK(false) << "NYI: " << type;
}
Expand Down
194 changes: 194 additions & 0 deletions be/src/exprs/collection-functions-ir.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "exprs/collection-functions.h"
#include "exprs/scalar-expr.h"
#include "runtime/collection-value.h"
#include "runtime/descriptors.h"
#include "runtime/string-value.h"
#include "runtime/tuple.h"
#include "udf/udf-internal.h"

#include <cstring>

namespace impala {

using impala_udf::FunctionContext;
using impala_udf::BooleanVal;
using impala_udf::CollectionVal;

namespace {

/// Layout information about an array's item tuple, cached so that the
/// array_contains() overloads do not have to re-resolve it for every row.
/// Populated by InitArrayContainsState() and stored as THREAD_LOCAL function state.
struct ArrayContainsState {
  /// Descriptor of the tuples that back the array's elements.
  const TupleDescriptor* tuple_desc{nullptr};
  /// The single slot of 'tuple_desc' that holds the element value.
  const SlotDescriptor* slot_desc{nullptr};
  /// Copy of tuple_desc->byte_size(); used as the stride when walking the array.
  int tuple_byte_size{0};
  /// Copy of slot_desc->tuple_offset().
  int slot_offset{0};
  /// Copy of slot_desc->null_indicator_offset().
  NullIndicatorOffset null_offset;
};

/// Scans the non-constant arguments registered on 'ctx' and returns the first
/// expression whose type reports IsCollectionType(). Returns nullptr when no such
/// argument exists (entries with a null expression are ignored).
ScalarExpr* GetArrayExpr(FunctionContext* ctx) {
  for (const auto& arg : ctx->impl()->non_constant_args()) {
    ScalarExpr* const expr = arg.first;
    if (expr == nullptr) continue;
    if (expr->type().IsCollectionType()) return expr;
  }
  return nullptr;
}

/// Fills 'state' with the item tuple layout of the collection argument of
/// array_contains().
///
/// Returns true on success. On failure an error is set on 'ctx', 'state' is left
/// untouched and false is returned. Failure cases:
///  - no non-constant collection argument could be found,
///  - the collection's item tuple descriptor is missing or has no slots,
///  - the item tuple has more than one slot.
bool InitArrayContainsState(FunctionContext* ctx, ArrayContainsState* state) {
  DCHECK(state != nullptr);

  ScalarExpr* const collection_expr = GetArrayExpr(ctx);
  if (collection_expr == nullptr) {
    ctx->SetError("array_contains requires a non-constant ARRAY argument.");
    return false;
  }

  const TupleDescriptor* const item_desc = collection_expr->GetCollectionTupleDesc();
  const bool has_slots = item_desc != nullptr && !item_desc->slots().empty();
  if (!has_slots) {
    ctx->SetError(
        "Failed to resolve collection item tuple descriptor for array_contains().");
    return false;
  }
  if (item_desc->slots().size() != 1) {
    ctx->SetError("array_contains only supports ARRAYs with a single element slot.");
    return false;
  }

  // Exactly one slot: that slot is the element value.
  const SlotDescriptor* const element_slot = item_desc->slots()[0];
  state->tuple_desc = item_desc;
  state->slot_desc = element_slot;
  state->tuple_byte_size = item_desc->byte_size();
  state->slot_offset = element_slot->tuple_offset();
  state->null_offset = element_slot->null_indicator_offset();
  return true;
}

/// Returns the THREAD_LOCAL ArrayContainsState stored on 'ctx', creating and
/// registering it first if none is present yet.
///
/// If initialisation fails, the freshly allocated state is freed, the THREAD_LOCAL
/// function state is set to nullptr and nullptr is returned (the error itself is set
/// on 'ctx' by InitArrayContainsState()).
ArrayContainsState* GetOrCreateArrayState(FunctionContext* ctx) {
  auto* const cached = reinterpret_cast<ArrayContainsState*>(
      ctx->GetFunctionState(FunctionContext::THREAD_LOCAL));
  if (cached != nullptr) return cached;

  ArrayContainsState* fresh = new ArrayContainsState();
  const bool initialized = InitArrayContainsState(ctx, fresh);
  if (!initialized) {
    delete fresh;
    fresh = nullptr;
  }
  // Stores either the initialised state or nullptr, mirroring what is returned.
  ctx->SetFunctionState(FunctionContext::THREAD_LOCAL, fresh);
  return fresh;
}

/// Returns true iff the element slot described by 'state' is nullable and its null
/// indicator is set in 'tuple'. The tuple is not consulted for non-nullable slots.
inline bool IsElementNull(const ArrayContainsState* state, Tuple* tuple) {
  if (!state->slot_desc->is_nullable()) return false;
  return tuple->IsNull(state->null_offset);
}

/// Shared implementation of array_contains() for element types that can be compared
/// with operator==.
///
/// 'NativeType' is the in-tuple representation of an element and 'ValType' is the
/// UDF value type of 'item'.
///
/// Returns:
///  - NULL if 'arr' or 'item' is NULL, or if the function state is unavailable,
///  - true if any non-NULL element compares equal to 'item.val',
///  - false otherwise. NULL elements are skipped and never match.
template <typename NativeType, typename ValType>
BooleanVal ArrayContainsPrimitive(FunctionContext* ctx, const CollectionVal& arr,
    const ValType& item) {
  if (arr.is_null || item.is_null) return BooleanVal::null();

  ArrayContainsState* state = GetOrCreateArrayState(ctx);
  if (state == nullptr) return BooleanVal::null();
  DCHECK_GT(state->tuple_byte_size, 0);

  const int stride = state->tuple_byte_size;
  const int value_offset = state->slot_offset;
  uint8_t* cursor = arr.ptr;
  for (int remaining = arr.num_tuples; remaining > 0; --remaining) {
    Tuple* element = reinterpret_cast<Tuple*>(cursor);
    // Advance up front so that the 'continue' below cannot skip the step.
    cursor += stride;
    if (IsElementNull(state, element)) continue;
    const NativeType stored =
        *reinterpret_cast<NativeType*>(element->GetSlot(value_offset));
    if (stored == item.val) return BooleanVal(true);
  }
  return BooleanVal(false);
}

} // namespace

/// Prepare hook for array_contains(). Only acts for the THREAD_LOCAL scope: allocates
/// an ArrayContainsState, initialises it from 'ctx' and stores it as the function
/// state. If initialisation fails, the allocation is released and nullptr is stored
/// instead.
void CollectionFunctions::ArrayContainsPrepare(FunctionContext* ctx,
    FunctionContext::FunctionStateScope scope) {
  if (scope != FunctionContext::THREAD_LOCAL) return;

  ArrayContainsState* prepared = new ArrayContainsState();
  if (InitArrayContainsState(ctx, prepared)) {
    ctx->SetFunctionState(scope, prepared);
    return;
  }
  delete prepared;
  ctx->SetFunctionState(scope, nullptr);
}

/// Close hook for array_contains(). Only acts for the THREAD_LOCAL scope: frees the
/// ArrayContainsState stored on 'ctx' (if any) and resets the stored pointer to
/// nullptr so it cannot be used after being freed.
void CollectionFunctions::ArrayContainsClose(FunctionContext* ctx,
    FunctionContext::FunctionStateScope scope) {
  if (scope != FunctionContext::THREAD_LOCAL) return;

  // 'scope' is known to be THREAD_LOCAL from here on.
  auto* owned_state =
      reinterpret_cast<ArrayContainsState*>(ctx->GetFunctionState(scope));
  delete owned_state;
  ctx->SetFunctionState(scope, nullptr);
}

/// array_contains() overload for BooleanVal items; element slots are read as 'bool'.
/// See ArrayContainsPrimitive() for the NULL handling and return values.
BooleanVal CollectionFunctions::ArrayContainsBoolean(
    FunctionContext* ctx, const CollectionVal& arr, const BooleanVal& item) {
  return ArrayContainsPrimitive<bool, BooleanVal>(ctx, arr, item);
}

/// array_contains() overload for TinyIntVal items; element slots are read as
/// 'int8_t'. See ArrayContainsPrimitive() for the NULL handling and return values.
BooleanVal CollectionFunctions::ArrayContainsTinyInt(
    FunctionContext* ctx, const CollectionVal& arr, const TinyIntVal& item) {
  return ArrayContainsPrimitive<int8_t, TinyIntVal>(ctx, arr, item);
}

/// array_contains() overload for SmallIntVal items; element slots are read as
/// 'int16_t'. See ArrayContainsPrimitive() for the NULL handling and return values.
BooleanVal CollectionFunctions::ArrayContainsSmallInt(
    FunctionContext* ctx, const CollectionVal& arr, const SmallIntVal& item) {
  return ArrayContainsPrimitive<int16_t, SmallIntVal>(ctx, arr, item);
}

/// array_contains() overload for IntVal items; element slots are read as 'int32_t'.
/// See ArrayContainsPrimitive() for the NULL handling and return values.
BooleanVal CollectionFunctions::ArrayContainsInt(
    FunctionContext* ctx, const CollectionVal& arr, const IntVal& item) {
  return ArrayContainsPrimitive<int32_t, IntVal>(ctx, arr, item);
}

/// array_contains() overload for BigIntVal items; element slots are read as
/// 'int64_t'. See ArrayContainsPrimitive() for the NULL handling and return values.
BooleanVal CollectionFunctions::ArrayContainsBigInt(
    FunctionContext* ctx, const CollectionVal& arr, const BigIntVal& item) {
  return ArrayContainsPrimitive<int64_t, BigIntVal>(ctx, arr, item);
}

/// array_contains() overload for FloatVal items; element slots are read as 'float'
/// and compared with operator==. See ArrayContainsPrimitive() for the NULL handling
/// and return values.
BooleanVal CollectionFunctions::ArrayContainsFloat(
    FunctionContext* ctx, const CollectionVal& arr, const FloatVal& item) {
  return ArrayContainsPrimitive<float, FloatVal>(ctx, arr, item);
}

/// array_contains() overload for DoubleVal items; element slots are read as 'double'
/// and compared with operator==. See ArrayContainsPrimitive() for the NULL handling
/// and return values.
BooleanVal CollectionFunctions::ArrayContainsDouble(
    FunctionContext* ctx, const CollectionVal& arr, const DoubleVal& item) {
  return ArrayContainsPrimitive<double, DoubleVal>(ctx, arr, item);
}

/// array_contains() overload for StringVal items. Element slots are read as
/// StringValue and compared byte-wise against 'item'.
///
/// Returns:
///  - NULL if 'arr' or 'item' is NULL, or if the function state is unavailable,
///  - true if any non-NULL element has the same length and bytes as 'item'
///    (two zero-length strings match without looking at their pointers),
///  - false otherwise. NULL elements are skipped and never match.
BooleanVal CollectionFunctions::ArrayContainsString(
    FunctionContext* ctx, const CollectionVal& arr, const StringVal& item) {
  if (arr.is_null || item.is_null) return BooleanVal::null();

  ArrayContainsState* state = GetOrCreateArrayState(ctx);
  if (state == nullptr) return BooleanVal::null();
  DCHECK_GT(state->tuple_byte_size, 0);

  const int stride = state->tuple_byte_size;
  const int value_offset = state->slot_offset;
  uint8_t* cursor = arr.ptr;
  for (int remaining = arr.num_tuples; remaining > 0; --remaining) {
    Tuple* element = reinterpret_cast<Tuple*>(cursor);
    // Advance up front so that the 'continue' statements below cannot skip the step.
    cursor += stride;
    if (IsElementNull(state, element)) continue;

    StringValue* stored =
        reinterpret_cast<StringValue*>(element->GetSlot(value_offset));
    const auto stored_len = stored->IrLen();
    if (stored_len != item.len) continue;
    // Equal lengths of zero: both strings are empty, nothing left to compare.
    if (stored_len == 0) return BooleanVal(true);

    auto stored_bytes = stored->IrPtr();
    // Never hand a null pointer to memcmp(); such an element cannot match.
    if (stored_bytes == nullptr || item.ptr == nullptr) continue;
    if (memcmp(stored_bytes, reinterpret_cast<const char*>(item.ptr), stored_len)
        == 0) {
      return BooleanVal(true);
    }
  }
  return BooleanVal(false);
}

} // namespace impala
62 changes: 62 additions & 0 deletions be/src/exprs/collection-functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "udf/udf.h"
#include "udf/udf-internal.h" // for CollectionVal

namespace impala {

using impala_udf::FunctionContext;
using impala_udf::BooleanVal;
using impala_udf::TinyIntVal;
using impala_udf::SmallIntVal;
using impala_udf::IntVal;
using impala_udf::BigIntVal;
using impala_udf::FloatVal;
using impala_udf::DoubleVal;
using impala_udf::StringVal;
using impala_udf::CollectionVal;

/// Built-in functions operating on collection values. Currently provides the
/// array_contains() family: one overload per supported item value type, plus the
/// prepare/close hooks that manage the per-thread state those overloads share.
class CollectionFunctions {
 public:
  /// Prepare hook for array_contains(). Sets up the THREAD_LOCAL function state on
  /// 'ctx'; does nothing for other scopes.
  static void ArrayContainsPrepare(
      FunctionContext* ctx, FunctionContext::FunctionStateScope scope);

  /// Close hook for array_contains(). Releases the THREAD_LOCAL function state held
  /// by 'ctx'; does nothing for other scopes.
  static void ArrayContainsClose(
      FunctionContext* ctx, FunctionContext::FunctionStateScope scope);

  /// The overloads below report whether 'arr' contains an element equal to 'item'.
  /// Each returns NULL if 'arr' or 'item' is NULL (or the function state could not
  /// be set up), true if a non-NULL element equals 'item', and false otherwise.
  /// NULL elements of 'arr' are skipped.
  static BooleanVal ArrayContainsBoolean(FunctionContext* ctx,
      const CollectionVal& arr, const BooleanVal& item);
  static BooleanVal ArrayContainsTinyInt(FunctionContext* ctx,
      const CollectionVal& arr, const TinyIntVal& item);
  static BooleanVal ArrayContainsSmallInt(FunctionContext* ctx,
      const CollectionVal& arr, const SmallIntVal& item);
  static BooleanVal ArrayContainsInt(FunctionContext* ctx,
      const CollectionVal& arr, const IntVal& item);
  static BooleanVal ArrayContainsBigInt(FunctionContext* ctx,
      const CollectionVal& arr, const BigIntVal& item);
  static BooleanVal ArrayContainsFloat(FunctionContext* ctx,
      const CollectionVal& arr, const FloatVal& item);
  static BooleanVal ArrayContainsDouble(FunctionContext* ctx,
      const CollectionVal& arr, const DoubleVal& item);

  /// String overload: elements match when they have the same length and bytes as
  /// 'item'. NULL handling is the same as for the overloads above.
  static BooleanVal ArrayContainsString(FunctionContext* ctx,
      const CollectionVal& arr, const StringVal& item);
};

} // namespace impala
2 changes: 2 additions & 0 deletions be/src/exprs/scalar-expr-evaluator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "exprs/bit-byte-functions.h"
#include "exprs/case-expr.h"
#include "exprs/cast-functions.h"
#include "exprs/collection-functions.h"
#include "exprs/compound-predicates.h"
#include "exprs/conditional-functions.h"
#include "exprs/datasketches-functions.h"
Expand Down Expand Up @@ -454,6 +455,7 @@ void ScalarExprEvaluator::InitBuiltinsDummy() {
AggregateFunctions::InitNull(nullptr, nullptr);
BitByteFunctions::CountSet(nullptr, TinyIntVal::null());
CastFunctions::CastToBooleanVal(nullptr, TinyIntVal::null());
CollectionFunctions::ArrayContainsInt(nullptr, CollectionVal::null(), IntVal::null());
CompoundPredicate::Not(nullptr, BooleanVal::null());
ConditionalFunctions::NullIfZero(nullptr, TinyIntVal::null());
DataSketchesFunctions::DsHllEstimate(nullptr, StringVal::null());
Expand Down
Loading