Skip to content

Commit 9cb051b

Browse files
authored
GH-39784: [C++][Gandiva] Fix decimal in_expr crash with cached object code (#49951)
### Rationale for this change The decimal-specific `VisitInExpression` LLVM generation path embedded a raw holder pointer directly into generated object code. When Gandiva reused cached object code, the embedded pointer could become stale, leading to crashes during cached decimal `in_expr` evaluation. ### What changes are included in this PR? - Updated decimal `in_expr` LLVM generation to load holder pointers dynamically at runtime using `arg_holder_ptrs_`, matching the existing generic `InExpression` implementation. - Added a regression test covering cached decimal `in_expr` execution. ### Are these changes tested? Yes. - Reproduced the crash locally before the fix. - Verified the new regression test passes after the fix. - Ran: - `TestIn.TestInDecimalCached` - `TestIn.*` - repeated cached execution using `--gtest_repeat=100` ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** This change fixes a cache-path crash in Gandiva when evaluating decimal `in_expr` expressions using cached object code. * GitHub Issue: #39784 Authored-by: Aaditya Srinivasan <aadityasri03@gmail.com> Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent 591c14b commit 9cb051b

2 files changed

Lines changed: 29 additions & 19 deletions

File tree

cpp/src/gandiva/llvm_generator.cc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,9 +1118,16 @@ void LLVMGenerator::Visitor::VisitInExpression<gandiva::DecimalScalar128>(
11181118
const InExprDex<gandiva::DecimalScalar128>& dex_instance =
11191119
dynamic_cast<const InExprDex<gandiva::DecimalScalar128>&>(dex);
11201120
/* add the holder at the beginning */
1121-
llvm::Constant* ptr_int_cast =
1122-
types->i64_constant((int64_t)(dex_instance.in_holder().get()));
1123-
params.push_back(ptr_int_cast);
1121+
// Switch to the entry block and load the holder pointer
1122+
auto builder = ir_builder();
1123+
llvm::BasicBlock* saved_block = builder->GetInsertBlock();
1124+
builder->SetInsertPoint(entry_block_);
1125+
1126+
llvm::Value* in_holder = generator_->LoadVectorAtIndex(
1127+
arg_holder_ptrs_, types->i64_type(), dex_instance.get_holder_idx(), "in_holder");
1128+
1129+
builder->SetInsertPoint(saved_block);
1130+
params.push_back(in_holder);
11241131

11251132
/* eval expr result */
11261133
for (auto& pair : dex.args()) {

cpp/src/gandiva/tests/in_expr_test.cc

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -184,41 +184,44 @@ TEST_F(TestIn, TestInDecimal) {
184184
auto field0 = field("f0", arrow::decimal128(precision, scale));
185185
auto schema = arrow::schema({field0});
186186

187-
// Build In f0 + f1 in (6, 11)
188187
auto node_f0 = TreeExprBuilder::MakeField(field0);
189188

190189
gandiva::DecimalScalar128 d0("6", precision, scale);
191190
gandiva::DecimalScalar128 d1("12", precision, scale);
192191
gandiva::DecimalScalar128 d2("11", precision, scale);
193192
std::unordered_set<gandiva::DecimalScalar128> in_constants({d0, d1, d2});
193+
194194
auto in_expr = TreeExprBuilder::MakeInExpressionDecimal(node_f0, in_constants);
195195
auto condition = TreeExprBuilder::MakeCondition(in_expr);
196196

197-
std::shared_ptr<Filter> filter;
198-
auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
199-
EXPECT_TRUE(status.ok());
200-
201-
// Create a row-batch with some sample data
202197
int num_records = 5;
203198
auto values0 = MakeDecimalVector({"1", "2", "0", "-6", "6"});
204199
auto array0 =
205200
MakeArrowArrayDecimal(decimal_type, values0, {true, true, true, false, true});
206-
// expected output (indices for which condition matches)
201+
207202
auto exp = MakeArrowArrayUint16({4});
208203

209-
// prepare input record batch
210204
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
211205

212-
std::shared_ptr<SelectionVector> selection_vector;
213-
status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
214-
EXPECT_TRUE(status.ok());
206+
// GH-39784: Verify decimal in_expr works correctly when the generated
207+
// object code is reused from cache.
208+
for (int i = 0; i < 2; ++i) {
209+
std::shared_ptr<Filter> filter;
210+
ASSERT_OK(Filter::Make(schema, condition, TestConfiguration(), &filter));
215211

216-
// Evaluate expression
217-
status = filter->Evaluate(*in_batch, selection_vector);
218-
EXPECT_TRUE(status.ok());
212+
if (i == 0) {
213+
EXPECT_FALSE(filter->GetBuiltFromCache());
214+
} else {
215+
EXPECT_TRUE(filter->GetBuiltFromCache());
216+
}
219217

220-
// Validate results
221-
EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
218+
std::shared_ptr<SelectionVector> selection_vector;
219+
ASSERT_OK(SelectionVector::MakeInt16(num_records, pool_, &selection_vector));
220+
221+
ASSERT_OK(filter->Evaluate(*in_batch, selection_vector));
222+
223+
EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
224+
}
222225
}
223226

224227
TEST_F(TestIn, TestInString) {

0 commit comments

Comments
 (0)