Skip to content

Commit

Permalink
[fix](inverted index) normal process query for null condition when index is missing (apache#33663)
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 authored Apr 17, 2024
1 parent 7408df2 commit 8522d63
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 11 deletions.
38 changes: 35 additions & 3 deletions be/src/olap/rowset/segment_v2/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "olap/types.h"
#include "runtime/collection_value.h"
#include "util/block_compression.h"
#include "util/debug_points.h"
#include "util/faststring.h"
#include "util/rle_encoding.h"
#include "vec/core/types.h"
Expand Down Expand Up @@ -481,9 +482,40 @@ Status ScalarColumnWriter::init() {
}

if (_opts.need_inverted_index) {
RETURN_IF_ERROR(InvertedIndexColumnWriter::create(get_field(), &_inverted_index_builder,
_opts.inverted_index_file_writer,
_opts.inverted_index));
do {
DBUG_EXECUTE_IF("column_writer.init", {
// Stand-in inverted index writer whose overrides do no work: every
// Status-returning hook reports OK and both size accessors report 0.
class InvertedIndexColumnWriterEmptyImpl final : public InvertedIndexColumnWriter {
public:
    // Lifecycle hooks: nothing to set up, flush or clean up.
    Status init() override { return Status::OK(); }
    Status finish() override { return Status::OK(); }
    void close_on_error() override {}

    // Size accessors: this writer never holds or emits any data.
    int64_t size() const override { return 0; }
    int64_t file_size() const override { return 0; }

    // Ingest hooks: the arguments are accepted and ignored.
    Status add_nulls(uint32_t /*count*/) override { return Status::OK(); }
    Status add_values(const std::string /*name*/, const void* /*values*/,
                      size_t /*count*/) override {
        return Status::OK();
    }
    Status add_array_values(size_t /*field_size*/, const CollectionValue* /*values*/,
                            size_t /*count*/) override {
        return Status::OK();
    }
    Status add_array_values(size_t /*field_size*/, const void* /*value_ptr*/,
                            const uint8_t* /*null_map*/, const uint8_t* /*offsets_ptr*/,
                            size_t /*count*/) override {
        return Status::OK();
    }
};

_inverted_index_builder = std::make_unique<InvertedIndexColumnWriterEmptyImpl>();

break;
});

RETURN_IF_ERROR(InvertedIndexColumnWriter::create(get_field(), &_inverted_index_builder,
_opts.inverted_index_file_writer,
_opts.inverted_index));
} while (false);
}
if (_opts.need_bloom_filter) {
if (_opts.is_ngram_bf_index) {
Expand Down
22 changes: 14 additions & 8 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
if (cache->lookup(cache_key, cache_handle)) {
return Status::OK();
}
std::unique_ptr<InvertedIndexFileReader> multi_compound_reader;

RETURN_IF_ERROR(check_file_exist(index_file_key));

if (!dir) {
// TODO: ugly code here, try to refactor.
Expand Down Expand Up @@ -209,13 +210,7 @@ Status InvertedIndexReader::handle_searcher_cache(
auto mem_tracker = std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead");
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
IndexSearcherPtr searcher;
bool exists = false;
RETURN_IF_ERROR(_inverted_index_file_reader->index_file_exist(&_index_meta, &exists));
if (!exists) {
LOG(WARNING) << "inverted index: " << index_file_key << " not exist.";
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"inverted index input file {} not found", index_file_key);
}
RETURN_IF_ERROR(check_file_exist(index_file_key));
auto dir = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
// try to reuse index_searcher's directory to read null_bitmap to cache
// to avoid open directory additionally for null_bitmap
Expand Down Expand Up @@ -247,6 +242,17 @@ Status InvertedIndexReader::create_index_searcher(lucene::store::Directory* dir,
return Status::OK();
};

// Asks _inverted_index_file_reader whether the index file described by
// _index_meta exists.
//
// index_file_key is used only to label the warning log and the error message.
// Returns OK when the file exists, an INVERTED_INDEX_FILE_NOT_FOUND error when
// it does not, and forwards any error produced by the existence probe itself.
Status InvertedIndexReader::check_file_exist(const std::string& index_file_key) {
    bool found = false;
    RETURN_IF_ERROR(_inverted_index_file_reader->index_file_exist(&_index_meta, &found));
    if (found) {
        return Status::OK();
    }

    // Missing index file: log it, then report it to the caller as an error.
    LOG(WARNING) << "inverted index: " << index_file_key << " not exist.";
    return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
            "inverted index input file {} not found", index_file_key);
}

Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state,
std::unique_ptr<InvertedIndexIterator>* iterator) {
*iterator = InvertedIndexIterator::create_unique(stats, runtime_state, shared_from_this());
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea
MemTracker* mem_tracker,
InvertedIndexReaderType reader_type);

// Checks, through _inverted_index_file_reader, that the index file for this
// reader's _index_meta exists. Returns OK if it does; otherwise logs a warning
// and returns an INVERTED_INDEX_FILE_NOT_FOUND error. index_file_key is only
// used to identify the file in the log line and the error message.
Status check_file_exist(const std::string& index_file_key);

protected:
friend class InvertedIndexIterator;
std::shared_ptr<InvertedIndexFileReader> _inverted_index_file_reader;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
2

-- !sql --
3

Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.


suite("test_no_index_null", "nonConcurrent") {
    // Table under test, and the BE debug point that is switched on while loading data.
    def tableName = "test_no_index_null"
    def debugPoint = "column_writer.init"

    // Creates a duplicate-key table with an inverted index on `request`.
    // The SQL text is kept flush-left so the statement sent to the server is unchanged.
    def createIndexedTable = { name ->
        sql """
CREATE TABLE ${name} (
`@timestamp` INT NULL,
`clientip` VARCHAR(20) NULL,
`request` TEXT NULL,
`status` INT NULL,
`size` INT NULL,
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true", "lower_case" = "true") COMMENT ''''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT 'OLAP'
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""
    }

    sql "DROP TABLE IF EXISTS ${tableName}"
    createIndexedTable(tableName)

    try {
        GetDebugPoint().enableDebugPointForAllBEs(debugPoint)

        // Five single-row loads: three with a request value, two with NULL.
        sql """ INSERT INTO ${tableName} VALUES (1, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """
        sql """ INSERT INTO ${tableName} VALUES (1, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """
        sql """ INSERT INTO ${tableName} VALUES (1, '40.135.0.0', NULL, 200, 24736); """
        sql """ INSERT INTO ${tableName} VALUES (1, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """
        sql """ INSERT INTO ${tableName} VALUES (1, '40.135.0.0', NULL, 200, 24736); """
        sql 'sync'

        // Null / not-null predicates on the indexed column.
        qt_sql """ select count() from ${tableName} where request IS NULL; """
        qt_sql """ select count() from ${tableName} where request IS NOT NULL; """
    } finally {
        // Always switch the debug point back off, even if a statement above failed.
        GetDebugPoint().disableDebugPointForAllBEs(debugPoint)
    }
}

0 comments on commit 8522d63

Please sign in to comment.