diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.cpp index d503a118834dfd..8698202f5def82 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.cpp @@ -25,7 +25,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/term_query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { Status BooleanQuery::Builder::set_op(OperatorType type) { _op = DORIS_TRY(OperatorFactory::create(type)); @@ -88,4 +88,4 @@ void BooleanQuery::search_by_skiplist(const std::shared_ptr& r void BooleanQuery::search_by_bitmap(const std::shared_ptr& result) {} -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.h index c66bf1d3536cfb..4779b0dbc84e63 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query.h @@ -20,7 +20,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/node.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { enum class OperatorType; class BooleanQuery : public Query { @@ -53,4 +53,4 @@ class BooleanQuery : public Query { Node _op; }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.cpp index 4f32e1d84457ac..faa7f00397321a 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.cpp @@ -22,7 +22,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/term_query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { Status ConjunctionOp::init() { if (_childrens.size() < 2) { @@ -91,4 +91,4 @@ int64_t ConjunctionOp::cost() const { return visit_node(*_lead1, Cost {}); } -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.h index 37f0eea0a63cae..484163f1eb0712 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.h @@ -19,7 +19,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { class ConjunctionOp : public Operator { public: @@ -43,4 +43,4 @@ class ConjunctionOp : public Operator { using ConjunctionOpPtr = std::shared_ptr; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.cpp index e4f196146f9af8..3a627893763f85 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.cpp +++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.cpp @@ -23,7 +23,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/term_query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { DisjunctionOp::~DisjunctionOp() { while (!_pq.empty()) { @@ -83,4 +83,4 @@ int64_t DisjunctionOp::cost() const { return _cost; } -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.h index 7abbef45efa1b0..591f3caa1cd86b 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.h @@ -21,7 +21,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { class DisjunctionOp : public Operator { public: @@ -56,4 +56,4 @@ class DisjunctionOp : public Operator { using DisjunctionOpPtr = std::shared_ptr; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.cpp index 710b4b7ed54f85..cead7a0bec4e3c 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.cpp @@ -20,7 +20,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/conjunction_op.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/disjunction_op.h" -namespace doris::segment_v2::idx_query_v2 { 
+namespace doris::segment_v2::inverted_index { Result OperatorFactory::create(OperatorType query_type) { switch (query_type) { @@ -33,4 +33,4 @@ Result OperatorFactory::create(OperatorType query_type) { } } -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.h index 5432a10bbb6e88..ec96c3de671c32 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.h @@ -19,7 +19,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/node.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { enum class QueryType; enum class OperatorType; @@ -38,4 +38,4 @@ class OperatorFactory { static Result create(OperatorType query_type); }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.inline.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.inline.h index fff60cc0626e3a..660090639b7c3e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.inline.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/factory.inline.h @@ -19,7 +19,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/term_query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { template Result QueryFactory::create(QueryType query_type, Args&&... args) { @@ -44,4 +44,4 @@ Result QueryFactory::create(QueryType query_type, Args&&... 
args) { } } -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/node.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/node.h index 2e70f50548ab72..ffaca5d507189c 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/node.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/node.h @@ -24,7 +24,7 @@ #include "common/status.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { class ConjunctionOp; class DisjunctionOp; @@ -125,4 +125,4 @@ struct Cost { } }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/operator.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/operator.h index 47eeaf3e1d76eb..134b11350af622 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/operator.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/operator.h @@ -21,7 +21,7 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/node.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { enum class OperatorType { OP_AND = 0, OP_OR }; @@ -42,4 +42,4 @@ class Operator { std::vector _childrens; }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query.h index fc9e54e498bc7e..23fee72d407c5d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query.h @@ -19,7 +19,7 @@ #include 
"olap/rowset/segment_v2/inverted_index/query_v2/query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { class PhraseQuery : public Query { public: @@ -35,4 +35,4 @@ class PhraseQuery : public Query { int64_t cost() const { return -1; } }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/query.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/query.h index 8bf6cf39c9dde6..08b88e9e899a6f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/query.h @@ -30,7 +30,7 @@ #include "common/status.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { enum class QueryType { TERM_QUERY, PHRASE_QUERY, ROARING_QUERY }; @@ -49,4 +49,4 @@ class Query { virtual ~Query() = default; }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/query_result.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/query_result.h new file mode 100644 index 00000000000000..f1b4ab2ad05911 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/query_result.h @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once +#include +#include + +namespace doris::segment_v2::inverted_index { +// Pairs a data bitmap with a null bitmap; &=, |= and -= do nothing unless all four bitmaps are set. +class InvertedIndexResultBitmap { +private: + std::shared_ptr<roaring::Roaring> _data_bitmap = nullptr; + std::shared_ptr<roaring::Roaring> _null_bitmap = nullptr; + +public: + // Default constructor + InvertedIndexResultBitmap() = default; + ~InvertedIndexResultBitmap() = default; + + // Constructor with arguments + InvertedIndexResultBitmap(std::shared_ptr<roaring::Roaring> data_bitmap, + std::shared_ptr<roaring::Roaring> null_bitmap) + : _data_bitmap(std::move(data_bitmap)), _null_bitmap(std::move(null_bitmap)) {} + + // Copy constructor: deep copy through copy assignment, which tolerates unset bitmaps + InvertedIndexResultBitmap(const InvertedIndexResultBitmap& other) { *this = other; } + + // Move constructor + InvertedIndexResultBitmap(InvertedIndexResultBitmap&& other) noexcept + : _data_bitmap(std::move(other._data_bitmap)), + _null_bitmap(std::move(other._null_bitmap)) {} + + // Copy assignment operator: deep-copies each bitmap; an unset bitmap stays unset + InvertedIndexResultBitmap& operator=(const InvertedIndexResultBitmap& other) { + if (this != &other) { // Prevent self-assignment + const auto clone = [](const std::shared_ptr<roaring::Roaring>& src) { + return src ? std::make_shared<roaring::Roaring>(*src) : nullptr; + }; + _data_bitmap = clone(other._data_bitmap); + _null_bitmap = clone(other._null_bitmap); + } + return *this; + } + + // Move assignment operator + InvertedIndexResultBitmap& operator=(InvertedIndexResultBitmap&& other) noexcept { + if (this != &other) { // Prevent self-assignment + _data_bitmap = std::move(other._data_bitmap); + _null_bitmap = std::move(other._null_bitmap); + } + return *this; + } + + // Operator &=: a row becomes null when it is null on one side and data or null on the other + InvertedIndexResultBitmap& 
operator&=(const InvertedIndexResultBitmap& other) { + if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) { + auto new_null_bitmap = (*_data_bitmap & *other._null_bitmap) | + (*_null_bitmap & *other._data_bitmap) | + (*_null_bitmap & *other._null_bitmap); + *_data_bitmap &= *other._data_bitmap; + *_null_bitmap = std::move(new_null_bitmap); + } + return *this; + } + + // Operator |=: data is merged first so rows that are data on either side leave the null bitmap + InvertedIndexResultBitmap& operator|=(const InvertedIndexResultBitmap& other) { + if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) { + *_data_bitmap |= *other._data_bitmap; + auto new_null_bitmap = (*_null_bitmap | *other._null_bitmap) - *_data_bitmap; + *_null_bitmap = std::move(new_null_bitmap); + } + return *this; + } + + // NOT operation: universe must be non-null, it is dereferenced when both bitmaps are set + const InvertedIndexResultBitmap& op_not(const roaring::Roaring* universe) const { + if (_data_bitmap && _null_bitmap) { + *_data_bitmap = *universe - *_data_bitmap - *_null_bitmap; + // The _null_bitmap remains unchanged. 
+ } + return *this; + } + + // Operator -= + InvertedIndexResultBitmap& operator-=(const InvertedIndexResultBitmap& other) { + if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) { + *_data_bitmap -= *other._data_bitmap; + *_data_bitmap -= *other._null_bitmap; + *_null_bitmap -= *other._null_bitmap; + } + return *this; + } + + void mask_out_null() { + if (_data_bitmap && _null_bitmap) { + *_data_bitmap -= *_null_bitmap; + } + } + + const std::shared_ptr<roaring::Roaring>& get_data_bitmap() const { return _data_bitmap; } + const std::shared_ptr<roaring::Roaring>& get_null_bitmap() const { return _null_bitmap; } + + // True when neither bitmap is set (both pointers are null); bitmap contents are not inspected + bool is_empty() const { return (_data_bitmap == nullptr && _null_bitmap == nullptr); } +}; +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.cpp index 48f53901559fbe..1b1157b6a6b883 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.cpp @@ -17,9 +17,9 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { RoaringQuery::RoaringQuery(const std::shared_ptr& roaring) : _roaring(roaring), _iter(_roaring->end()), _end(_roaring->end()) {} -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h index c092fdcd4c57e0..70d75181cb783e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/roaring_query.h @@ 
-22,7 +22,7 @@ #include "gutil/integral_types.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { class RoaringQuery : public Query { public: @@ -60,4 +60,4 @@ class RoaringQuery : public Query { roaring::Roaring::const_iterator _end; }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp index ab2d32fca763c8..127a8c99df8b29 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp @@ -19,7 +19,7 @@ #include -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { TermQuery::~TermQuery() { if (_term_docs) { @@ -27,13 +27,13 @@ TermQuery::~TermQuery() { } } -TermQuery::TermQuery(const std::shared_ptr& searcher, +TermQuery::TermQuery(const std::shared_ptr& reader, const TQueryOptions& query_options, QueryInfo query_info) { std::wstring ws_term = StringUtil::string_to_wstring(query_info.terms[0]); auto* t = _CLNEW Term(query_info.field_name.c_str(), ws_term.c_str()); - _term_docs = searcher->getReader()->termDocs(t); + _term_docs = reader->termDocs(t); _iter = TermIterator(_term_docs); _CLDECDELETE(t); } -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.h index 419be194e42f44..27e75c37e93a44 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.h +++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.h @@ -21,11 +21,11 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/query.h" -namespace doris::segment_v2::idx_query_v2 { +namespace doris::segment_v2::inverted_index { class TermQuery : public Query { public: - TermQuery(const std::shared_ptr& searcher, + TermQuery(const std::shared_ptr& reader, const TQueryOptions& query_options, QueryInfo query_info); ~TermQuery() override; @@ -41,4 +41,4 @@ class TermQuery : public Query { TermIterator _iter; }; -} // namespace doris::segment_v2::idx_query_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/reader/reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index/reader/reader.cpp new file mode 100644 index 00000000000000..ffdfc739ed90fc --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/reader/reader.cpp @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/rowset/segment_v2/inverted_index/reader/reader.h" + +#include "CLucene.h" + +namespace doris::segment_v2::inverted_index { + +Status InvertedIndexReader::init_index_reader() { + auto close_directory = true; + lucene::index::IndexReader* reader = nullptr; + try { + auto result = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta)); + auto directory = result.release(); + reader = lucene::index::IndexReader::open( + directory, config::inverted_index_read_buffer_size, close_directory); + _CLDECDELETE(directory) + } catch (const CLuceneError& e) { + std::string msg = "FulltextIndexSearcherBuilder build error: " + std::string(e.what()); + if (e.number() == CL_ERR_EmptyIndexSegment) { + return Status::Error(msg); + } + return Status::Error(msg); + } + _index_reader = std::shared_ptr(reader); + return Status::OK(); +} + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/reader/reader.h b/be/src/olap/rowset/segment_v2/inverted_index/reader/reader.h new file mode 100644 index 00000000000000..548f53c2f6fe45 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/reader/reader.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" +#include "olap/rowset/segment_v2/inverted_index_file_reader.h" +#include "olap/tablet_schema.h" + +namespace doris::segment_v2::inverted_index { +class InvertedIndexReader { + ENABLE_FACTORY_CREATOR(InvertedIndexReader); + +public: + explicit InvertedIndexReader( + const TabletIndex* index_meta, + std::shared_ptr inverted_index_file_reader) + : _inverted_index_file_reader(std::move(inverted_index_file_reader)), + _index_meta(*index_meta) {} + + ~InvertedIndexReader() = default; + + Status init_index_reader(); + std::shared_ptr get_index_reader() { return _index_reader; } + +private: + std::shared_ptr _inverted_index_file_reader = nullptr; + std::shared_ptr _index_reader = nullptr; + TabletIndex _index_meta; +}; +using InvertedIndexReaderPtr = std::shared_ptr; +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h index 8bc28b1882f9d8..f20b05eac63d5f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h @@ -56,7 +56,7 @@ class InvertedIndexFileReader { : _fs(std::move(fs)), _index_path_prefix(std::move(index_path_prefix)), _storage_format(storage_format), - _idx_file_info(idx_file_info) {} + _idx_file_info(std::move(idx_file_info)) {} Status init(int32_t read_buffer_size = config::inverted_index_read_buffer_size, bool open_idx_file_cache = false); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index f752c5300204de..52f7df6c76ae8d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ 
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -599,8 +599,9 @@ lucene::store::IndexOutput* DorisFSDirectory::createOutput(const char* name) { } catch (CLuceneError& err) { ret->close(); _CLDELETE(ret) - LOG(WARNING) << "FSIndexOutput init error: " << err.what(); - _CLTHROWA(CL_ERR_IO, "FSIndexOutput init error"); + auto err_msg = fmt::format("FSIndexOutput init error: {}", err.what()); + LOG(WARNING) << err_msg; + _CLTHROWA(CL_ERR_IO, err_msg.c_str()); } return ret; } diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/query_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/query_test.cpp index 688f72c74f3321..774fb6aeccc250 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/query_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/query_test.cpp @@ -27,6 +27,8 @@ #include "olap/rowset/segment_v2/inverted_index/query_v2/factory.inline.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/node.h" #include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h" +#include "olap/rowset/segment_v2/inverted_index/reader/reader.h" +#include "olap/rowset/segment_v2/inverted_index_fs_directory.h" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow-field" @@ -46,13 +48,36 @@ namespace doris::segment_v2 { class QueryTest : public testing::Test { public: - const std::string kTestDir = "./ut_dir/query_test"; + // Scratch directory wiped in SetUp/TearDown; kept distinct from ReaderTest's "./ut_dir/reader_test". + const std::string kTestDir = "./ut_dir/query_test"; + const std::string rowset_id = "test_rowset"; + const int64_t seg_id = 1; + const std::string index_path_prefix = "test_rowset_1"; + std::string test_dir = ""; + const InvertedIndexStorageFormatPB storage_format = InvertedIndexStorageFormatPB::V2; + const int64_t index_id = 1; + TabletIndex index_meta; + std::string local_fs_index_path = ""; void SetUp() override { auto st = io::global_local_filesystem()->delete_directory(kTestDir); ASSERT_TRUE(st.ok()) << st; st = 
io::global_local_filesystem()->create_directory(kTestDir); ASSERT_TRUE(st.ok()) << st; + + TabletIndexPB index_meta_pb; + index_meta_pb.set_index_id(index_id); + index_meta.init_from_pb(index_meta_pb); + + test_dir = kTestDir + "/" + index_path_prefix; + auto fs = io::global_local_filesystem(); + + local_fs_index_path = InvertedIndexDescriptor::get_temporary_index_path( + test_dir, rowset_id, seg_id, index_meta.index_id(), index_meta.get_index_suffix()); + st = io::global_local_filesystem()->delete_directory(local_fs_index_path); + ASSERT_TRUE(st.ok()) << st; + st = io::global_local_filesystem()->create_directory(local_fs_index_path); + ASSERT_TRUE(st.ok()) << st; } void TearDown() override { EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); @@ -62,10 +87,10 @@ class QueryTest : public testing::Test { ~QueryTest() override = default; }; -using namespace idx_query_v2; +using namespace inverted_index; static Status boolean_query_search(const std::string& name, - const std::shared_ptr& search) { + const std::shared_ptr& reader) { BooleanQuery::Builder builder; RETURN_IF_ERROR(builder.set_op(OperatorType::OP_AND)); { @@ -88,7 +113,7 @@ static Status boolean_query_search(const std::string& name, query_info.field_name = StringUtil::string_to_wstring(name); query_info.terms.emplace_back("hm"); auto clause = DORIS_TRY( - QueryFactory::create(QueryType::TERM_QUERY, search, options, query_info)); + QueryFactory::create(QueryType::TERM_QUERY, reader, options, query_info)); RETURN_IF_ERROR(builder1.add(clause)); } { @@ -97,7 +122,7 @@ static Status boolean_query_search(const std::string& name, query_info.field_name = StringUtil::string_to_wstring(name); query_info.terms.emplace_back("ac"); auto clause = DORIS_TRY( - QueryFactory::create(QueryType::TERM_QUERY, search, options, query_info)); + QueryFactory::create(QueryType::TERM_QUERY, reader, options, query_info)); RETURN_IF_ERROR(builder1.add(clause)); } auto boolean_query = 
DORIS_TRY(builder1.build()); @@ -114,8 +139,10 @@ static Status boolean_query_search(const std::string& name, } TEST_F(QueryTest, test_boolean_query) { - std::string name = "name"; + DorisFSDirectory* dir = _CLNEW DorisFSDirectory(); + dir->init(io::global_local_filesystem(), local_fs_index_path.c_str(), nullptr); + std::string name = "name"; // write { std::vector datas; @@ -132,7 +159,7 @@ TEST_F(QueryTest, test_boolean_query) { auto* analyzer = _CLNEW lucene::analysis::standard95::StandardAnalyzer(); analyzer->set_stopwords(nullptr); - auto* indexwriter = _CLNEW lucene::index::IndexWriter(kTestDir.c_str(), analyzer, true); + auto* indexwriter = _CLNEW lucene::index::IndexWriter(dir, analyzer, true); indexwriter->setMaxBufferedDocs(100); indexwriter->setRAMBufferSizeMB(-1); indexwriter->setMaxFieldLength(0x7FFFFFFFL); @@ -163,20 +190,41 @@ TEST_F(QueryTest, test_boolean_query) { _CLLDELETE(doc); _CLLDELETE(analyzer); _CLLDELETE(char_string_reader); + + auto writer = std::make_unique( + io::global_local_filesystem(), test_dir, rowset_id, seg_id, storage_format); + InvertedIndexDirectoryMap dir_map; + dir_map.emplace(std::make_pair(index_id, ""), + std::unique_ptr(dir)); + auto st = writer->initialize(dir_map); + EXPECT_TRUE(st.ok()); + st = writer->close(); + EXPECT_TRUE(st.ok()); } - // query + // open reader { - auto* dir = FSDirectory::getDirectory(kTestDir.c_str()); - auto* reader = IndexReader::open(dir, 1024 * 1024, true); - auto search = std::make_shared(reader); - - Status res = boolean_query_search(name, search); - EXPECT_TRUE(res.ok()); - + auto file_reader = std::make_shared(io::global_local_filesystem(), + test_dir, storage_format); + auto st = file_reader->init(4096); + if (!st.ok()) { + std::cerr << st.msg() << std::endl; + ASSERT_TRUE(st.ok()); + } + auto inverted_index_reader = + inverted_index::InvertedIndexReader::create_unique(&index_meta, file_reader); + st = inverted_index_reader->init_index_reader(); + if (!st.ok()) { + std::cerr << st.msg() 
<< std::endl; + ASSERT_TRUE(st.ok()); + } + auto reader = inverted_index_reader->get_index_reader(); + // query + { + Status res = boolean_query_search(name, reader); + EXPECT_TRUE(res.ok()); + } reader->close(); - _CLLDELETE(reader); - _CLDECDELETE(dir); } } diff --git a/be/test/olap/rowset/segment_v2/inverted_index/reader/reader_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/reader/reader_test.cpp new file mode 100644 index 00000000000000..1316633ca24ecd --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/reader/reader_test.cpp @@ -0,0 +1,168 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/rowset/segment_v2/inverted_index/reader/reader.h" + +#include + +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow-field" +#include + +#include "CLucene/analysis/standard95/StandardAnalyzer.h" +#include "CLucene/store/FSDirectory.h" +#pragma GCC diagnostic pop + +#include "io/fs/local_file_system.h" +#include "olap/rowset/segment_v2/inverted_index_fs_directory.h" + +CL_NS_USE(search) +CL_NS_USE(store) + +namespace doris::segment_v2 { + +class ReaderTest : public testing::Test { +public: + const std::string kTestDir = "./ut_dir/reader_test"; + const std::string rowset_id = "test_rowset"; + const int64_t seg_id = 1; + const std::string index_path_prefix = "test_rowset_1"; + std::string test_dir = ""; + const InvertedIndexStorageFormatPB storage_format = InvertedIndexStorageFormatPB::V2; + const int64_t index_id = 1; + TabletIndex index_meta; + std::string local_fs_index_path = ""; + + void SetUp() override { + auto st = io::global_local_filesystem()->delete_directory(kTestDir); + ASSERT_TRUE(st.ok()) << st; + st = io::global_local_filesystem()->create_directory(kTestDir); + ASSERT_TRUE(st.ok()) << st; + + TabletIndexPB index_meta_pb; + index_meta_pb.set_index_id(index_id); + index_meta.init_from_pb(index_meta_pb); + + test_dir = kTestDir + "/" + index_path_prefix; + auto fs = io::global_local_filesystem(); + + local_fs_index_path = InvertedIndexDescriptor::get_temporary_index_path( + test_dir, rowset_id, seg_id, index_meta.index_id(), index_meta.get_index_suffix()); + st = io::global_local_filesystem()->delete_directory(local_fs_index_path); + ASSERT_TRUE(st.ok()) << st; + st = io::global_local_filesystem()->create_directory(local_fs_index_path); + ASSERT_TRUE(st.ok()) << st; + } + void TearDown() override { + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); + } + + ReaderTest() = default; + ~ReaderTest() override = default; +}; + +using namespace inverted_index; + 
+TEST_F(ReaderTest, test_inverted_index_reader) { + DorisFSDirectory* dir = _CLNEW DorisFSDirectory(); + dir->init(io::global_local_filesystem(), local_fs_index_path.c_str(), nullptr); + + std::string name = "name"; + // write + { + std::vector datas; + datas.emplace_back("0 hm"); + datas.emplace_back("1 hm"); + datas.emplace_back("2 hm"); + datas.emplace_back("3 bg"); + datas.emplace_back("4 bg"); + datas.emplace_back("5 bg"); + datas.emplace_back("6 ac"); + datas.emplace_back("7 ac"); + datas.emplace_back("8 ac"); + datas.emplace_back("9 ac"); + + auto* analyzer = _CLNEW lucene::analysis::standard95::StandardAnalyzer(); + analyzer->set_stopwords(nullptr); + auto* indexwriter = _CLNEW lucene::index::IndexWriter(dir, analyzer, true); + indexwriter->setMaxBufferedDocs(100); + indexwriter->setRAMBufferSizeMB(-1); + indexwriter->setMaxFieldLength(0x7FFFFFFFL); + indexwriter->setMergeFactor(1000000000); + indexwriter->setUseCompoundFile(false); + + auto* char_string_reader = _CLNEW lucene::util::SStringReader; + + auto* doc = _CLNEW lucene::document::Document(); + int32_t field_config = lucene::document::Field::STORE_NO; + field_config |= lucene::document::Field::INDEX_NONORMS; + field_config |= lucene::document::Field::INDEX_TOKENIZED; + auto field_name = std::wstring(name.begin(), name.end()); + auto* field = _CLNEW lucene::document::Field(field_name.c_str(), field_config); + field->setOmitTermFreqAndPositions(false); + doc->add(*field); + + for (const auto& data : datas) { + char_string_reader->init(data.data(), data.size(), false); + auto* stream = analyzer->reusableTokenStream(field->name(), char_string_reader); + field->setValue(stream); + indexwriter->addDocument(doc); + } + + indexwriter->close(); + + _CLLDELETE(indexwriter); + _CLLDELETE(doc); + _CLLDELETE(analyzer); + _CLLDELETE(char_string_reader); + + auto writer = std::make_unique( + io::global_local_filesystem(), test_dir, rowset_id, seg_id, storage_format); + InvertedIndexDirectoryMap dir_map; + 
dir_map.emplace(std::make_pair(index_id, ""), + std::unique_ptr(dir)); + auto st = writer->initialize(dir_map); + EXPECT_TRUE(st.ok()); + st = writer->close(); + EXPECT_TRUE(st.ok()); + } + + // open reader + { + auto file_reader = std::make_shared(io::global_local_filesystem(), + test_dir, storage_format); + auto st = file_reader->init(4096); + if (!st.ok()) { + std::cerr << st.msg() << std::endl; + ASSERT_TRUE(st.ok()); + } + auto inverted_index_reader = + inverted_index::InvertedIndexReader::create_unique(&index_meta, file_reader); + st = inverted_index_reader->init_index_reader(); + if (!st.ok()) { + std::cerr << st.msg() << std::endl; + ASSERT_TRUE(st.ok()); + } + auto reader = inverted_index_reader->get_index_reader(); + reader->close(); + } +} + +} // namespace doris::segment_v2