Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 54 additions & 39 deletions src/joinsorter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,21 +203,27 @@ CSphVector<std::pair<int,bool>> FetchJoinRightTableFilters ( const CSphVector<CS
}


bool NeedToMoveMixedJoinFilters ( const CSphQuery & tQuery, const ISphSchema & tSchema )
bool NeedPostJoinFilterEvaluation ( const CSphQuery & tQuery, const ISphSchema & tSchema )
{
CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( tQuery.m_dFilters, tSchema, tQuery.m_sJoinIdx.cstr() );
return NeedPostJoinFilterEvaluation ( tQuery.m_dFilters, tQuery.m_sJoinIdx, tQuery.m_dFilterTree.GetLength()>0, tQuery.m_eJoinType, tSchema );
}


bool NeedPostJoinFilterEvaluation ( const CSphVector<CSphFilterSettings> & dFilters, const CSphString & sJoinIdx, bool bFilterTree, JoinType_e eJoinType, const ISphSchema & tSchema )
{
CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( dFilters, tSchema, sJoinIdx.cstr() );
if ( !dRightFilters.GetLength() )
return false;

// move all filters to the left query in case of LEFT JOIN
// otherwise we can't distinguish between 'no match' and 'match with null part from right table'
if ( tQuery.m_eJoinType==JoinType_e::LEFT )
if ( eJoinType==JoinType_e::LEFT )
return true;

if ( !tQuery.m_dFilterTree.GetLength() )
return false;
if ( dRightFilters.GetLength()!=dFilters.GetLength() )
return bFilterTree; // we don't (yet) support splitting the filter tree between left and right queries

return dRightFilters.GetLength()!=tQuery.m_dFilters.GetLength();
return false;
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -719,6 +725,7 @@ class JoinSorter_c : public ISphMatchSorter
void SetupAggregates();
FORCE_INLINE uint64_t SetupJoinFilters ( const CSphMatch & tEntry );
bool SetupRightFilters ( CSphString & sError );
bool ValidateLeftTableNotPrefixedInFilters ( CSphString & sError );
bool SetupOnFilters ( CSphString & sError );
bool SetupOnValueFilters ( CSphString & sError );
void SetupRightStandaloneLocators();
Expand Down Expand Up @@ -795,7 +802,7 @@ JoinSorter_c::JoinSorter_c ( const CSphIndex * pIndex, const CSphIndex * pJoined

CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( m_tQuery.m_dFilters, tSorterSchema, m_tQuery.m_sJoinIdx.cstr() );
bool bDisableByImplicitGrouping = HasImplicitGrouping(m_tQuery) && m_tQuery.m_eJoinType!=JoinType_e::LEFT;
m_bFinalCalcOnly = !pIndex->IsRT() && !bJoinedGroupSort && !bHaveAggregates && !dRightFilters.GetLength() && !NeedToMoveMixedJoinFilters ( m_tQuery, tSorterSchema ) && !pSorter->IsPrecalc() && !bDisableByImplicitGrouping;
m_bFinalCalcOnly = !pIndex->IsRT() && !bJoinedGroupSort && !bHaveAggregates && !dRightFilters.GetLength() && !NeedPostJoinFilterEvaluation ( m_tQuery, tSorterSchema ) && !pSorter->IsPrecalc() && !bDisableByImplicitGrouping;
m_bErrorFlag = !SetupJoinQuery ( m_pSorter->GetSchema()->GetDynamicSize(), m_sErrorMessage );
if ( m_bFinalCalcOnly || !m_iBatchSize )
m_bCanBatch = false;
Expand Down Expand Up @@ -1642,14 +1649,8 @@ static void RemoveTableNamePrefix ( CSphString & sAttr, const CSphFilterSettings
}


bool JoinSorter_c::SetupRightFilters ( CSphString & sError )
bool JoinSorter_c::ValidateLeftTableNotPrefixedInFilters ( CSphString & sError )
{
m_tJoinQuery.m_dFilters.Resize(0);

CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( m_tQuery.m_dFilters, *m_pSorterSchema, GetJoinedIndexName().cstr() );
bool bLeftJoin = m_tQuery.m_eJoinType==JoinType_e::LEFT;

// Validate: left table should not be prefixed in WHERE clause filters
CSphString sLeftTableName = m_pIndex->GetName();
CSphString sLeftPrefix;
sLeftPrefix.SetSprintf ( "%s.", sLeftTableName.cstr() );
Expand All @@ -1663,34 +1664,45 @@ bool JoinSorter_c::SetupRightFilters ( CSphString & sError )
return false;
}
}

if ( bLeftJoin || m_tQuery.m_dFilterTree.GetLength() )
{
if ( !dRightFilters.GetLength() )
return true;

if ( bLeftJoin || dRightFilters.GetLength()!=m_tQuery.m_dFilters.GetLength() )
{
CreateFilterContext_t tCtx;
tCtx.m_pFilters = &m_tQuery.m_dFilters;
tCtx.m_pFilterTree = &m_tQuery.m_dFilterTree;
tCtx.m_pMatchSchema = m_pSorterSchema;
tCtx.m_pIndexSchema = &m_pIndex->GetMatchSchema();
tCtx.m_bScan = m_tQuery.m_sQuery.IsEmpty();
tCtx.m_sJoinIdx = GetJoinedIndexName();
if ( !sphCreateFilters ( tCtx, sError, sError ) )
{
sError.SetSprintf ( "failed to create query filters: %s", sError.cstr() );
return false;
}
return true;
}

m_tMixedFilter.SetFilter ( tCtx.m_pFilter );
return true;

bool JoinSorter_c::SetupRightFilters ( CSphString & sError )
{
m_tJoinQuery.m_dFilters.Resize(0);

if ( !ValidateLeftTableNotPrefixedInFilters(sError) )
return false;

if ( NeedPostJoinFilterEvaluation ( m_tQuery, *m_pSorterSchema ) )
{
// all filters stay in the left query; evaluated post-join
CreateFilterContext_t tCtx;
tCtx.m_pFilters = &m_tQuery.m_dFilters;
tCtx.m_pFilterTree = &m_tQuery.m_dFilterTree;
tCtx.m_pMatchSchema = m_pSorterSchema;
tCtx.m_pIndexSchema = &m_pIndex->GetMatchSchema();
tCtx.m_bScan = m_tQuery.m_sQuery.IsEmpty();
tCtx.m_sJoinIdx = GetJoinedIndexName();
tCtx.m_eJoinType = m_tQuery.m_eJoinType;
if ( !sphCreateFilters ( tCtx, sError, sError ) )
{
sError.SetSprintf ( "failed to create query filters: %s", sError.cstr() );
return false;
}

m_tJoinQuery.m_dFilterTree = m_tQuery.m_dFilterTree;
m_tMixedFilter.SetFilter ( tCtx.m_pFilter );
return true;
}

// we have 2 options if we have a filter tree: either it is completely in the left query or completely in the right query
// otherwise NeedPostJoinFilterEvaluation() would return true
CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( m_tQuery.m_dFilters, *m_pSorterSchema, GetJoinedIndexName().cstr() );
if ( dRightFilters.GetLength() )
m_tJoinQuery.m_dFilterTree = m_tQuery.m_dFilterTree;

CSphString sPrefix;
sPrefix.SetSprintf ( "%s.", GetJoinedIndexName().cstr() );

Expand Down Expand Up @@ -2087,15 +2099,18 @@ void JoinSorter_c::AddRemappedStringItemsToJoinSelectList()
void JoinSorter_c::AddExpressionItemsToJoinSelectList()
{
// find JSON/columnar attrs present in filters and add them to select list (only when all filters are moved to the left query)
if ( !NeedToMoveMixedJoinFilters ( m_tQuery, *m_pSorterSchema ) )
if ( !NeedPostJoinFilterEvaluation ( m_tQuery, *m_pSorterSchema ) )
return;

const CSphSchema & tJoinedSchema = m_pJoinedIndex->GetMatchSchema();
for ( const auto & i : m_tQuery.m_dFilters )
{
if ( sphJsonNameSplit ( i.m_sAttrName.cstr(), GetJoinedIndexName().cstr() ) )
bool bIndexPrefix = false;
if ( sphJsonNameSplit ( i.m_sAttrName.cstr(), GetJoinedIndexName().cstr(), nullptr, &bIndexPrefix ) )
{
AddToJoinSelectList ( i.m_sAttrName );
if ( bIndexPrefix )
AddToJoinSelectList ( i.m_sAttrName );

continue;
}

Expand Down
3 changes: 2 additions & 1 deletion src/joinsorter.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ void SetJoinBatchSize ( int iSize );
int GetJoinBatchSize();

CSphVector<std::pair<int,bool>> FetchJoinRightTableFilters ( const CSphVector<CSphFilterSettings> & dFilters, const ISphSchema & tSchema, const char * szJoinedIndex );
bool NeedToMoveMixedJoinFilters ( const CSphQuery & tQuery, const ISphSchema & tSchema );
bool NeedPostJoinFilterEvaluation ( const CSphQuery & tQuery, const ISphSchema & tSchema );
bool NeedPostJoinFilterEvaluation ( const CSphVector<CSphFilterSettings> & dFilters, const CSphString & sJoinIdx, bool bFilterTree, JoinType_e eJoinType, const ISphSchema & tSchema );
bool ExprHasLeftTableAttrs ( const CSphString & sAttr, const ISphSchema & tLeftSchema );
std::unique_ptr<ISphFilter> CreateJoinNullFilter ( const CSphFilterSettings & tSettings, const CSphAttrLocator & tNullMapLocator );
bool SplitJoinedAttrName ( const CSphString & sJoinedAttr, CSphString & sTable, CSphString & sAttr, CSphString * pError = nullptr );
Expand Down
10 changes: 8 additions & 2 deletions src/queuecreator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1791,12 +1791,18 @@ bool QueueCreator_c::AddJoinFilterAttrs()
}
}

if ( NeedToMoveMixedJoinFilters ( m_tQuery, *m_pSorterSchema ) )
if ( NeedPostJoinFilterEvaluation ( m_tQuery, *m_pSorterSchema ) )
for ( const auto & i : m_tQuery.m_dFilters )
{
const CSphString & sAttr = i.m_sAttrName;
const CSphColumnInfo * pAttr = m_pSorterSchema->GetAttr ( sAttr.cstr() );
if ( pAttr || !sphJsonNameSplit ( sAttr.cstr(), sRightIndex.cstr() ) )
CSphString sSplitAttrName;
bool bIndexPrefix = false;
if ( pAttr || !sphJsonNameSplit ( sAttr.cstr(), sRightIndex.cstr(), &sSplitAttrName, &bIndexPrefix ) )
continue;

// check if it's a json attribute from the left table and not a joined attribute
if ( !bIndexPrefix )
continue;

CSphColumnInfo tExprCol ( sAttr.cstr(), FilterType2AttrType ( i.m_eType ) );
Expand Down
1 change: 1 addition & 0 deletions src/sphinx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8272,6 +8272,7 @@ bool CSphIndex_VLN::SetupFiltersAndContext ( CSphQueryContext & tCtx, CreateFilt
tFlx.m_pSI = &m_tSI;
tFlx.m_iTotalDocs = m_iDocinfo;
tFlx.m_sJoinIdx = tQuery.m_sJoinIdx;
tFlx.m_eJoinType = tQuery.m_eJoinType;

// may modify eval stages in schema; needs to be before SetupCalc
if ( !TransformFilters ( tFlx, dTransformedFilters, dTransformedFilterTree, pModifiedMatchSchema, tQuery.m_dItems, tMeta.m_sError ) )
Expand Down
8 changes: 1 addition & 7 deletions src/sphinxfilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1847,13 +1847,7 @@ static void RemoveJoinFilters ( const CreateFilterContext_t & tCtx, CSphVector<C
if ( tCtx.m_sJoinIdx.IsEmpty() )
return;

CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( dModified, *tCtx.m_pMatchSchema, tCtx.m_sJoinIdx.cstr() );

bool bHaveNullFilters = false;
for ( const auto & i : dRightFilters )
bHaveNullFilters |= dModified[i.first].m_eType==SPH_FILTER_NULL;

if ( tCtx.m_pFilterTree && tCtx.m_pFilterTree->GetLength() && dRightFilters.GetLength() && ( bHaveNullFilters || dRightFilters.GetLength()!=dModified.GetLength() ) )
if ( NeedPostJoinFilterEvaluation ( dModified, tCtx.m_sJoinIdx, tCtx.m_pFilterTree && tCtx.m_pFilterTree->GetLength(), tCtx.m_eJoinType, *tCtx.m_pMatchSchema ) )
{
// mixed joined and non-joined filters; they will be handled in join sorter
// remove all filters from the query (keep the @rowid/pseudo_sharding filter)
Expand Down
1 change: 1 addition & 0 deletions src/sphinxfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ struct CreateFilterContext_t
const SIContainer_c * m_pSI = nullptr;
int64_t m_iTotalDocs = 0;
CSphString m_sJoinIdx;
JoinType_e m_eJoinType = JoinType_e::NONE;
bool m_bAddKNNDistFilter = false;
};

Expand Down
8 changes: 7 additions & 1 deletion src/sphinxjson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1371,8 +1371,11 @@ static bool FindNextSeparator ( const char * & pSep )
}


bool sphJsonNameSplit ( const char * szName, const char * szIndex, CSphString * pColumn )
bool sphJsonNameSplit ( const char * szName, const char * szIndex, CSphString * pColumn, bool * pIndexPrefix )
{
if ( pIndexPrefix )
*pIndexPrefix = false;

if ( !szName )
return false;

Expand All @@ -1382,6 +1385,9 @@ bool sphJsonNameSplit ( const char * szName, const char * szIndex, CSphString *

if ( szIndex && *pSep=='.' && !strncmp ( szIndex, szName, pSep - szName ) )
{
if ( pIndexPrefix )
*pIndexPrefix = true;

// this was not a json separator, but a dot between table name and column name
pSep++;
if ( !FindNextSeparator(pSep) )
Expand Down
2 changes: 1 addition & 1 deletion src/sphinxjson.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ ESphJsonType sphJsonFindByKey ( ESphJsonType eType, const BYTE ** ppValue, const
ESphJsonType sphJsonFindByIndex ( ESphJsonType eType, const BYTE ** ppValue, int iIndex );

/// extract object part from the name; return false if not JSON name. szIndex is the possible name of joined index
bool sphJsonNameSplit ( const char * szName, const char * szIndex = nullptr, CSphString * pColumn = nullptr );
bool sphJsonNameSplit ( const char * szName, const char * szIndex = nullptr, CSphString * pColumn = nullptr, bool * pIndexPrefix = nullptr );

/// compute node size, in bytes
/// returns -1 when data itself is required to compute the size, but pData is NULL
Expand Down
1 change: 1 addition & 0 deletions src/sphinxrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7819,6 +7819,7 @@ static bool SetupFilters ( const CSphQuery & tQuery, const ISphSchema & tMatchSc
tFlx.m_eCollation = tQuery.m_eCollation;
tFlx.m_bScan = bFullscan;
tFlx.m_sJoinIdx = tQuery.m_sJoinIdx;
tFlx.m_eJoinType = tQuery.m_eJoinType;
tFlx.m_bAddKNNDistFilter = true;

std::unique_ptr<ISphSchema> pModifiedMatchSchema;
Expand Down
Binary file modified test/test_278/model.bin
Binary file not shown.
2 changes: 2 additions & 0 deletions test/test_278/test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ insert into t values(1, 'abc');
insert into t2 values(1, 'abc', 1),(2, 'def', 1);

select * from t inner join t2 on t.id = t2.t_id where t2.id = 1 or t2.id = 2;
select * from t inner join t2 on t.id = t2.t_id where id&lt;2 and ( t2.id = 1 or t2.id = 2 );
select * from t inner join t2 on t.id = t2.t_id where id&lt;1 and ( t2.id = 1 or t2.id = 2 );

drop table t;
drop table t2;
Expand Down