Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 73 additions & 9 deletions src/gtests/gtests_tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ void QueryParser::TestMany (const char* szQuery, const char* szReconst)
{
XQQuery_t tQuery;
sphParseExtendedQuery ( tQuery, szQuery, nullptr, pTokenizer, &tSchema, pDict, tTmpSettings, nullptr );
CSphString sReconst = sphReconstructNode ( tQuery.m_pRoot, &tSchema );
CSphString sReconst = sphReconstructNode ( tQuery.m_pRoot, &tSchema, &tQuery.m_dZones );
ASSERT_STREQ ( sReconst.cstr(), szReconst );
}

Expand Down Expand Up @@ -849,7 +849,7 @@ void QueryParser::Transform ( const char * szQuery, const char * szReconst, cons
if (!sphParseExtendedQuery ( tQuery, szQuery, nullptr, pTokenizer, &tSchema, pDict, tTmpSettings, nullptr ))
return;

CSphString sReconst = sphReconstructNode ( tQuery.m_pRoot, &tSchema );
CSphString sReconst = sphReconstructNode ( tQuery.m_pRoot, &tSchema, &tQuery.m_dZones );

CSphDummyIndex tIndex;
if ( pKeywordHits )
Expand All @@ -861,7 +861,7 @@ void QueryParser::Transform ( const char * szQuery, const char * szReconst, cons
TransformExtendedQueryArgs_t tTranformArgs { true, tQuery.m_bNeedPhraseTransform, &tIndex };
EXPECT_TRUE ( sphTransformExtendedQuery ( &tQuery.m_pRoot, tTmpSettings, tQuery.m_sParseError, tTranformArgs, tQuery.m_sParseWarning ) );

CSphString sReconstTransformed = sphReconstructNode ( tQuery.m_pRoot, &tSchema );
CSphString sReconstTransformed = sphReconstructNode ( tQuery.m_pRoot, &tSchema, &tQuery.m_dZones );
EXPECT_STREQ ( sReconst.cstr(), szReconst );
ASSERT_STREQ ( sReconstTransformed.cstr(), szReconstTransformed );
}
Expand Down Expand Up @@ -1141,17 +1141,17 @@ TEST_F ( QueryParser, different_zones )
{
Transform (
"(( ZONE:(h1) ACCEL !aaa)|( ACCEL !bbb))",
"( ( accel AND NOT aaa ) | ( accel AND NOT bbb ) )",
"( ( accel AND NOT aaa ) | ( accel AND NOT bbb ) )" // not "( accel AND NOT ( aaa bbb ) )"
"( ( ZONE:(h1) accel AND NOT ZONE:(h1) aaa ) | ( accel AND NOT bbb ) )",
"( ( ZONE:(h1) accel AND NOT ZONE:(h1) aaa ) | ( accel AND NOT bbb ) )" // not "( accel AND NOT ( aaa bbb ) )"
);
}

TEST_F ( QueryParser, different_zonespan )
{
Transform (
"(( ZONESPAN:(h1) ACCEL !aaa)|( ACCEL !bbb))",
"( ( accel AND NOT aaa ) | ( accel AND NOT bbb ) )",
"( ( accel AND NOT aaa ) | ( accel AND NOT bbb ) )" // not "( accel AND NOT ( aaa bbb ) )"
"( ( ZONESPAN:(h1) accel AND NOT ZONESPAN:(h1) aaa ) | ( accel AND NOT bbb ) )",
"( ( ZONESPAN:(h1) accel AND NOT ZONESPAN:(h1) aaa ) | ( accel AND NOT bbb ) )" // not "( accel AND NOT ( aaa bbb ) )"
);
}

Expand Down Expand Up @@ -1361,6 +1361,70 @@ TEST_F ( QueryParser, transform_common_and_not_factor_1 )
);
}

// common subphrases spec
TEST_F ( QueryParser, transform_naked_common_subphrases )
{
	// identical phrase, identical field on both sides of the OR:
	// the transformed form is expected to keep a single copy
	const char * szQuery = "(@title \"aaa bbb\") | (@title \"aaa bbb\")";
	const char * szParsed = "( ( @title: \"aaa bbb\" ) | ( @title: \"aaa bbb\" ) )";
	const char * szTransformed = "( @title: \"aaa bbb\" )";

	Transform ( szQuery, szParsed, szTransformed );
}

TEST_F ( QueryParser, transform_common_subphrases_with_different_fields )
{
	// same phrase but against different fields (@title vs @body):
	// the transformed form is expected to equal the parsed one
	const char * szQuery = "(@title \"aaa bbb\") | (@body \"aaa bbb\")";
	const char * szParsed = "( ( @title: \"aaa bbb\" ) | ( @body: \"aaa bbb\" ) )";
	const char * szTransformed = "( ( @title: \"aaa bbb\" ) | ( @body: \"aaa bbb\" ) )";

	Transform ( szQuery, szParsed, szTransformed );
}

TEST_F ( QueryParser, transform_common_subphrases_with_same_field_limits )
{
	// same phrase, same field and the same position limit ([10]) on both sides:
	// the transformed form is expected to keep a single copy, limit included
	const char * szQuery = "(@title[10] \"aaa bbb\") | (@title[10] \"aaa bbb\")";
	const char * szParsed = "( ( @title[10]: \"aaa bbb\" ) | ( @title[10]: \"aaa bbb\" ) )";
	const char * szTransformed = "( @title[10]: \"aaa bbb\" )";

	Transform ( szQuery, szParsed, szTransformed );
}

TEST_F ( QueryParser, transform_common_subphrases_with_different_field_limits )
{
	// same phrase and field, but only one side carries a position limit:
	// the transformed form is expected to equal the parsed one
	const char * szQuery = "(@title \"aaa bbb\") | (@title[10] \"aaa bbb\")";
	const char * szParsed = "( ( @title: \"aaa bbb\" ) | ( @title[10]: \"aaa bbb\" ) )";
	const char * szTransformed = "( ( @title: \"aaa bbb\" ) | ( @title[10]: \"aaa bbb\" ) )";

	Transform ( szQuery, szParsed, szTransformed );
}

TEST_F ( QueryParser, transform_common_subphrases_with_same_zones )
{
	// same phrase restricted to the same zone on both sides:
	// the transformed form is expected to keep a single zone-restricted copy
	const char * szQuery = "(ZONE:h1 \"aaa bbb\") | (ZONE:h1 \"aaa bbb\")";
	const char * szParsed = "( ZONE:(h1) \"aaa bbb\" | ZONE:(h1) \"aaa bbb\" )";
	const char * szTransformed = "ZONE:(h1) \"aaa bbb\"";

	Transform ( szQuery, szParsed, szTransformed );
}

TEST_F ( QueryParser, transform_common_subphrases_with_different_zones )
{
	// same phrase, but only one side is zone-restricted:
	// the transformed form is expected to equal the parsed one
	const char * szQuery = "(ZONE:h1 \"aaa bbb\") | (\"aaa bbb\")";
	const char * szParsed = "( ZONE:(h1) \"aaa bbb\" | \"aaa bbb\" )";
	const char * szTransformed = "( ZONE:(h1) \"aaa bbb\" | \"aaa bbb\" )";

	Transform ( szQuery, szParsed, szTransformed );
}

TEST_F ( QueryParser, transform_common_subphrases_with_different_zones_and_zonespans )
{
	// same phrase and zone name, but ZONE on one side and ZONESPAN on the other:
	// the transformed form is expected to equal the parsed one
	const char * szQuery = "(ZONE:h1 \"aaa bbb\") | (ZONESPAN:h1 \"aaa bbb\")";
	const char * szParsed = "( ZONE:(h1) \"aaa bbb\" | ZONESPAN:(h1) \"aaa bbb\" )";
	const char * szTransformed = "( ZONE:(h1) \"aaa bbb\" | ZONESPAN:(h1) \"aaa bbb\" )";

	Transform ( szQuery, szParsed, szTransformed );
}

// different fields
TEST_F ( QueryParser, transform_different_fields )
{
Expand Down Expand Up @@ -1466,8 +1530,8 @@ TEST_F ( QueryParser, query_mixed_fields_zones_relaxed_1 )
{
Transform (
"@@relaxed ZONESPAN:aaa bbb | @missed ddd | fff eee",
"bbb",
"bbb"
"ZONESPAN:(aaa) bbb",
"ZONESPAN:(aaa) bbb"
);
}

Expand Down
48 changes: 40 additions & 8 deletions src/sphinxquery/sphinxquery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ void XQLimitSpec_t::SetFieldSpec ( const FieldMask_t & uMask, int iMaxPos )
m_iFieldMaxPos = iMaxPos;
}

/// Fold every member of the limit spec into one 64-bit value:
/// the field mask, the max field position, the zone-span flag and the zone list.
uint64_t XQLimitSpec_t::Hash () const noexcept
{
	// chain FNV64: the field mask starts the hash, the position limit continues it
	const uint64_t uMaskHash = sphFNV64 ( &m_dFieldMask, sizeof ( m_dFieldMask ) );
	uint64_t uResult = sphFNV64 ( &m_iFieldMaxPos, sizeof ( m_iFieldMaxPos ), uMaskHash );

	// the zone-span flag bumps the running value by one
	if ( m_bZoneSpan )
		uResult += 1;

	// nothing more to mix in when no zones are attached
	if ( m_dZones.IsEmpty() )
		return uResult;

	// raw bytes of the zone list continue the same chain
	return sphFNV64 ( m_dZones.begin(), m_dZones.GetLengthBytes(), uResult );
}

XQQuery_t * CloneXQQuery ( const XQQuery_t & tQuery )
{
auto * pQuery = new XQQuery_t;
Expand Down Expand Up @@ -146,7 +157,7 @@ void Dump (const XQNode_t *, const char *, bool)
#endif


CSphString sphReconstructNode ( const XQNode_t * pNode, const CSphSchema * pSchema )
CSphString sphReconstructNode ( const XQNode_t * pNode, const CSphSchema * pSchema, StrVec_t * pZones )
{
StringBuilder_c sRes ( " " );

Expand Down Expand Up @@ -187,10 +198,20 @@ CSphString sphReconstructNode ( const XQNode_t * pNode, const CSphSchema * pSche
else if (!pNode->m_dSpec.m_dFieldMask.TestAll(false))
sFields.SetSprintf ( "%s,%u", sFields.cstr(), pNode->m_dSpec.m_dFieldMask.GetMask32() );

if ( sFields.IsEmpty() )
sRes.Sprintf ( "( @missed: %s )", sTrim.cstr() );
else
sRes.Sprintf ( "( @%s: %s )", sFields.cstr() + 1, sTrim.cstr() );
const int iFieldLimit = pNode->m_dSpec.m_iFieldMaxPos;
if ( iFieldLimit )
{
if ( sFields.IsEmpty() )
sRes.Sprintf ( "( @missed[%d]: %s )", sTrim.cstr(), iFieldLimit );
else
sRes.Sprintf ( "( @%s[%d]: %s )", sFields.cstr() + 1, iFieldLimit, sTrim.cstr() );
} else
{
if ( sFields.IsEmpty() )
sRes.Sprintf ( "( @missed: %s )", sTrim.cstr() );
else
sRes.Sprintf ( "( @%s: %s )", sFields.cstr() + 1, sTrim.cstr() );
}
} else
{
if ( pNode->GetOp()==SPH_QUERY_AND && dWords.GetLength()>1 )
Expand All @@ -199,13 +220,24 @@ CSphString sphReconstructNode ( const XQNode_t * pNode, const CSphSchema * pSche
sRes << sTrim;
}

// Prefix the reconstructed text with the node's zone restriction,
// e.g. "ZONE:(h1) " or "ZONESPAN:(h1) ".
// Both conditions are required: the spec stores zone indexes that are looked up
// in *pZones, and pZones may legitimately be null (it is an optional argument),
// so a node with zones but no name table must be left without a prefix rather
// than dereferencing a null pointer.
if ( pZones && !pNode->m_dSpec.m_dZones.IsEmpty () )
{
	// stash what was built so far; the zone prefix has to come first
	sRes.MoveTo ( sTrim );
	{
		// scoped block is given "," as separator, the opening marker as prefix and ") " as suffix
		ScopedComma_c sZone ( sRes, ",", (pNode->m_dSpec.m_bZoneSpan?"ZONESPAN:(":"ZONE:("), ") " );
		for ( const auto& iZone: pNode->m_dSpec.m_dZones )
			sRes << (*pZones)[iZone];
	}
	// re-append the stashed text after the prefix
	sRes << sTrim;
}

} else
{
ARRAY_FOREACH ( i, pNode->dChildren() )
{
if ( !i )
{
auto sFoo = sphReconstructNode ( pNode->dChild(i), pSchema );
auto sFoo = sphReconstructNode ( pNode->dChild(i), pSchema, pZones );
sRes.Clear();
sRes << sFoo;
} else
Expand All @@ -224,9 +256,9 @@ CSphString sphReconstructNode ( const XQNode_t * pNode, const CSphSchema * pSche
sRes.Clear();

if ( pNode->GetOp()==SPH_QUERY_PHRASE )
sRes.Sprintf ( "\"%s %s\"", sTrim.cstr(), sphReconstructNode ( pNode->dChild(i), pSchema ).cstr() );
sRes.Sprintf ( "\"%s %s\"", sTrim.cstr(), sphReconstructNode ( pNode->dChild(i), pSchema, pZones ).cstr() );
else
sRes.Sprintf ( "%s %s %s", sTrim.cstr(), sOp, sphReconstructNode ( pNode->dChild(i), pSchema ).cstr() );
sRes.Sprintf ( "%s %s %s", sTrim.cstr(), sOp, sphReconstructNode ( pNode->dChild(i), pSchema, pZones ).cstr() );
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/sphinxquery/sphinxquery.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ struct XQLimitSpec_t

void SetZoneSpec ( const CSphVector<int> & dZones, bool bZoneSpan );
void SetFieldSpec ( const FieldMask_t& uMask, int iMaxPos );
uint64_t Hash () const noexcept;
};

/// extended query node
Expand Down Expand Up @@ -406,7 +407,7 @@ bool IsAllowOnlyNot();
void SetBooleanSimplify ( bool bSimplify );
bool GetBooleanSimplify ( const CSphQuery & tQuery );
bool GetBooleanSimplify ();
CSphString sphReconstructNode ( const XQNode_t * pNode, const CSphSchema * pSchema = nullptr );
CSphString sphReconstructNode ( const XQNode_t * pNode, const CSphSchema * pSchema = nullptr, StrVec_t * pZones = nullptr );
inline int GetExpansionLimit ( int iQueryLimit, int iIndexLimit )
{
return ( iQueryLimit!=DEFAULT_QUERY_EXPANSION_LIMIT ? iQueryLimit : iIndexLimit );
Expand Down
17 changes: 10 additions & 7 deletions src/sphinxquery/transform_commonkeywords.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ struct XQNodeAtomPos_fn
uint64_t sphHashPhrase ( const XQNode_t * pNode )
{
assert ( pNode );
uint64_t uHash = 0;
uint64_t uHash = pNode->m_dSpec.Hash();

auto iWords = pNode->dWords().GetLength();
if ( !iWords )
Expand Down Expand Up @@ -218,9 +218,10 @@ void sphHashSubphrases ( XQNode_t * pNode,

const CSphVector<XQKeyword_t> & dWords = pNode->dWords();
const int iLen = dWords.GetLength();
const uint64_t uSpec = pNode->m_dSpec.Hash();
for ( int i=0; i<iLen; ++i )
{
uint64_t uSubPhrase = fnHornerHash ( dWords[i].m_sWord.cstr() );
uint64_t uSubPhrase = fnHornerHash ( dWords[i].m_sWord.cstr(), uSpec );

// skip whole phrase
const int iSubLen = i ? iLen : (iLen-1);
Expand Down Expand Up @@ -459,8 +460,9 @@ bool CSphTransformation::TransformCommonPhrase () const noexcept
assert ( dWords.GetLength()>=2 );
dNodes[iPhrase]->m_iAtomPos = dWords.Begin()->m_iAtomPos;

uint64_t uHead = sphFNV64cont ( dWords[0].m_sWord.cstr(), SPH_FNV64_SEED );
uint64_t uTail = sphFNV64cont ( dWords [ dWords.GetLength() - 1 ].m_sWord.cstr(), SPH_FNV64_SEED );
const uint64_t uSpec = dNodes[iPhrase]->m_dSpec.Hash();
uint64_t uHead = sphFNV64cont ( dWords[0].m_sWord.cstr(), uSpec );
uint64_t uTail = sphFNV64cont ( dWords [ dWords.GetLength() - 1 ].m_sWord.cstr(), uSpec );
uHead = sphFNV64 ( g_sPhraseDelimiter, sizeof(g_sPhraseDelimiter), uHead );
uHead = sphFNV64cont ( dWords[1].m_sWord.cstr(), uHead );

Expand Down Expand Up @@ -587,7 +589,8 @@ bool CSphTransformation::TransformCommonPhrase () const noexcept

void CSphTransformation::MakeTransformCommonPhrase ( const CSphVector<XQNode_t *> & dCommonNodes, int iCommonLen, bool bHeadIsCommon )
{
auto * pCommonPhrase = new XQNode_t ( XQLimitSpec_t() );
const XQLimitSpec_t & tSpec = dCommonNodes[0]->m_dSpec;
auto * pCommonPhrase = new XQNode_t ( tSpec );
pCommonPhrase->SetOp ( SPH_QUERY_PHRASE );

XQNode_t * pGrandOr = dCommonNodes[0]->m_pParent;
Expand Down Expand Up @@ -637,7 +640,7 @@ void CSphTransformation::MakeTransformCommonPhrase ( const CSphVector<XQNode_t *

if (!pMaybeNewOr)
{
pMaybeNewOr.emplace ( new XQNode_t ( XQLimitSpec_t() ) );
pMaybeNewOr.emplace ( new XQNode_t ( tSpec ) );
(*pMaybeNewOr)->SetOp ( SPH_QUERY_OR );
}

Expand Down Expand Up @@ -685,7 +688,7 @@ void CSphTransformation::MakeTransformCommonPhrase ( const CSphVector<XQNode_t *
// parent phrase need valid atom position of children
pNewOr->m_iAtomPos = pNewOr->dChild(0)->dWord(0).m_iAtomPos;

auto * pNewPhrase = new XQNode_t ( XQLimitSpec_t() );
auto * pNewPhrase = new XQNode_t ( tSpec );
if ( bHeadIsCommon )
pNewPhrase->SetOp ( SPH_QUERY_PHRASE, pCommonPhrase, pNewOr );
else
Expand Down
8 changes: 6 additions & 2 deletions src/sphinxsearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,12 @@ void AddAccessSpecsBson ( bson::Assoc_c & tNode, const XQNode_t * pNode, const C
}
if ( s.m_iFieldMaxPos )
tNode.AddInt ( SZ_MAX_FIELD_POS, s.m_iFieldMaxPos );
if ( pZones && !s.m_dZones.IsEmpty () )
tNode.AddStringVec ( s.m_bZoneSpan ? SZ_ZONESPANS : SZ_ZONES, *pZones );
if ( !pZones || s.m_dZones.IsEmpty () )
return;
StrVec_t dZones;
for ( const auto& iZone: s.m_dZones )
dZones.Add ( (*pZones)[iZone] );
tNode.AddStringVec ( s.m_bZoneSpan ? SZ_ZONESPANS : SZ_ZONES, dZones );
}

void CreateKeywordBson ( bson::Assoc_c& tWord, const XQKeyword_t & tKeyword )
Expand Down
Loading