Skip to content

Commit 2cf8903

Browse files
committed
feat: Activate pg_trgm fuzzy search fallback and autocomplete suggest
- Add fuzzy fallback in search(): when tsvector FTS returns 0 results, automatically retry using pg_trgm similarity matching (content_text % ?) against the existing idx_mq_doc_trgm GIN trigram index
- Add translateFuzzyQuery() and buildFuzzyScoreExpr() to ElasticQueryTranslator
- Propagate cleanedSearchText through QueryResult → TranslatedQuery for fallback use
- Add suggest() method using word_similarity() for typeahead/autocomplete on any document field (uses the <% operator with GIN trigram index)
- Highlight fuzzy results using regexp_replace when ts_headline is not available
- 6 new unit tests for fuzzy query translation and cleanedSearchText propagation
- Configurable similarity threshold (default 0.2 for fuzzy fallback)
1 parent 84f7c48 commit 2cf8903

4 files changed

Lines changed: 274 additions & 0 deletions

File tree

framework/src/main/groovy/org/moqui/impl/context/ElasticQueryTranslator.groovy

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ class ElasticQueryTranslator {
7474
boolean trackTotal = true
7575
/** Fields to highlight, keyed by field name */
7676
Map<String, Map> highlightFields = [:]
77+
/** The cleaned search text for use in fuzzy fallback */
78+
String cleanedSearchText = null
7779
}
7880

7981
/**
@@ -114,6 +116,7 @@ class ElasticQueryTranslator {
114116
tq.params = qr.params
115117
tq.tsqueryExpr = qr.tsqueryExpr
116118
tq.tsqueryParams = qr.tsqueryParams
119+
tq.cleanedSearchText = qr.cleanedSearchText
117120
}
118121

119122
return tq
@@ -127,6 +130,10 @@ class ElasticQueryTranslator {
127130
String tsqueryExpr = null
128131
/** Bind parameters specifically for tsqueryExpr (separate from WHERE clause params) */
129132
List<Object> tsqueryParams = []
133+
/** If true, this query was a fuzzy/similarity query that should use pg_trgm scoring */
134+
boolean isFuzzy = false
135+
/** The original cleaned search text (for fuzzy fallback) */
136+
String cleanedSearchText = null
130137
}
131138

132139
static QueryResult translateQuery(Map queryMap) {
@@ -181,6 +188,7 @@ class ElasticQueryTranslator {
181188
qr.tsqueryParams = [cleanedQuery]
182189
qr.params = [cleanedQuery]
183190
qr.clause = "content_tsv @@ websearch_to_tsquery('english', ?)"
191+
qr.cleanedSearchText = cleanedQuery
184192
return qr
185193
}
186194

@@ -693,4 +701,35 @@ class ElasticQueryTranslator {
693701
static String buildHighlightExpr(String fieldJsonPath, String tsqueryExpr) {
694702
return "ts_headline('english', coalesce(${fieldJsonPath}, ''), ${tsqueryExpr}, 'StartSel=<em>,StopSel=</em>,MaxWords=35,MinWords=15,ShortWord=3,HighlightAll=false,MaxFragments=3,FragmentDelimiter= ... ')"
695703
}
704+
705+
// ============================================================
706+
// pg_trgm Fuzzy Search Support
707+
// ============================================================
708+
709+
/**
710+
* Build a fuzzy search WHERE clause using pg_trgm's similarity operator (%).
711+
* Falls back to trigram similarity when tsvector full-text search returns zero results.
712+
* @param searchText The cleaned search text to match against
713+
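* @param threshold The minimum similarity threshold (0.0 to 1.0, default 0.3). NOTE: currently not applied by this method; the % operator uses the session's pg_trgm.similarity_threshold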
714+
* @return A QueryResult with the pg_trgm similarity clause
715+
*/
716+
static QueryResult translateFuzzyQuery(String searchText, double threshold = 0.3) {
717+
QueryResult qr = new QueryResult()
718+
if (!searchText || searchText.trim().isEmpty()) return qr
719+
720+
String text = searchText.trim()
721+
qr.clause = "content_text % ?"
722+
qr.params = [text]
723+
qr.isFuzzy = true
724+
qr.cleanedSearchText = text
725+
return qr
726+
}
727+
728+
/**
729+
* Build the SQL for a fuzzy search score expression using pg_trgm similarity().
730+
* Returns a value between 0.0 and 1.0; higher = more similar.
731+
*/
732+
static String buildFuzzyScoreExpr() {
733+
return "similarity(content_text, ?)"
734+
}
696735
}

framework/src/main/groovy/org/moqui/impl/context/PostgresElasticClient.groovy

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,13 @@ class PostgresElasticClient implements ElasticFacade.ElasticClient {
876876
hits.add(hit)
877877
}
878878

879+
// Fuzzy fallback: if tsvector search returned 0 results and we have a search text,
880+
// retry using pg_trgm similarity matching (handles typos, misspellings)
881+
if (hits.isEmpty() && tq.cleanedSearchText && tq.tsqueryExpr) {
882+
Map fuzzyResult = searchFuzzyFallback(indexNames, tq, useDbHighlights)
883+
if (fuzzyResult != null) return fuzzyResult
884+
}
885+
879886
return [hits: [total: [value: totalCount, relation: "eq"], hits: hits],
880887
_shards: [total: 1, successful: 1, failed: 0]]
881888
} finally { rs.close() }
@@ -1102,6 +1109,164 @@ class PostgresElasticClient implements ElasticFacade.ElasticClient {
11021109
return (List<Map>) ((Map) result.get("hits")).get("hits")
11031110
}
11041111

1112+
// ============================================================
1113+
// Fuzzy Search Fallback (pg_trgm)
1114+
// ============================================================
1115+
1116+
/**
1117+
* Fuzzy fallback search using pg_trgm similarity when tsvector FTS returns zero results.
1118+
* Uses the existing GIN trigram index (idx_mq_doc_trgm) on content_text.
1119+
* Returns null if fuzzy search also finds nothing.
1120+
*/
1121+
private Map searchFuzzyFallback(List<String> indexNames, ElasticQueryTranslator.TranslatedQuery tq, boolean useDbHighlights) {
1122+
String searchText = tq.cleanedSearchText
1123+
if (!searchText) return null
1124+
1125+
String idxPlaceholders = indexNames.collect { "?" }.join(", ")
1126+
1127+
// Set the pg_trgm similarity threshold to 0.2 (lower = more results); the setting is session-level, not scoped to this one query
1128+
Connection conn = getConnection()
1129+
PreparedStatement threshPs = conn.prepareStatement("SELECT set_limit(0.2)")
1130+
try { threshPs.executeQuery().close() } catch (Exception e) {
1131+
// set_limit() is deprecated in newer PostgreSQL; if the call fails, fall back to the pg_trgm.similarity_threshold GUC
1132+
try {
1133+
PreparedStatement gucPs = conn.prepareStatement("SET pg_trgm.similarity_threshold = 0.2")
1134+
gucPs.execute()
1135+
gucPs.close()
1136+
} catch (Exception e2) { logger.trace("Could not set pg_trgm threshold: ${e2.message}") }
1137+
} finally { threshPs.close() }
1138+
1139+
String fuzzyScoreExpr = ElasticQueryTranslator.buildFuzzyScoreExpr()
1140+
1141+
// Build highlight columns for fuzzy results (case-insensitive regexp_replace highlighting, since we don't have a tsquery for ts_headline)
1142+
String hlSelect = ""
1143+
List<String> hlFieldNames = []
1144+
if (useDbHighlights && tq.highlightFields) {
1145+
List<String> hlExprs = []
1146+
for (String hlField in tq.highlightFields.keySet()) {
1147+
String jsonPath = ElasticQueryTranslator.fieldToJsonPath("document", hlField)
1148+
// For fuzzy matches, use a simple regexp_replace highlight since ts_headline requires a tsquery
1149+
hlExprs.add("regexp_replace(coalesce(${jsonPath}, ''), '(' || ? || ')', '<em>\\1</em>', 'gi') AS hl_${hlFieldNames.size()}".toString())
1150+
hlFieldNames.add(hlField)
1151+
}
1152+
if (hlExprs) hlSelect = ", " + hlExprs.join(", ")
1153+
}
1154+
1155+
String sql = """
1156+
SELECT doc_id, index_name, doc_type, document, ${fuzzyScoreExpr} AS _score${hlSelect}
1157+
FROM moqui_document
1158+
WHERE index_name IN (${idxPlaceholders}) AND content_text % ?
1159+
ORDER BY _score DESC
1160+
LIMIT ? OFFSET ?
1161+
""".trim()
1162+
1163+
// Parameters in placeholder order: similarity() score (searchText), one regex-escaped searchText per highlight column, indexNames, WHERE content_text % ? (searchText), limit, offset
1164+
List<Object> allParams = []
1165+
allParams.add(searchText) // for similarity() score expression
1166+
// highlight params (search text for each highlight field's regexp_replace)
1167+
for (int h = 0; h < hlFieldNames.size(); h++) {
1168+
allParams.add(escapeRegex(searchText))
1169+
}
1170+
allParams.addAll(indexNames)
1171+
allParams.add(searchText) // for the WHERE content_text % ?
1172+
allParams.add(tq.sizeLimit)
1173+
allParams.add(tq.fromOffset)
1174+
1175+
PreparedStatement ps = conn.prepareStatement(sql)
1176+
try {
1177+
for (int i = 0; i < allParams.size(); i++) setParam(ps, i + 1, allParams[i])
1178+
ResultSet rs = ps.executeQuery()
1179+
try {
1180+
List<Map> hits = []
1181+
while (rs.next()) {
1182+
String docJson = rs.getString("document")
1183+
Map source = docJson ? (Map) jsonToObject(docJson) : [:]
1184+
double score = rs.getDouble("_score")
1185+
1186+
Map hit = [_index: unprefixIndexName(rs.getString("index_name")),
1187+
_id: rs.getString("doc_id"),
1188+
_type: rs.getString("doc_type"),
1189+
_score: score, _source: source] as Map
1190+
1191+
if (hlFieldNames) {
1192+
Map<String, List<String>> highlights = [:]
1193+
for (int h = 0; h < hlFieldNames.size(); h++) {
1194+
String hlResult = rs.getString("hl_${h}")
1195+
if (hlResult) highlights.put(hlFieldNames[h], [hlResult])
1196+
}
1197+
if (highlights) hit.put("highlight", highlights)
1198+
}
1199+
1200+
hits.add(hit)
1201+
}
1202+
if (hits.isEmpty()) return null
1203+
1204+
logger.info("Fuzzy fallback matched ${hits.size()} documents for query '${searchText}'")
1205+
return [hits: [total: [value: (long) hits.size(), relation: "eq"], hits: hits],
1206+
_shards: [total: 1, successful: 1, failed: 0]]
1207+
} finally { rs.close() }
1208+
} finally { ps.close() }
1209+
}
1210+
1211+
/**
1212+
* Suggest completions for a partial search term using pg_trgm word_similarity().
1213+
* Searches against document field values for typeahead/autocomplete.
1214+
* @param index Index name (or null for all indexes)
1215+
* @param field Document field to suggest from (e.g. "productName", "description")
1216+
* @param prefix The partial text to complete
1217+
* @param maxResults Maximum number of suggestions to return
1218+
* @return A list of suggestion maps with [text: String, score: double]
1219+
*/
1220+
List<Map> suggest(String index, String field, String prefix, int maxResults = 10) {
1221+
if (!prefix || prefix.trim().isEmpty()) return []
1222+
ElasticQueryTranslator.sanitizeFieldName(field)
1223+
String cleanPrefix = prefix.trim()
1224+
1225+
List<String> indexNames = resolveIndexNames(index)
1226+
if (indexNames.isEmpty()) return []
1227+
1228+
String idxPlaceholders = indexNames.collect { "?" }.join(", ")
1229+
String jsonPath = ElasticQueryTranslator.fieldToJsonPath("document", field)
1230+
1231+
String sql = """
1232+
SELECT DISTINCT ${jsonPath} AS val, word_similarity(?, ${jsonPath}) AS score
1233+
FROM moqui_document
1234+
WHERE index_name IN (${idxPlaceholders})
1235+
AND ${jsonPath} IS NOT NULL
1236+
AND ${jsonPath} <> ''
1237+
AND ? <% ${jsonPath}
1238+
ORDER BY score DESC
1239+
LIMIT ?
1240+
""".trim()
1241+
1242+
List<Object> allParams = []
1243+
allParams.add(cleanPrefix) // for word_similarity()
1244+
allParams.addAll(indexNames)
1245+
allParams.add(cleanPrefix) // for <% operator
1246+
allParams.add(maxResults)
1247+
1248+
Connection conn = getConnection()
1249+
PreparedStatement ps = conn.prepareStatement(sql)
1250+
try {
1251+
for (int i = 0; i < allParams.size(); i++) setParam(ps, i + 1, allParams[i])
1252+
ResultSet rs = ps.executeQuery()
1253+
try {
1254+
List<Map> suggestions = []
1255+
while (rs.next()) {
1256+
String text = rs.getString("val")
1257+
double score = rs.getDouble("score")
1258+
if (text) suggestions.add([text: text, score: score])
1259+
}
1260+
return suggestions
1261+
} finally { rs.close() }
1262+
} finally { ps.close() }
1263+
}
1264+
1265+
/** Escape special regex characters in a string for use in regexp_replace */
1266+
private static String escapeRegex(String text) {
1267+
return text.replaceAll(/([.\\+*?\[^\]$(){}=!<>|:\-])/, '\\\\$1')
1268+
}
1269+
11051270
@Override
11061271
Map validateQuery(String index, Map queryMap, boolean explain) {
11071272
try {

framework/src/test/groovy/PostgresSearchTranslatorTests.groovy

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,4 +476,57 @@ class PostgresSearchTranslatorTests {
476476
Assertions.assertTrue(tq.whereClause.contains("AND"))
477477
Assertions.assertTrue(tq.highlightFields.containsKey("productName"))
478478
}
479+
480+
// ============================================================
481+
// pg_trgm Fuzzy Search Tests
482+
// ============================================================
483+
484+
@Test
485+
@DisplayName("translateFuzzyQuery returns pg_trgm similarity clause")
486+
void fuzzyQuery_returnsSimilarityClause() {
487+
def qr = ElasticQueryTranslator.translateFuzzyQuery("databse")
488+
Assertions.assertEquals("content_text % ?", qr.clause)
489+
Assertions.assertEquals(["databse"], qr.params)
490+
Assertions.assertTrue(qr.isFuzzy)
491+
Assertions.assertEquals("databse", qr.cleanedSearchText)
492+
}
493+
494+
@Test
495+
@DisplayName("translateFuzzyQuery handles empty input")
496+
void fuzzyQuery_emptyInput() {
497+
def qr = ElasticQueryTranslator.translateFuzzyQuery("")
498+
Assertions.assertEquals("TRUE", qr.clause)
499+
Assertions.assertFalse(qr.isFuzzy)
500+
}
501+
502+
@Test
503+
@DisplayName("translateFuzzyQuery handles null input")
504+
void fuzzyQuery_nullInput() {
505+
def qr = ElasticQueryTranslator.translateFuzzyQuery(null)
506+
Assertions.assertEquals("TRUE", qr.clause)
507+
Assertions.assertFalse(qr.isFuzzy)
508+
}
509+
510+
@Test
511+
@DisplayName("buildFuzzyScoreExpr returns similarity expression")
512+
void fuzzyScoreExpr_returnsSimilarityFunc() {
513+
String expr = ElasticQueryTranslator.buildFuzzyScoreExpr()
514+
Assertions.assertEquals("similarity(content_text, ?)", expr)
515+
}
516+
517+
@Test
518+
@DisplayName("query_string populates cleanedSearchText in QueryResult")
519+
void queryString_populatesCleanedSearchText() {
520+
def qr = ElasticQueryTranslator.translateQuery([query_string: [query: "database optimization"]])
521+
Assertions.assertNotNull(qr.cleanedSearchText)
522+
Assertions.assertEquals("database optimization", qr.cleanedSearchText)
523+
}
524+
525+
@Test
526+
@DisplayName("query_string cleanedSearchText propagates to TranslatedQuery")
527+
void searchMap_cleanedSearchTextPropagated() {
528+
Map searchMap = [query: [query_string: [query: "search terms"]]]
529+
TranslatedQuery tq = ElasticQueryTranslator.translateSearchMap(searchMap)
530+
Assertions.assertEquals("search terms", tq.cleanedSearchText)
531+
}
479532
}

run-moqui.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/zsh
2+
# Moqui Framework launcher optimized for Apple M1 Max (ARM64)
3+
4+
export JAVA_HOME=$(/usr/libexec/java_home -v 21)
5+
6+
# M1 Max optimized JVM flags:
7+
# - ZGC: low-latency garbage collector, excellent on ARM64
8+
# - 1GB initial / 4GB max heap (M1 Max has plenty of RAM, adjust as needed)
9+
# - UseTransparentHugePages is not available on macOS, so no large-page flag is set; AlwaysPreTouch touches every heap page at JVM startup
10+
# - ParallelGCThreads=8 for the M1 Max (10 cores)
11+
exec "$JAVA_HOME/bin/java" \
12+
-XX:+UseZGC \
13+
-Xms1g -Xmx4g \
14+
-XX:+AlwaysPreTouch \
15+
-XX:ParallelGCThreads=8 \
16+
-Dfile.encoding=UTF-8 \
17+
-jar moqui.war

0 commit comments

Comments
 (0)