Skip to content

Commit

Permalink
#455 - Search query does not correctly handle translations
Browse files (browse the repository at this point in the history)
- Modify the Elasticsearch queries for both the ontology search and the codeable concept search
- Add `should` and `must_not` clauses (based on `exists` queries) for the translation fields `display.de` and `display.en`
  • Loading branch information
michael-82 committed Feb 20, 2025
1 parent df65a8f commit ee6fe99
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
@Slf4j
@ConditionalOnExpression("${app.elastic.enabled}")
public class CodeableConceptService {
public static final String FIELD_NAME_DISPLAY_DE = "display.de";
public static final String FIELD_NAME_DISPLAY_EN = "display.en";
public static final String FIELD_NAME_DISPLAY_ORIGINAL = "display.original";
public static final String FIELD_NAME_TERMCODE_WITH_BOOST = "termcode.code^2";
private ElasticsearchOperations operations;

private CodeableConceptEsRepository repo;
Expand Down Expand Up @@ -88,33 +92,41 @@ private SearchHits<CodeableConceptDocument> findByCodeOrDisplay(String keyword,
.build();

} else {
// First the "upper" part of the query, when translations are present
var mustMultiMatchQueryWithTranslations = new MultiMatchQuery.Builder()
var translationDeExistsQuery = new ExistsQuery.Builder()
.field(FIELD_NAME_DISPLAY_DE)
.build();

var translationEnExistsQuery = new ExistsQuery.Builder()
.field(FIELD_NAME_DISPLAY_EN)
.build();

var mmQueryWithTranslations = new MultiMatchQuery.Builder()
.query(keyword)
.fields(List.of("display.de", "display.en", "termcode.code^2"))
.fields(List.of(FIELD_NAME_DISPLAY_DE, FIELD_NAME_DISPLAY_EN, FIELD_NAME_TERMCODE_WITH_BOOST))
.build();

var innerBoolQueryMatchTranslations = new BoolQuery.Builder()
.must(List.of(mustMultiMatchQueryWithTranslations._toQuery()))
var boolQueryWithTranslations = new BoolQuery.Builder()
.should(List.of(translationDeExistsQuery._toQuery(), translationEnExistsQuery._toQuery()))
.must(List.of(mmQueryWithTranslations._toQuery()))
.filter(filterTerms.isEmpty() ? List.of() : filterTerms)
.build();


// The "lower" part that will only be considered when the translations are empty
var mustMultiMatchQueryWithOriginal = new MultiMatchQuery.Builder()
var mmQueryWithOriginal = new MultiMatchQuery.Builder()
.query(keyword)
.fields(List.of("display.original", "termcode.code^2"))
.fields(List.of(FIELD_NAME_DISPLAY_ORIGINAL, FIELD_NAME_TERMCODE_WITH_BOOST))
.build();

var innerBoolQueryMatchOriginal = new BoolQuery.Builder()
.must(List.of(mustMultiMatchQueryWithOriginal._toQuery()))
var boolQueryWithOriginal = new BoolQuery.Builder()
.mustNot(List.of(translationDeExistsQuery._toQuery(), translationEnExistsQuery._toQuery()))
.must(List.of(mmQueryWithOriginal._toQuery()))
.filter(filterTerms.isEmpty() ? List.of() : filterTerms)
.build();

// Combine both parts in the top level bool query
outerBoolQuery = new BoolQuery.Builder()
.should(List.of(innerBoolQueryMatchTranslations._toQuery(), innerBoolQueryMatchOriginal._toQuery()))
.minimumShouldMatch("1")
.should(List.of(boolQueryWithTranslations._toQuery(), boolQueryWithOriginal._toQuery()))
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@
@Slf4j
@ConditionalOnExpression("${app.elastic.enabled}")
public class TerminologyEsService {
public static final String FILTER_KEY_CRITERIA_SETS = "criteria_sets";
public static final String FILTER_KEY_CONTEXT_CODE = "context.code";
public static final String FILTER_KEY_KDS_MODULE = "kds_module";
public static final String FILTER_KEY_TERMINOLOGY = "terminology";
public static final String FIELD_NAME_DISPLAY_DE = "display.de";
public static final String FIELD_NAME_DISPLAY_EN = "display.en";
public static final String FIELD_NAME_DISPLAY_ORIGINAL = "display.original";
public static final String FIELD_NAME_TERMCODE_WITH_BOOST = "termcode^2";
private ElasticsearchOperations operations;

private String[] filterFields;
Expand Down Expand Up @@ -91,16 +99,16 @@ public EsSearchResult performOntologySearchWithPaging(String keyword,

List<Pair<String, List<String>>> filterList = new ArrayList<>();
if (!CollectionUtils.isEmpty(criteriaSets)) {
filterList.add(Pair.of("criteria_sets", criteriaSets));
filterList.add(Pair.of(FILTER_KEY_CRITERIA_SETS, criteriaSets));
}
if (!CollectionUtils.isEmpty(context)) {
filterList.add(Pair.of("context.code", context));
filterList.add(Pair.of(FILTER_KEY_CONTEXT_CODE, context));
}
if (!CollectionUtils.isEmpty(kdsModule)) {
filterList.add(Pair.of("kds_module", kdsModule));
filterList.add(Pair.of(FILTER_KEY_KDS_MODULE, kdsModule));
}
if (!CollectionUtils.isEmpty(terminology)) {
filterList.add(Pair.of("terminology", terminology));
filterList.add(Pair.of(FILTER_KEY_TERMINOLOGY, terminology));
}

SearchHits<OntologyListItemDocument> searchHitPage = findByNameOrTermcode(
Expand Down Expand Up @@ -156,19 +164,36 @@ private SearchHits<OntologyListItemDocument> findByNameOrTermcode(String keyword
.filter(filterTerms.isEmpty() ? List.of() : filterTerms)
.build();
} else {
var translationDeExistsQuery = new ExistsQuery.Builder()
.field(FIELD_NAME_DISPLAY_DE)
.build();

var translationEnExistsQuery = new ExistsQuery.Builder()
.field(FIELD_NAME_DISPLAY_EN)
.build();

var mmQueryWithTranslations = new MultiMatchQuery.Builder()
.query(keyword)
.fields(List.of("display.de", "display.en", "termcode^2"))
.fields(List.of(FIELD_NAME_DISPLAY_DE, FIELD_NAME_DISPLAY_EN, FIELD_NAME_TERMCODE_WITH_BOOST))
.build();

var boolQueryWithTranslations = new BoolQuery.Builder()
.should(List.of(translationDeExistsQuery._toQuery(), translationEnExistsQuery._toQuery()))
.must(mmQueryWithTranslations._toQuery())
.build();

var mmQueryWithOriginal = new MultiMatchQuery.Builder()
.query(keyword)
.fields(List.of("display.original", "termcode^2"))
.fields(List.of(FIELD_NAME_DISPLAY_ORIGINAL, FIELD_NAME_TERMCODE_WITH_BOOST))
.build();

var boolQueryWithOriginal = new BoolQuery.Builder()
.mustNot(List.of(translationDeExistsQuery._toQuery(), translationEnExistsQuery._toQuery()))
.must(mmQueryWithOriginal._toQuery())
.build();

boolQuery = new BoolQuery.Builder()
.should(List.of(mmQueryWithTranslations._toQuery(), mmQueryWithOriginal._toQuery()))
.minimumShouldMatch("1")
.should(List.of(boolQueryWithTranslations._toQuery(), boolQueryWithOriginal._toQuery()))
.filter(filterTerms.isEmpty() ? List.of() : filterTerms)
.build();
}
Expand Down

0 comments on commit ee6fe99

Please sign in to comment.