infiniflow · likenamehaojie · Aug 21, 2025 · Aug 21, 2025 · Aug 21, 2025 · Aug 22, 2025
diff --git a/api/apps/sdk/dify_retrieval.py b/api/apps/sdk/dify_retrieval.py
@@ -1,4 +1,4 @@
-    #
+#
 #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
@@ -27,23 +27,28 @@
 from api.db.services.dialog_service import meta_filter, convert_conditions
 
 
-@manager.route('/dify/retrieval', methods=['POST'])  # noqa: F821
+@manager.route("/dify/retrieval", methods=["POST"])  # noqa: F821
 @apikey_required
 @validate_request("knowledge_id", "query")
 def retrieval(tenant_id):
     req = request.json
     question = req["query"]
     kb_id = req["knowledge_id"]
     use_kg = req.get("use_kg", False)
+    ignore_nometa = req.get("ignore_nometa", True)
     retrieval_setting = req.get("retrieval_setting", {})
     similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
     top = int(retrieval_setting.get("top_k", 1024))
     metadata_condition = req.get("metadata_condition",{})
-    metas = DocumentService.get_meta_by_kbs([kb_id])
-
+    if ignore_nometa:
+        metas, docs = DocumentService.get_documents_by_kbs([kb_id])
+    else:
+        metas = DocumentService.get_meta_by_kbs([kb_id])
+
     doc_ids = []
+    if ignore_nometa:
+        doc_ids.extend(docs)
     try:
-
         e, kb = KnowledgebaseService.get_by_id(kb_id)
         if not e:
             return build_error_result(message="Knowledgebase not found!", code=settings.RetCode.NOT_FOUND)
@@ -80,25 +85,33 @@ def retrieval(tenant_id):
 
         records = []
         for c in ranks["chunks"]:
-            e, doc = DocumentService.get_by_id( c["doc_id"])
+            e, doc = DocumentService.get_by_id(c["doc_id"])
             c.pop("vector", None)
-            meta = getattr(doc, 'meta_fields', {})
+            meta = getattr(doc, "meta_fields", {})
             meta["doc_id"] = c["doc_id"]
-            records.append({
-                "content": c["content_with_weight"],
-                "score": c["similarity"],
-                "title": c["docnm_kwd"],
-                "metadata": meta
-            })
+            records.append({"content": c["content_with_weight"], "score": c["similarity"], "title": c["docnm_kwd"], "metadata": meta})
 
         return jsonify({"records": records})
     except Exception as e:
         if str(e).find("not_found") > 0:
-            return build_error_result(
-                message='No chunk found! Check the chunk status please!',
-                code=settings.RetCode.NOT_FOUND
-            )
+            return build_error_result(message="No chunk found! Check the chunk status please!", code=settings.RetCode.NOT_FOUND)
         logging.exception(e)
         return build_error_result(message=str(e), code=settings.RetCode.SERVER_ERROR)
 
 
+def convert_conditions(metadata_condition):
+    """Convert a ``metadata_condition`` payload into a list of filter dicts.
+
+    Each entry under ``"conditions"`` becomes ``{"op", "key", "value"}``; the
+    operators "is" / "not is" map to "=" / "≠", any other operator is kept.
+    A ``None`` payload or a null/missing ``"conditions"`` yields ``[]``.
+    """
+    # NOTE(review): shadows the ``convert_conditions`` imported from dialog_service above.
+    op_mapping = {"is": "=", "not is": "≠"}
+    # ``or`` also covers an explicit JSON null, which ``.get(key, [])`` would let through.
+    conditions = (metadata_condition or {}).get("conditions") or []
+    return [
+        {"op": op_mapping.get(c["comparison_operator"], c["comparison_operator"]), "key": c["name"], "value": c["value"]}
+        for c in conditions
+    ]
+
diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
@@ -43,6 +43,7 @@
 from rag.prompts.prompts import gen_meta_filter, PROMPT_JINJA_ENV, ASK_SUMMARY
 from rag.utils import num_tokens_from_string, rmSpace
 from rag.utils.tavily_conn import Tavily
+import ast
 
 
 class DialogService(CommonService):
@@ -276,27 +277,34 @@ def meta_filter(metas: dict, filters: list[dict]):
     def filter_out(v2docs, operator, value):
         ids = []
         for input, docids in v2docs.items():
-            try:
-                input = float(input)
-                value = float(value)
-            except Exception:
-                input = str(input)
-                value = str(value)
-
-            for conds in [
-                    (operator == "contains", str(value).lower() in str(input).lower()),
-                    (operator == "not contains", str(value).lower() not in str(input).lower()),
-                    (operator == "start with", str(input).lower().startswith(str(value).lower())),
-                    (operator == "end with", str(input).lower().endswith(str(value).lower())),
-                    (operator == "empty", not input),
-                    (operator == "not empty", input),
-                    (operator == "=", input == value),
-                    (operator == "≠", input != value),
-                    (operator == ">", input > value),
-                    (operator == "<", input < value),
-                    (operator == "≥", input >= value),
-                    (operator == "≤", input <= value),
-                ]:
+            conditons = []
+
+            if input.startswith("[") and isinstance(value, list) and operator in ["not in", "in"]:
+                input = ast.literal_eval(input)
+                conditons.extend([(operator == "not in", set(input).isdisjoint(set(value)))] + [(operator == "in", bool(set(input) & set(value)))])
+                print("conditons", conditons)
+            else:
+                try:
+                    input = float(input)
+                    value = float(value)
+                except Exception:
+                    input = str(input)
+                    value = str(value)
+            all_conditions = conditons + [
+                (operator == "contains", str(value).lower() in str(input).lower()),
+                (operator == "not contains", str(value).lower() not in str(input).lower()),
+                (operator == "start with", str(input).lower().startswith(str(value).lower())),
+                (operator == "end with", str(input).lower().endswith(str(value).lower())),
+                (operator == "empty", not input),
+                (operator == "not empty", input),
+                (operator == "=", input == value),
+                (operator == "≠", input != value),
+                (operator == ">", input > value),
+                (operator == "<", input < value),
+                (operator == "≥", input >= value),
+                (operator == "≤", input <= value),
+            ]
+            for conds in all_conditions:
                 try:
                     if all(conds):
                         ids.extend(docids)
@@ -753,7 +761,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
                 doc_ids = None
 
     kbinfos = retriever.retrieval(
-        question = question,
+        question=question,
         embd_mdl=embd_mdl,
         tenant_ids=tenant_ids,
         kb_ids=kb_ids,
@@ -765,7 +773,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
         doc_ids=doc_ids,
         aggs=False,
         rerank_mdl=rerank_mdl,
-        rank_feature=label_question(question, kbs)
+        rank_feature=label_question(question, kbs),
     )
 
     knowledges = kb_prompt(kbinfos, max_tokens)

diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
@@ -592,7 +592,29 @@ def get_meta_by_kbs(cls, kb_ids):
                     meta[k][v] = []
                 meta[k][v].append(doc_id)
         return meta
-
+
+    @classmethod
+    @DB.connection_context()
+    def get_documents_by_kbs(cls, kb_ids):
+        """Index the documents of the given knowledge bases by their metadata.
+
+        Returns ``(meta, docs)``: ``meta`` maps each metadata key to a dict of
+        ``str(value) -> [doc_id, ...]``; ``docs`` lists the ids of documents
+        whose ``meta_fields`` is empty (or unset), which ``meta`` cannot reference.
+        """
+        fields = [cls.model.id, cls.model.meta_fields]
+        meta = {}
+        docs = []
+        for r in cls.model.select(*fields).where(cls.model.kb_id.in_(kb_ids)):
+            # Truthiness check also covers a NULL column; ``.items()`` on None would raise.
+            if not r.meta_fields:
+                docs.append(r.id)
+                continue
+            for k, v in r.meta_fields.items():
+                # Values are stringified before being used as dict keys.
+                meta.setdefault(k, {}).setdefault(str(v), []).append(r.id)
+        return meta, docs
+
     @classmethod
     @DB.connection_context()
     def update_progress(cls):