23 commits
3589b8e
Added total number of nodes and edges
adrianad Jul 9, 2025
6afd75f
Added Graph statistics to the frontend
adrianad Jul 9, 2025
e61d7fc
Added translations
adrianad Jul 9, 2025
08f72ce
Added API Endpoints for detecting communities and entity resolution
adrianad Jul 9, 2025
27cbb9d
Added buttons and progress bars for community detection and entity re…
adrianad Jul 9, 2025
76a731d
Fixing import
adrianad Jul 9, 2025
0988204
Community detection gets started
adrianad Jul 9, 2025
05d1d6d
Community Progress starting
adrianad Jul 10, 2025
2aec2fb
Community detection with updates complete
adrianad Jul 10, 2025
6607d3c
Show communities in Statistics
adrianad Jul 10, 2025
0a60955
Fixing endpoint naming
adrianad Jul 10, 2025
a76ecce
Update on the Redis Lock
adrianad Jul 10, 2025
db63bb5
Implement two-step GraphRAG workflow with manual entity extraction an…
adrianad Jul 14, 2025
61848a1
Moved the configuration to the knowledge graph section
adrianad Jul 14, 2025
ce6ae8d
Improving Progress displays
adrianad Jul 14, 2025
3236c22
Extract entities now concurrent
adrianad Jul 14, 2025
b200980
Remove GraphRAG operation timeouts and reduce polling frequency
adrianad Jul 14, 2025
d4875c4
Progress updates working
adrianad Jul 15, 2025
9f66d17
Removed Logging
adrianad Jul 15, 2025
904dbd2
Remove duplicated code
adrianad Jul 16, 2025
2a00d28
Removed some comments
adrianad Jul 16, 2025
ebbb73e
Remove Threading
adrianad Jul 16, 2025
2ab26d9
Merge branch 'main' into feature/knowledge-graph-view-update
adrianad Jul 17, 2025
902 changes: 899 additions & 3 deletions api/apps/kb_app.py

Large diffs are not rendered by default.

36 changes: 35 additions & 1 deletion api/db/services/document_service.py
@@ -567,7 +567,7 @@ def update_progress(cls):
if (d["parser_config"].get("raptor") or {}).get("use_raptor") and not has_raptor:
queue_raptor_o_graphrag_tasks(d, "raptor", priority)
prg = 0.98 * len(tsks) / (len(tsks) + 1)
elif (d["parser_config"].get("graphrag") or {}).get("use_graphrag") and not has_graphrag:
elif _should_auto_queue_graphrag(d["parser_config"]) and not has_graphrag:
queue_raptor_o_graphrag_tasks(d, "graphrag", priority)
prg = 0.98 * len(tsks) / (len(tsks) + 1)
else:
@@ -608,6 +608,40 @@ def do_cancel(cls, doc_id):
pass
return False

@classmethod
@DB.connection_context()
def has_documents_parsing(cls, kb_id):
"""Check if any documents in the knowledge base are currently being parsed."""
docs = cls.model.select().where(
cls.model.kb_id == kb_id,
cls.model.run == TaskStatus.RUNNING.value,
cls.model.progress < 1
)
return docs.count() > 0


def _should_auto_queue_graphrag(parser_config):
"""
Determine if GraphRAG tasks should be automatically queued after parsing.

With the new UI, GraphRAG is now manual-only (extract_only mode):
- New enum graphrag_mode="full_auto" -> auto queue
- New enum graphrag_mode="extract_only" -> no auto queue
- New enum graphrag_mode="none" -> no auto queue
- Legacy boolean use_graphrag=true -> no auto queue (changed behavior)
- Default -> no auto queue
"""
graphrag_config = parser_config.get("graphrag", {})

# Check for new enum format first
graphrag_mode = graphrag_config.get("graphrag_mode")
if graphrag_mode is not None:
return graphrag_mode == "full_auto"

# Legacy format no longer triggers auto-queuing since toggle was removed
# This allows existing configs to work but use manual GraphRAG workflow
return False


def queue_raptor_o_graphrag_tasks(doc, ty, priority):
chunking_config = DocumentService.get_chunking_config(doc["id"])
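The decision table in _should_auto_queue_graphrag is easiest to verify with concrete inputs. A minimal sketch against the helper as quoted above (the sample dicts are illustrative, not taken from the PR):

    # Sample parser_config fragments; only the "graphrag" key is consulted.
    assert _should_auto_queue_graphrag({"graphrag": {"graphrag_mode": "full_auto"}}) is True
    assert _should_auto_queue_graphrag({"graphrag": {"graphrag_mode": "extract_only"}}) is False
    assert _should_auto_queue_graphrag({"graphrag": {"graphrag_mode": "none"}}) is False
    assert _should_auto_queue_graphrag({"graphrag": {"use_graphrag": True}}) is False  # legacy toggle no longer auto-queues
    assert _should_auto_queue_graphrag({}) is False  # default: no auto queue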
51 changes: 51 additions & 0 deletions api/db/services/knowledgebase_service.py
@@ -436,3 +436,54 @@ def update_document_number_in_init(cls, kb_id, doc_num):
else:
raise e

@classmethod
@DB.connection_context()
def migrate_graphrag_config(cls):
"""
Migrate existing knowledge bases from legacy boolean use_graphrag
to new enum graphrag_mode format.

Migration rules:
- use_graphrag: true -> graphrag_mode: "full_auto"
- use_graphrag: false -> graphrag_mode: "none"
- Missing use_graphrag -> graphrag_mode: "none"
- Already has graphrag_mode -> no change
"""
migrated_count = 0

# Get all knowledge bases
kbs = cls.model.select()

for kb in kbs:
parser_config = kb.parser_config or {}
graphrag_config = parser_config.get("graphrag", {})

# Skip if already has new enum format
if "graphrag_mode" in graphrag_config:
continue

# Check for legacy boolean format
use_graphrag = graphrag_config.get("use_graphrag")

if isinstance(use_graphrag, bool):
# Migrate from boolean to enum
new_mode = "full_auto" if use_graphrag else "none"
graphrag_config["graphrag_mode"] = new_mode

# Remove old boolean field
graphrag_config.pop("use_graphrag", None)

# Update the knowledge base
parser_config["graphrag"] = graphrag_config
cls.update_by_id(kb.id, {"parser_config": parser_config})
migrated_count += 1

elif use_graphrag is None:
# No legacy config, set default
graphrag_config["graphrag_mode"] = "none"
parser_config["graphrag"] = graphrag_config
cls.update_by_id(kb.id, {"parser_config": parser_config})
migrated_count += 1

return migrated_count
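
A plausible call site for the migration, sketched under the assumption that the classmethod lives on KnowledgebaseService and runs once at startup (the hook function and log line are illustrative):

    import logging

    from api.db.services.knowledgebase_service import KnowledgebaseService

    def migrate_on_startup():
        # Safe to call repeatedly: KBs that already carry "graphrag_mode"
        # are skipped, so the migration is effectively idempotent.
        count = KnowledgebaseService.migrate_graphrag_config()
        if count:
            logging.info("Migrated %d knowledge bases to graphrag_mode", count)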

6 changes: 3 additions & 3 deletions api/utils/validation_utils.py
@@ -353,11 +353,11 @@ class RaptorConfig(Base):


class GraphragConfig(Base):
-    use_graphrag: bool = Field(default=False)
+    use_graphrag: bool = Field(default=False)  # Default off - GraphRAG is manual-only via Knowledge Graph UI
entity_types: list[str] = Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"])
method: GraphragMethodEnum = Field(default=GraphragMethodEnum.light)
-    community: bool = Field(default=False)
-    resolution: bool = Field(default=False)
+    community: bool = Field(default=False)  # Will be handled via Knowledge Graph UI
+    resolution: bool = Field(default=False)  # Will be handled via Knowledge Graph UI


class ParserConfig(Base):
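For reference, the two parser_config shapes involved: the legacy boolean that this validator still accepts, and the enum form the service layer now reads (field values are illustrative):

    # Legacy shape (pre-migration): a single boolean toggle.
    legacy = {"graphrag": {"use_graphrag": True, "method": "light"}}

    # New shape (post-migration): explicit mode enum; use_graphrag is dropped.
    migrated = {"graphrag": {"graphrag_mode": "full_auto", "method": "light"}}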
22 changes: 7 additions & 15 deletions graphrag/entity_resolution.py
@@ -66,7 +66,8 @@ def __init__(
async def __call__(self, graph: nx.Graph,
subgraph_nodes: set[str],
prompt_variables: dict[str, Any] | None = None,
-                 callback: Callable | None = None) -> EntityResolutionResult:
+                 callback: Callable | None = None,
+                 kb_id: str = None) -> EntityResolutionResult:
"""Call method definition."""
if prompt_variables is None:
prompt_variables = {}
@@ -106,14 +107,9 @@ async def limited_resolve_candidate(candidate_batch, result_set, result_lock):
nonlocal remain_candidates_to_resolve, callback
async with semaphore:
try:
-    with trio.move_on_after(180) as cancel_scope:
-        await self._resolve_candidate(candidate_batch, result_set, result_lock)
-        remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
-        callback(msg=f"Resolved {len(candidate_batch[1])} pairs, {remain_candidates_to_resolve} are remained to resolve. ")
-    if cancel_scope.cancelled_caught:
-        logging.warning(f"Timeout resolving {candidate_batch}, skipping...")
-        remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
-        callback(msg=f"Fail to resolved {len(candidate_batch[1])} pairs due to timeout reason, skipped. {remain_candidates_to_resolve} are remained to resolve. ")
+    await self._resolve_candidate(candidate_batch, result_set, result_lock)
+    remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
+    callback(msg=f"Resolved {len(candidate_batch[1])} pairs, {remain_candidates_to_resolve} are remained to resolve. ")
except Exception as e:
logging.error(f"Error resolving candidate batch: {e}")

@@ -152,7 +148,7 @@ async def limited_merge_nodes(graph, nodes, change):
)

async def _resolve_candidate(self, candidate_resolution_i: tuple[str, list[tuple[str, str]]], resolution_result: set[str], resolution_result_lock: trio.Lock):
gen_conf = {"temperature": 0.5}
gen_conf = {"temperature": 0.5, "max_tokens": 8000}
pair_txt = [
f'When determining whether two {candidate_resolution_i[0]}s are the same, you should only focus on critical properties and overlook noisy factors.\n']
for index, candidate in enumerate(candidate_resolution_i[1]):
@@ -170,11 +166,7 @@ async def _resolve_candidate(self, candidate_resolution_i: tuple[str, list[tuple
logging.info(f"Created resolution prompt {len(text)} bytes for {len(candidate_resolution_i[1])} entity pairs of type {candidate_resolution_i[0]}")
async with chat_limiter:
try:
-    with trio.move_on_after(120) as cancel_scope:
-        response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf)
-    if cancel_scope.cancelled_caught:
-        logging.warning("_resolve_candidate._chat timeout, skipping...")
-        return
+    response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf)
except Exception as e:
logging.error(f"_resolve_candidate._chat failed: {e}")
return
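With the per-batch move_on_after timeouts gone, back-pressure comes only from the semaphore (and the chat_limiter inside _resolve_candidate). A minimal trio sketch of that pattern; everything except trio's own API is illustrative:

    import trio

    async def process(batch):
        await trio.sleep(0.1)  # stand-in for the real _resolve_candidate work

    async def worker(semaphore, batch):
        async with semaphore:  # caps in-flight batches; no per-batch timeout any more
            await process(batch)

    async def main(batches):
        semaphore = trio.Semaphore(5)  # illustrative concurrency limit
        async with trio.open_nursery() as nursery:
            for batch in batches:
                nursery.start_soon(worker, semaphore, batch)

    trio.run(main, [["a"], ["b"], ["c"]])

The trade-off of dropping the deadline is that a hung LLM call now holds its slot indefinitely; per the commit list, the PR pairs this with reduced polling frequency rather than per-call timeouts.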
4 changes: 2 additions & 2 deletions graphrag/general/community_reports_extractor.py
@@ -50,7 +50,7 @@ def __init__(
self._extraction_prompt = COMMUNITY_REPORT_PROMPT
self._max_report_length = max_report_length or 1500

-    async def __call__(self, graph: nx.Graph, callback: Callable | None = None):
+    async def __call__(self, graph: nx.Graph, callback: Callable | None = None, kb_id: str = None):
for node_degree in graph.degree:
graph.nodes[str(node_degree[0])]["rank"] = int(node_degree[1])

@@ -90,7 +90,7 @@ async def extract_community_report(community):
"relation_df": rela_df.to_csv(index_label="id")
}
text = perform_variable_replacements(self._extraction_prompt, variables=prompt_variables)
gen_conf = {"temperature": 0.3}
gen_conf = {"temperature": 0.3, "max_tokens": 8000}
async with chat_limiter:
try:
with trio.move_on_after(80) as cancel_scope:
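Note that, unlike entity resolution, this path keeps its 80-second cap via trio.move_on_after. A minimal sketch of the primitive's semantics (the chat stand-in is illustrative):

    import trio

    async def fake_chat():
        await trio.sleep(1)  # stand-in for the real report-generation call

    async def demo():
        with trio.move_on_after(80) as cancel_scope:  # cancels the block after 80s
            await fake_chat()
        if cancel_scope.cancelled_caught:
            print("timed out; this community report is skipped")

    trio.run(demo)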