23 commits
3589b8e
Added total number of nodes and edges
adrianad Jul 9, 2025
6afd75f
Added Graph statistics to the frontend
adrianad Jul 9, 2025
e61d7fc
Added translations
adrianad Jul 9, 2025
08f72ce
Added API Endpoints for detecting communities and entity resolution
adrianad Jul 9, 2025
27cbb9d
Added buttons and progress bars for community detection and entity re…
adrianad Jul 9, 2025
76a731d
Fixing import
adrianad Jul 9, 2025
0988204
Community detection gets started
adrianad Jul 9, 2025
05d1d6d
Community Progress starting
adrianad Jul 10, 2025
2aec2fb
Community detection with updates complete
adrianad Jul 10, 2025
6607d3c
Show communities in Statistics
adrianad Jul 10, 2025
0a60955
Fixing endpoint naming
adrianad Jul 10, 2025
a76ecce
Update on the Redis Lock
adrianad Jul 10, 2025
db63bb5
Implement two-step GraphRAG workflow with manual entity extraction an…
adrianad Jul 14, 2025
61848a1
Moved the configuration to the knowledge graph section
adrianad Jul 14, 2025
ce6ae8d
Improving Progress displays
adrianad Jul 14, 2025
3236c22
Extract entities now concurrent
adrianad Jul 14, 2025
b200980
Remove GraphRAG operation timeouts and reduce polling frequency
adrianad Jul 14, 2025
d4875c4
Progress updates working
adrianad Jul 15, 2025
9f66d17
Removed Logging
adrianad Jul 15, 2025
904dbd2
Remove duplicated code
adrianad Jul 16, 2025
2a00d28
Removed some comments
adrianad Jul 16, 2025
ebbb73e
Remove Threading
adrianad Jul 16, 2025
2ab26d9
Merge branch 'main' into feature/knowledge-graph-view-update
adrianad Jul 17, 2025
902 changes: 899 additions & 3 deletions api/apps/kb_app.py

Large diffs are not rendered by default.

36 changes: 35 additions & 1 deletion api/db/services/document_service.py
@@ -567,7 +567,7 @@ def update_progress(cls):
if (d["parser_config"].get("raptor") or {}).get("use_raptor") and not has_raptor:
queue_raptor_o_graphrag_tasks(d, "raptor", priority)
prg = 0.98 * len(tsks) / (len(tsks) + 1)
elif (d["parser_config"].get("graphrag") or {}).get("use_graphrag") and not has_graphrag:
elif _should_auto_queue_graphrag(d["parser_config"]) and not has_graphrag:
queue_raptor_o_graphrag_tasks(d, "graphrag", priority)
prg = 0.98 * len(tsks) / (len(tsks) + 1)
else:
@@ -608,6 +608,40 @@ def do_cancel(cls, doc_id):
pass
return False

@classmethod
@DB.connection_context()
def has_documents_parsing(cls, kb_id):
"""Check if any documents in the knowledge base are currently being parsed."""
docs = cls.model.select().where(
cls.model.kb_id == kb_id,
cls.model.run == TaskStatus.RUNNING.value,
cls.model.progress < 1
)
return docs.count() > 0


def _should_auto_queue_graphrag(parser_config):
"""
Determine if GraphRAG tasks should be automatically queued after parsing.

With the new UI, GraphRAG is now manual-only (extract_only mode):
- New enum graphrag_mode="full_auto" -> auto queue
- New enum graphrag_mode="extract_only" -> no auto queue
- New enum graphrag_mode="none" -> no auto queue
- Legacy boolean use_graphrag=true -> no auto queue (changed behavior)
- Default -> no auto queue
"""
graphrag_config = parser_config.get("graphrag", {})

# Check for new enum format first
graphrag_mode = graphrag_config.get("graphrag_mode")
if graphrag_mode is not None:
return graphrag_mode == "full_auto"

# Legacy format no longer triggers auto-queuing since toggle was removed
# This allows existing configs to work but use manual GraphRAG workflow
return False


def queue_raptor_o_graphrag_tasks(doc, ty, priority):
chunking_config = DocumentService.get_chunking_config(doc["id"])
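The decision table in _should_auto_queue_graphrag is easiest to verify with concrete inputs. A minimal sketch against the helper as quoted above (the sample dicts are illustrative, not taken from the PR):

    # Sample parser_config fragments; only the "graphrag" key is consulted.
    assert _should_auto_queue_graphrag({"graphrag": {"graphrag_mode": "full_auto"}}) is True
    assert _should_auto_queue_graphrag({"graphrag": {"graphrag_mode": "extract_only"}}) is False
    assert _should_auto_queue_graphrag({"graphrag": {"graphrag_mode": "none"}}) is False
    assert _should_auto_queue_graphrag({"graphrag": {"use_graphrag": True}}) is False  # legacy toggle no longer auto-queues
    assert _should_auto_queue_graphrag({}) is False  # default: no auto queue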
51 changes: 51 additions & 0 deletions api/db/services/knowledgebase_service.py
@@ -436,3 +436,54 @@ def update_document_number_in_init(cls, kb_id, doc_num):
else:
raise e

@classmethod
@DB.connection_context()
def migrate_graphrag_config(cls):
"""
Migrate existing knowledge bases from legacy boolean use_graphrag
to new enum graphrag_mode format.

Migration rules:
- use_graphrag: true -> graphrag_mode: "full_auto"
- use_graphrag: false -> graphrag_mode: "none"
- Missing use_graphrag -> graphrag_mode: "none"
- Already has graphrag_mode -> no change
"""
migrated_count = 0

# Get all knowledge bases
kbs = cls.model.select()

for kb in kbs:
parser_config = kb.parser_config or {}
graphrag_config = parser_config.get("graphrag", {})

# Skip if already has new enum format
if "graphrag_mode" in graphrag_config:
continue

# Check for legacy boolean format
use_graphrag = graphrag_config.get("use_graphrag")

if isinstance(use_graphrag, bool):
# Migrate from boolean to enum
new_mode = "full_auto" if use_graphrag else "none"
graphrag_config["graphrag_mode"] = new_mode

# Remove old boolean field
graphrag_config.pop("use_graphrag", None)

# Update the knowledge base
parser_config["graphrag"] = graphrag_config
cls.update_by_id(kb.id, {"parser_config": parser_config})
migrated_count += 1

elif use_graphrag is None:
# No legacy config, set default
graphrag_config["graphrag_mode"] = "none"
parser_config["graphrag"] = graphrag_config
cls.update_by_id(kb.id, {"parser_config": parser_config})
migrated_count += 1

return migrated_count
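
A plausible call site for the migration, sketched under the assumption that the classmethod lives on KnowledgebaseService and runs once at startup (the hook function and log line are illustrative):

    import logging

    from api.db.services.knowledgebase_service import KnowledgebaseService

    def migrate_on_startup():
        # Safe to call repeatedly: KBs that already carry "graphrag_mode"
        # are skipped, so the migration is effectively idempotent.
        count = KnowledgebaseService.migrate_graphrag_config()
        if count:
            logging.info("Migrated %d knowledge bases to graphrag_mode", count)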

6 changes: 3 additions & 3 deletions api/utils/validation_utils.py
@@ -353,11 +353,11 @@ class RaptorConfig(Base):


class GraphragConfig(Base):
-    use_graphrag: bool = Field(default=False)
+    use_graphrag: bool = Field(default=False)  # Default off - GraphRAG is manual-only via Knowledge Graph UI
entity_types: list[str] = Field(default_factory=lambda: ["organization", "person", "geo", "event", "category"])
method: GraphragMethodEnum = Field(default=GraphragMethodEnum.light)
-    community: bool = Field(default=False)
-    resolution: bool = Field(default=False)
+    community: bool = Field(default=False)  # Will be handled via Knowledge Graph UI
+    resolution: bool = Field(default=False)  # Will be handled via Knowledge Graph UI


class ParserConfig(Base):
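For reference, the two parser_config shapes involved: the legacy boolean that this validator still accepts, and the enum form the service layer now reads (field values are illustrative):

    # Legacy shape (pre-migration): a single boolean toggle.
    legacy = {"graphrag": {"use_graphrag": True, "method": "light"}}

    # New shape (post-migration): explicit mode enum; use_graphrag is dropped.
    migrated = {"graphrag": {"graphrag_mode": "full_auto", "method": "light"}}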
22 changes: 7 additions & 15 deletions graphrag/entity_resolution.py
@@ -66,7 +66,8 @@ def __init__(
async def __call__(self, graph: nx.Graph,
subgraph_nodes: set[str],
prompt_variables: dict[str, Any] | None = None,
-                 callback: Callable | None = None) -> EntityResolutionResult:
+                 callback: Callable | None = None,
+                 kb_id: str = None) -> EntityResolutionResult:
"""Call method definition."""
if prompt_variables is None:
prompt_variables = {}
@@ -106,14 +107,9 @@ async def limited_resolve_candidate(candidate_batch, result_set, result_lock):
nonlocal remain_candidates_to_resolve, callback
async with semaphore:
try:
-    with trio.move_on_after(180) as cancel_scope:
-        await self._resolve_candidate(candidate_batch, result_set, result_lock)
-        remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
-        callback(msg=f"Resolved {len(candidate_batch[1])} pairs, {remain_candidates_to_resolve} are remained to resolve. ")
-    if cancel_scope.cancelled_caught:
-        logging.warning(f"Timeout resolving {candidate_batch}, skipping...")
-        remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
-        callback(msg=f"Fail to resolved {len(candidate_batch[1])} pairs due to timeout reason, skipped. {remain_candidates_to_resolve} are remained to resolve. ")
+    await self._resolve_candidate(candidate_batch, result_set, result_lock)
+    remain_candidates_to_resolve = remain_candidates_to_resolve - len(candidate_batch[1])
+    callback(msg=f"Resolved {len(candidate_batch[1])} pairs, {remain_candidates_to_resolve} are remained to resolve. ")
except Exception as e:
logging.error(f"Error resolving candidate batch: {e}")

@@ -152,7 +148,7 @@ async def limited_merge_nodes(graph, nodes, change):
)

async def _resolve_candidate(self, candidate_resolution_i: tuple[str, list[tuple[str, str]]], resolution_result: set[str], resolution_result_lock: trio.Lock):
gen_conf = {"temperature": 0.5}
gen_conf = {"temperature": 0.5, "max_tokens": 8000}
pair_txt = [
f'When determining whether two {candidate_resolution_i[0]}s are the same, you should only focus on critical properties and overlook noisy factors.\n']
for index, candidate in enumerate(candidate_resolution_i[1]):
@@ -170,11 +166,7 @@ async def _resolve_candidate(self, candidate_resolution_i: tuple[str, list[tuple
logging.info(f"Created resolution prompt {len(text)} bytes for {len(candidate_resolution_i[1])} entity pairs of type {candidate_resolution_i[0]}")
async with chat_limiter:
try:
-    with trio.move_on_after(120) as cancel_scope:
-        response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf)
-    if cancel_scope.cancelled_caught:
-        logging.warning("_resolve_candidate._chat timeout, skipping...")
-        return
+    response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf)
except Exception as e:
logging.error(f"_resolve_candidate._chat failed: {e}")
return
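With the per-batch move_on_after timeouts gone, back-pressure comes only from the semaphore (and the chat_limiter inside _resolve_candidate). A minimal trio sketch of that pattern; everything except trio's own API is illustrative:

    import trio

    async def process(batch):
        await trio.sleep(0.1)  # stand-in for the real _resolve_candidate work

    async def worker(semaphore, batch):
        async with semaphore:  # caps in-flight batches; no per-batch timeout any more
            await process(batch)

    async def main(batches):
        semaphore = trio.Semaphore(5)  # illustrative concurrency limit
        async with trio.open_nursery() as nursery:
            for batch in batches:
                nursery.start_soon(worker, semaphore, batch)

    trio.run(main, [["a"], ["b"], ["c"]])

The trade-off of dropping the deadline is that a hung LLM call now holds its slot indefinitely; per the commit list, the PR pairs this with reduced polling frequency rather than per-call timeouts.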
4 changes: 2 additions & 2 deletions graphrag/general/community_reports_extractor.py
@@ -50,7 +50,7 @@ def __init__(
self._extraction_prompt = COMMUNITY_REPORT_PROMPT
self._max_report_length = max_report_length or 1500

-    async def __call__(self, graph: nx.Graph, callback: Callable | None = None):
+    async def __call__(self, graph: nx.Graph, callback: Callable | None = None, kb_id: str = None):
for node_degree in graph.degree:
graph.nodes[str(node_degree[0])]["rank"] = int(node_degree[1])

@@ -90,7 +90,7 @@ async def extract_community_report(community):
"relation_df": rela_df.to_csv(index_label="id")
}
text = perform_variable_replacements(self._extraction_prompt, variables=prompt_variables)
gen_conf = {"temperature": 0.3}
gen_conf = {"temperature": 0.3, "max_tokens": 8000}
async with chat_limiter:
try:
with trio.move_on_after(80) as cancel_scope:
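Note that, unlike entity resolution, this path keeps its 80-second cap via trio.move_on_after. A minimal sketch of the primitive's semantics (the chat stand-in is illustrative):

    import trio

    async def fake_chat():
        await trio.sleep(1)  # stand-in for the real report-generation call

    async def demo():
        with trio.move_on_after(80) as cancel_scope:  # cancels the block after 80s
            await fake_chat()
        if cancel_scope.cancelled_caught:
            print("timed out; this community report is skipped")

    trio.run(demo)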