diff --git a/litho-example.toml b/litho-example.toml index e5c648c..53df9e0 100644 --- a/litho-example.toml +++ b/litho-example.toml @@ -189,6 +189,52 @@ cache_dir = ".litho/cache" # Default: 8760 (365 days) expire_hours = 8760 +# ============================================================================ +# Boundary Analysis Configuration +# ============================================================================ +# Controls how boundary interfaces (CLI, API, Router) are analyzed +# Reducing these values can significantly speed up processing and reduce timeout risk + +[boundary_analysis] +# Maximum number of boundary code insights to analyze +# Lower values = faster processing, less risk of timeout +# Higher values = more comprehensive analysis +# Default: 15, Recommended for large projects: 10-15 +max_boundary_insights = 15 + +# Code insights limit for formatting in prompts +# Controls the detail level in boundary analysis +# Default: 25, Recommended for large projects: 20-30 +code_insights_limit = 25 + +# Whether to include full source code in boundary analysis +# Setting to false significantly reduces token usage and processing time +# Default: false, Recommended for large projects: false +include_source_code = false + +# Only show directories when file count exceeds this threshold +# Helps avoid information overload for large codebases +# Default: 100, Recommended for large projects: 50-100 +only_directories_when_files_more_than = 100 + +# === Quick Fix for 504 Gateway Timeout Errors === +# If you're experiencing timeout errors during boundary analysis: +# 1. Set max_boundary_insights = 15 +# (Reduces the number of boundary code fragments analyzed) +# 2. Set code_insights_limit = 25 +# (Reduces detail level in prompts) +# 3. Set include_source_code = false +# (Excludes full source code, only includes structure info) +# 4. 
Set only_directories_when_files_more_than = 100 +# (Simplifies large directory structures) +# +# Example configuration for large projects or slow LLM providers: +# [boundary_analysis] +# max_boundary_insights = 15 +# code_insights_limit = 25 +# include_source_code = false +# only_directories_when_files_more_than = 100 + # ============================================================================ # Knowledge Configuration (External Documentation Sources) # ============================================================================ diff --git a/src/cli.rs b/src/cli.rs index 7c11931..9f83a08 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -94,6 +94,22 @@ pub struct Args { /// Force regeneration (clear cache) #[arg(long)] pub force_regenerate: bool, + + /// Maximum boundary insights to analyze (reduces timeout risk) + #[arg(long)] + pub boundary_max_insights: Option, + + /// Code insights limit for boundary analysis + #[arg(long)] + pub boundary_code_limit: Option, + + /// Include source code in boundary analysis (default: false) + #[arg(long)] + pub boundary_include_source: Option, + + /// Show only directories when files exceed this count + #[arg(long)] + pub boundary_only_directories_when_files_more_than: Option, } /// CLI subcommands @@ -124,7 +140,7 @@ impl Args { let mut config = if let Some(config_path) = &self.config { // If config file path is explicitly specified, load from that path let msg = target_lang.msg_config_read_error().replace("{:?}", &format!("{:?}", config_path)); - return Config::from_file(config_path).expect(&msg); + Config::from_file(config_path).expect(&msg) } else { // If no config file is explicitly specified, try loading from default location let default_config_path = std::env::current_dir() @@ -133,7 +149,7 @@ impl Args { if default_config_path.exists() { let msg = target_lang.msg_config_read_error().replace("{:?}", &format!("{:?}", default_config_path)); - return Config::from_file(&default_config_path).expect(&msg); + 
Config::from_file(&default_config_path).expect(&msg) } else { // Default config file doesn't exist, use default values Config::default() @@ -203,6 +219,21 @@ impl Args { config.cache.enabled = false; } + // Boundary analysis configuration overrides + if let Some(max_insights) = self.boundary_max_insights { + config.boundary_analysis.max_boundary_insights = max_insights; + } + if let Some(code_limit) = self.boundary_code_limit { + config.boundary_analysis.code_insights_limit = code_limit; + } + if let Some(include_source) = self.boundary_include_source { + config.boundary_analysis.include_source_code = include_source; + } + if let Some(only_dirs_threshold) = self.boundary_only_directories_when_files_more_than { + config.boundary_analysis.only_directories_when_files_more_than = Some(only_dirs_threshold); + } + + config } } diff --git a/src/config.rs b/src/config.rs index 158e97d..df5c3ea 100644 --- a/src/config.rs +++ b/src/config.rs @@ -129,6 +129,10 @@ pub struct Config { /// Architecture meta description file path pub architecture_meta_path: Option, + + /// Boundary analysis configuration + #[serde(default)] + pub boundary_analysis: BoundaryAnalysisConfig, } /// LLM model configuration @@ -183,6 +187,46 @@ pub struct CacheConfig { pub expire_hours: u64, } +/// Boundary analysis configuration +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct BoundaryAnalysisConfig { + /// Maximum number of boundary code insights to analyze + /// Reducing this value can significantly speed up processing and reduce timeout risk + /// Default: 15 + #[serde(default = "default_max_boundary_insights")] + pub max_boundary_insights: usize, + + /// Code insights limit for formatting + /// Controls how many code insights are included in the prompt + /// Default: 25 + #[serde(default = "default_code_insights_limit")] + pub code_insights_limit: usize, + + /// Whether to include source code in boundary analysis + /// Setting to false significantly reduces token usage + /// Default: 
false + #[serde(default = "default_false")] + pub include_source_code: bool, + + /// Only show directories when file count exceeds this threshold + /// Helps avoid information overload for large codebases + /// Default: 100 + #[serde(default = "default_files_threshold")] + pub only_directories_when_files_more_than: Option, +} + +fn default_max_boundary_insights() -> usize { + 15 // Reduced default to avoid 504 timeouts on large codebases +} + +fn default_code_insights_limit() -> usize { + 25 // Reduced default to balance performance and quality +} + +fn default_files_threshold() -> Option { + Some(100) // Reduced threshold for better performance +} + /// Knowledge configuration for external documentation sources #[derive(Debug, Deserialize, Serialize, Clone, Default)] pub struct KnowledgeConfig { @@ -294,6 +338,9 @@ pub struct LocalDocsConfig { fn default_true() -> bool { true } +fn default_false() -> bool { + false +} impl Config { /// Load configuration from file @@ -499,7 +546,9 @@ impl Config { // Try to extract or from XML for line in content.lines() { let line = line.trim(); - if line.starts_with("") && line.ends_with("") { + if line.starts_with("") + && line.ends_with("") + { let name = line .trim_start_matches("") .trim_end_matches(""); @@ -632,6 +681,7 @@ impl Default for Config { llm: LLMConfig::default(), cache: CacheConfig::default(), knowledge: KnowledgeConfig::default(), + boundary_analysis: BoundaryAnalysisConfig::default(), } } } @@ -664,3 +714,29 @@ impl Default for CacheConfig { } } } + +impl Default for BoundaryAnalysisConfig { + fn default() -> Self { + Self { + max_boundary_insights: default_max_boundary_insights(), + code_insights_limit: default_code_insights_limit(), + include_source_code: default_false(), + only_directories_when_files_more_than: default_files_threshold(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_boundary_analysis_default_values() { + let config = BoundaryAnalysisConfig::default(); + + 
assert_eq!(config.max_boundary_insights, 15); + assert_eq!(config.code_insights_limit, 25); + assert_eq!(config.include_source_code, false); + assert_eq!(config.only_directories_when_files_more_than, Some(100)); + } +} diff --git a/src/generator/compose/agents/architecture_editor.rs b/src/generator/compose/agents/architecture_editor.rs index a70a4b7..4757be4 100644 --- a/src/generator/compose/agents/architecture_editor.rs +++ b/src/generator/compose/agents/architecture_editor.rs @@ -32,7 +32,12 @@ impl StepForwardAgent for ArchitectureEditor { DataSource::ResearchResult(ResearchAgentType::WorkflowResearcher.to_string()), ], // Use architecture, deployment, database and ADR docs - optional_sources: vec![DataSource::knowledge_categories(vec!["architecture", "deployment", "database", "adr"])], + optional_sources: vec![DataSource::knowledge_categories(vec![ + "architecture", + "deployment", + "database", + "adr", + ])], } } @@ -40,6 +45,15 @@ impl StepForwardAgent for ArchitectureEditor { PromptTemplate { system_prompt: r#"You are a professional software architecture documentation expert, focused on generating complete, in-depth, and detailed C4 architecture model documentation. Your task is to write an architecture documentation titled `Architecture Overview` based on the provided research reports. +## Mermaid Diagram Safety Rules (MUST follow): +- Always output Mermaid that compiles in strict Mermaid parsers. +- Use ASCII-only node IDs: `[A-Za-z0-9_]` (example: `WebDemo`, `InferenceService`, `FabricAPI`). +- Keep business/localized text in labels only, e.g. `WebDemo["Web Demo hội thoại"]`. +- Define all nodes first, then declare edges between existing IDs. +- Use only supported headers (`graph TD`, `graph LR`, `flowchart TD`, `sequenceDiagram`, `erDiagram`). +- Do not include hidden characters, smart quotes, or non-standard symbols in Mermaid source. +- Keep edge labels short plain text; avoid markdown and overly complex punctuation. 
+ ## Your Professional Capabilities: 1. **Architecture Analysis Capability**: Deep understanding of system architecture patterns, design principles, and technology selection 2. **Documentation Writing Capability**: Proficient in C4 model, UML diagrams, and architecture visualization, with rich and detailed language descriptions diff --git a/src/generator/compose/agents/overview_editor.rs b/src/generator/compose/agents/overview_editor.rs index bb30ebd..6e59779 100644 --- a/src/generator/compose/agents/overview_editor.rs +++ b/src/generator/compose/agents/overview_editor.rs @@ -43,6 +43,15 @@ impl StepForwardAgent for OverviewEditor { Your task is to write a complete, in-depth, detailed, and easy-to-read C4 SystemContext document titled `Project Overview` based on the provided system context research report and domain module analysis results. +## Mermaid Diagram Safety Rules (MUST follow): +- Always generate Mermaid that is syntactically valid in strict parsers. +- Use ASCII-only node IDs: `[A-Za-z0-9_]` (e.g. `ClientApp`, `BackendAPI`). +- Put localized/human-readable text only inside node labels, e.g. `ClientApp["Ứng dụng khách hàng"]`. +- Define every node ID before using it in edges. +- Use only standard diagram headers like `graph TD`, `graph LR`, `flowchart TD`, `sequenceDiagram`, `erDiagram`. +- Do not use hidden/zero-width characters, smart quotes, or unusual Unicode symbols in Mermaid code. +- Keep edge labels simple plain text without markdown formatting. + ## External Knowledge Integration: You may have access to existing product description, requirements and architecture documentation from external sources. 
If available: diff --git a/src/generator/compose/agents/workflow_editor.rs b/src/generator/compose/agents/workflow_editor.rs index 5692ee1..cf5963e 100644 --- a/src/generator/compose/agents/workflow_editor.rs +++ b/src/generator/compose/agents/workflow_editor.rs @@ -32,7 +32,10 @@ impl StepForwardAgent for WorkflowEditor { DataSource::CODE_INSIGHTS, ], // Use workflow docs for workflow documentation - optional_sources: vec![DataSource::knowledge_categories(vec!["workflow", "architecture"])], + optional_sources: vec![DataSource::knowledge_categories(vec![ + "workflow", + "architecture", + ])], } } @@ -42,6 +45,15 @@ impl StepForwardAgent for WorkflowEditor { Your task is to write a complete, in-depth, and detailed workflow document titled `Core Workflows` based on the provided multi-dimensional research analysis results. +## Mermaid Diagram Safety Rules (MUST follow): +- Always produce Mermaid syntax that is valid for strict Mermaid parsers. +- Use ASCII-only node IDs: `[A-Za-z0-9_]` (e.g. `StartNode`, `ValidateInput`, `CallBackend`). +- Put localized text in labels only, e.g. `StartNode["Người dùng bắt đầu quy trình"]`. +- Declare all node IDs before referencing them in edges. +- Use standard diagram headers only (`flowchart TD`, `graph TD`, `graph LR`, `sequenceDiagram`). +- Avoid hidden characters, smart quotes, markdown formatting, and unusual Unicode symbols inside Mermaid source. +- Keep edge labels concise plain text. + ## Your Professional Capabilities: 1. **Workflow Analysis Skills**: Deep understanding of system core workflows, business processes, and technical processes 2. 
**Process Visualization Skills**: Proficient in flowchart design, sequence diagrams, and workflow diagram design diff --git a/src/generator/preprocess/agents/code_analyze.rs b/src/generator/preprocess/agents/code_analyze.rs index 3a673c0..7ed095d 100644 --- a/src/generator/preprocess/agents/code_analyze.rs +++ b/src/generator/preprocess/agents/code_analyze.rs @@ -42,15 +42,24 @@ impl CodeAnalyze { Box::pin(async move { let code_analyze = CodeAnalyze { language_processor }; - let agent_params = code_analyze + let (agent_params, mut static_insight) = code_analyze .prepare_single_code_agent_params(&project_structure_clone, &code_clone) .await?; - let mut code_insight = - extract::(&context_clone, agent_params).await?; - - // LLM will rewrite source_summary, so exclude it and override here + static_insight.code_dossier.source_summary = code_clone.source_summary.to_owned(); + + let mut code_insight = match extract::(&context_clone, agent_params).await { + Ok(insight) => insight, + Err(e) => { + eprintln!( + "⚠️ AI code insight failed for {}: {}. 
Falling back to static analysis.", + code_clone.name, e + ); + return Result::::Ok(static_insight); + } + }; + + // LLM may rewrite source_summary, so exclude it and override here code_insight.code_dossier.source_summary = code_clone.source_summary.to_owned(); - Result::::Ok(code_insight) }) }) @@ -73,7 +82,10 @@ impl CodeAnalyze { } } - println!("✓ Concurrent code analysis completed, successfully analyzed {} files", code_insights.len()); + println!( + "✓ Concurrent code analysis completed, successfully analyzed {} files", + code_insights.len() + ); Ok(code_insights) } } @@ -83,7 +95,7 @@ impl CodeAnalyze { &self, project_structure: &ProjectStructure, codes: &CodeDossier, - ) -> Result { + ) -> Result<(AgentExecuteParams, CodeInsight)> { // First perform static analysis let code_analyse = self.analyze_code_by_rules(codes, project_structure).await?; @@ -91,12 +103,15 @@ impl CodeAnalyze { let prompt_user = self.build_code_analysis_prompt(project_structure, &code_analyse); let prompt_sys = include_str!("prompts/code_analyze_sys.tpl").to_string(); - Ok(AgentExecuteParams { - prompt_sys, - prompt_user, - cache_scope: "ai_code_insight".to_string(), - log_tag: codes.name.to_string(), - }) + Ok(( + AgentExecuteParams { + prompt_sys, + prompt_user, + cache_scope: "ai_code_insight".to_string(), + log_tag: codes.name.to_string(), + }, + code_analyse, + )) } } diff --git a/src/generator/preprocess/agents/code_purpose_analyze.rs b/src/generator/preprocess/agents/code_purpose_analyze.rs index fc869e7..b1196c9 100644 --- a/src/generator/preprocess/agents/code_purpose_analyze.rs +++ b/src/generator/preprocess/agents/code_purpose_analyze.rs @@ -1,21 +1,75 @@ use anyhow::Result; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use std::path::Path; -use crate::{ - types::code::{CodePurpose, CodePurposeMapper}, -}; use crate::generator::agent_executor::{AgentExecuteParams, extract}; use 
crate::generator::context::GeneratorContext; +use crate::types::code::{CodePurpose, CodePurposeMapper}; + +fn deserialize_code_purpose_from_any<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + let raw = match value { + serde_json::Value::Null => String::new(), + serde_json::Value::String(s) => s, + serde_json::Value::Bool(v) => v.to_string(), + serde_json::Value::Number(v) => v.to_string(), + serde_json::Value::Array(v) => serde_json::to_string(&v).unwrap_or_default(), + serde_json::Value::Object(v) => serde_json::to_string(&v).unwrap_or_default(), + }; + Ok(CodePurposeMapper::map_from_raw(&raw)) +} + +fn deserialize_f64_lenient<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + let result = match value { + serde_json::Value::Number(n) => n.as_f64().unwrap_or(0.0), + serde_json::Value::String(s) => s.parse::().unwrap_or(0.0), + serde_json::Value::Bool(v) => { + if v { + 1.0 + } else { + 0.0 + } + } + _ => 0.0, + }; + Ok(result) +} + +fn deserialize_string_lenient<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + let result = match value { + serde_json::Value::Null => String::new(), + serde_json::Value::String(s) => s, + serde_json::Value::Bool(v) => v.to_string(), + serde_json::Value::Number(v) => v.to_string(), + serde_json::Value::Array(v) => serde_json::to_string(&v).unwrap_or_default(), + serde_json::Value::Object(v) => serde_json::to_string(&v).unwrap_or_default(), + }; + Ok(result) +} /// AI component type analysis result -#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[derive(Debug, Serialize, Deserialize, Clone, Default, JsonSchema)] +#[serde(default)] pub struct AICodePurposeAnalysis { // Inferred code functionality classification + #[serde(default, deserialize_with = 
"deserialize_code_purpose_from_any")] pub code_purpose: CodePurpose, // Confidence of the inference result (min 0.0, max 1.0), confidence is high when > 0.7. + #[serde(default, deserialize_with = "deserialize_f64_lenient")] pub confidence: f64, + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub reasoning: String, } @@ -32,8 +86,8 @@ impl CodePurposeEnhancer { context: &GeneratorContext, file_path: &Path, file_name: &str, - file_content: &str) -> Result - { + file_content: &str, + ) -> Result { // First use rule mapping let rule_based_type = CodePurposeMapper::map_by_path_and_name(&file_path.to_string_lossy(), file_name); @@ -45,14 +99,19 @@ impl CodePurposeEnhancer { // If there's AI analyzer and file content, use AI enhanced analysis let prompt_sys = "You are a professional code architecture analyst specializing in analyzing component types of code files.".to_string(); - let prompt_user = self.build_code_purpose_analysis_prompt(file_path, file_content, file_name); + let prompt_user = + self.build_code_purpose_analysis_prompt(file_path, file_content, file_name); - let analyze_result = extract::(context, AgentExecuteParams { - prompt_sys, - prompt_user, - cache_scope: "ai_code_purpose".to_string(), - log_tag: file_name.to_string(), - }).await; + let analyze_result = extract::( + context, + AgentExecuteParams { + prompt_sys, + prompt_user, + cache_scope: "ai_code_purpose".to_string(), + log_tag: file_name.to_string(), + }, + ) + .await; return match analyze_result { Ok(ai_analysis) => { @@ -71,7 +130,7 @@ impl CodePurposeEnhancer { // AI analysis failed, use rule result Ok(rule_based_type) } - } + }; } /// Build component type analysis prompt @@ -97,3 +156,38 @@ impl CodePurposeEnhancer { ) } } + +#[cfg(test)] +mod tests { + use super::AICodePurposeAnalysis; + use crate::types::code::CodePurpose; + + #[test] + fn test_ai_code_purpose_analysis_deserialize_unknown_variant_text() { + let payload = serde_json::json!({ + "code_purpose": "Migration 
configuration script (Alembic env file)", + "confidence": "0.91", + "reasoning": {"summary":"matched migration config"} + }); + + let parsed: AICodePurposeAnalysis = serde_json::from_value(payload) + .expect("AICodePurposeAnalysis should deserialize loose purpose variant"); + + assert_eq!(parsed.code_purpose, CodePurpose::Config); + assert_eq!(parsed.confidence, 0.91); + } + + #[test] + fn test_ai_code_purpose_analysis_deserialize_short_service_api_text() { + let payload = serde_json::json!({ + "code_purpose": "Service API for external calls", + "confidence": 0.8, + "reasoning": "api classification" + }); + + let parsed: AICodePurposeAnalysis = serde_json::from_value(payload) + .expect("AICodePurposeAnalysis should deserialize shortened API variant"); + + assert_eq!(parsed.code_purpose, CodePurpose::Api); + } +} diff --git a/src/generator/research/agents/boundary_analyzer.rs b/src/generator/research/agents/boundary_analyzer.rs index 431de51..9475531 100644 --- a/src/generator/research/agents/boundary_analyzer.rs +++ b/src/generator/research/agents/boundary_analyzer.rs @@ -83,12 +83,8 @@ Please return the analysis results in structured JSON format."# .to_string(), llm_call_mode: LLMCallMode::Extract, - formatter_config: FormatterConfig { - include_source_code: true, // Boundary analysis requires viewing source code details - code_insights_limit: 100, // Increase code insights limit to ensure no boundary code is missed - only_directories_when_files_more_than: Some(500), // Appropriate limit to avoid information overload - ..FormatterConfig::default() - }, + + formatter_config: FormatterConfig::default(), } } @@ -155,7 +151,7 @@ impl BoundaryAnalyzer { }) .collect(); - // Sort by importance, take top 50 most important + // Sort by importance let mut sorted_insights = boundary_insights; sorted_insights.sort_by(|a, b| { b.code_dossier @@ -163,7 +159,10 @@ impl BoundaryAnalyzer { .partial_cmp(&a.code_dossier.importance_score) .unwrap_or(std::cmp::Ordering::Equal) }); - 
sorted_insights.truncate(50); + + // Use configuration value for max boundary insights + let max_insights = context.config.boundary_analysis.max_boundary_insights; + sorted_insights.truncate(max_insights); // Group by type and count let mut entry_count = 0; diff --git a/src/generator/research/types.rs b/src/generator/research/types.rs index c0d4751..323c261 100644 --- a/src/generator/research/types.rs +++ b/src/generator/research/types.rs @@ -182,7 +182,8 @@ pub struct BusinessFlow { } /// Core component analysis result -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, Default, JsonSchema)] +#[serde(default)] pub struct KeyModuleReport { /// Domain name pub domain_name: String, @@ -498,3 +499,22 @@ impl Default for DatabaseOverviewReport { // https://c4model.com/abstractions/software-system // System name, project's role and value, system type, who is using it, how to use, which external systems it interacts with, diagram + +#[cfg(test)] +mod tests { + use super::KeyModuleReport; + + #[test] + fn test_key_module_report_deserialize_with_missing_module_name() { + let payload = serde_json::json!({ + "domain_name": "Tài liệu & IaC", + "module_description": "Infrastructure and documentation module" + }); + + let report: KeyModuleReport = serde_json::from_value(payload) + .expect("KeyModuleReport should deserialize when module_name is missing"); + + assert_eq!(report.module_name, ""); + assert_eq!(report.domain_name, "Tài liệu & IaC"); + } +} diff --git a/src/generator/step_forward_agent.rs b/src/generator/step_forward_agent.rs index fdd677e..773e075 100644 --- a/src/generator/step_forward_agent.rs +++ b/src/generator/step_forward_agent.rs @@ -111,13 +111,13 @@ pub struct FormatterConfig { impl Default for FormatterConfig { fn default() -> Self { Self { - code_insights_limit: 50, - include_source_code: false, + code_insights_limit: 25, // Reduced from 50 to avoid 504 timeouts on large codebases + 
include_source_code: false, // Disabled to reduce token usage dependency_limit: 50, readme_truncate_length: Some(16384), enable_compression: true, compression_config: CompressionConfig::default(), - only_directories_when_files_more_than: None, + only_directories_when_files_more_than: Some(100), // Show only directories when files > 100 } } } diff --git a/src/types/code.rs b/src/types/code.rs index 53299d7..0fc9512 100644 --- a/src/types/code.rs +++ b/src/types/code.rs @@ -4,72 +4,256 @@ use std::{ }; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; + +fn json_value_to_string(value: serde_json::Value) -> String { + match value { + serde_json::Value::Null => String::new(), + serde_json::Value::String(s) => s, + serde_json::Value::Bool(v) => v.to_string(), + serde_json::Value::Number(v) => v.to_string(), + serde_json::Value::Array(v) => serde_json::to_string(&v).unwrap_or_default(), + serde_json::Value::Object(v) => { + for key in [ + "name", + "module", + "path", + "summary", + "description", + "title", + "value", + "text", + "id", + ] { + if let Some(inner) = v.get(key) { + let text = json_value_to_string(inner.clone()); + if !text.is_empty() { + return text; + } + } + } + serde_json::to_string(&v).unwrap_or_default() + } + } +} + +fn deserialize_string_lenient<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + Ok(json_value_to_string(value)) +} + +fn deserialize_option_string_lenient<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + match value { + serde_json::Value::Null => Ok(None), + other => Ok(Some(json_value_to_string(other))), + } +} + +fn deserialize_vec_string_lenient<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + let values 
= match value { + serde_json::Value::Null => Vec::new(), + serde_json::Value::Array(items) => items + .into_iter() + .map(json_value_to_string) + .filter(|v| !v.is_empty()) + .collect(), + other => { + let one = json_value_to_string(other); + if one.is_empty() { + Vec::new() + } else { + vec![one] + } + } + }; + + Ok(values) +} + +fn deserialize_interfaces_lenient<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + let mut interfaces = Vec::new(); + + let items = match value { + serde_json::Value::Null => Vec::new(), + serde_json::Value::Array(items) => items, + other => vec![other], + }; + + for item in items { + match item { + serde_json::Value::String(name) => interfaces.push(InterfaceInfo { + name, + ..Default::default() + }), + serde_json::Value::Object(_) => { + if let Ok(interface) = serde_json::from_value::(item.clone()) { + interfaces.push(interface); + } else { + interfaces.push(InterfaceInfo { + name: json_value_to_string(item), + ..Default::default() + }); + } + } + other => interfaces.push(InterfaceInfo { + name: json_value_to_string(other), + ..Default::default() + }), + } + } + + Ok(interfaces) +} + +fn deserialize_dependencies_lenient<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + let mut dependencies = Vec::new(); + + let items = match value { + serde_json::Value::Null => Vec::new(), + serde_json::Value::Array(items) => items, + other => vec![other], + }; + + for item in items { + match item { + serde_json::Value::String(name) => dependencies.push(Dependency { + name, + ..Default::default() + }), + serde_json::Value::Object(_) => { + if let Ok(dependency) = serde_json::from_value::(item.clone()) { + dependencies.push(dependency); + } else { + dependencies.push(Dependency { + name: json_value_to_string(item), + ..Default::default() + }); + } + } + other => 
dependencies.push(Dependency { + name: json_value_to_string(other), + ..Default::default() + }), + } + } + + Ok(dependencies) +} + +fn deserialize_code_purpose_lenient<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + let raw = json_value_to_string(value); + Ok(CodePurposeMapper::map_from_raw(&raw)) +} /// Code basic information -#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[derive(Debug, Serialize, Deserialize, Clone, Default, JsonSchema)] +#[serde(default)] pub struct CodeDossier { /// Code file name + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub name: String, /// File path pub file_path: PathBuf, /// Source code summary #[schemars(skip)] - #[serde(default)] + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub source_summary: String, /// Purpose type + #[serde(default, deserialize_with = "deserialize_code_purpose_lenient")] pub code_purpose: CodePurpose, /// Importance score pub importance_score: f64, pub description: Option, + #[serde(default, deserialize_with = "deserialize_vec_string_lenient")] pub functions: Vec, /// Interfaces list + #[serde(default, deserialize_with = "deserialize_vec_string_lenient")] pub interfaces: Vec, } /// Intelligent insight information of code file -#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[derive(Debug, Serialize, Deserialize, Clone, Default, JsonSchema)] +#[serde(default)] pub struct CodeInsight { /// Code basic information pub code_dossier: CodeDossier, + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub detailed_description: String, /// Responsibilities + #[serde(default, deserialize_with = "deserialize_vec_string_lenient")] pub responsibilities: Vec, /// Contained interfaces + #[serde(default, deserialize_with = "deserialize_interfaces_lenient")] pub interfaces: Vec, /// Dependency information + #[serde(default, deserialize_with = 
"deserialize_dependencies_lenient")] pub dependencies: Vec, pub complexity_metrics: CodeComplexity, } /// Interface information -#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[derive(Debug, Serialize, Deserialize, Clone, Default, JsonSchema)] +#[serde(default)] pub struct InterfaceInfo { + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub name: String, + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub interface_type: String, // "function", "method", "class", "trait", etc. - pub visibility: String, // "public", "private", "protected" + #[serde(default, deserialize_with = "deserialize_string_lenient")] + pub visibility: String, // "public", "private", "protected" pub parameters: Vec, pub return_type: Option, pub description: Option, } /// Parameter information -#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[derive(Debug, Serialize, Deserialize, Clone, Default, JsonSchema)] +#[serde(default)] pub struct ParameterInfo { + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub name: String, + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub param_type: String, pub is_optional: bool, pub description: Option, } /// Dependency information -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, Default, JsonSchema)] +#[serde(default)] pub struct Dependency { + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub name: String, + #[serde(default, deserialize_with = "deserialize_option_string_lenient")] pub path: Option, pub is_external: bool, pub line_number: Option, + #[serde(default, deserialize_with = "deserialize_string_lenient")] pub dependency_type: String, // "import", "use", "include", "require", etc. 
+ #[serde(default, deserialize_with = "deserialize_option_string_lenient")] pub version: Option, } @@ -90,7 +274,8 @@ impl Display for Dependency { } /// Component complexity metrics -#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[derive(Debug, Serialize, Deserialize, Clone, Default, JsonSchema)] +#[serde(default)] pub struct CodeComplexity { pub cyclomatic_complexity: f64, pub lines_of_code: usize, @@ -115,7 +300,14 @@ pub enum CodePurpose { #[serde(alias = "Frontend UI component")] Widget, /// Code module for implementing specific logical functionality - #[serde(alias = "feature", alias = "specific_feature", alias = "specific-feature", alias = "Code module for implementing specific logical functionality")] + #[serde( + alias = "feature", + alias = "specific_feature", + alias = "specific-feature", + alias = "Specificfeature", + alias = "SpecificFeature", + alias = "Code module for implementing specific logical functionality" + )] SpecificFeature, /// Data type or model #[serde(alias = "Data type or model")] @@ -127,7 +319,9 @@ pub enum CodePurpose { #[serde(alias = "Functional tool code for specific scenarios")] Tool, /// Common, basic utility functions and classes, providing low-level auxiliary functions unrelated to business logic - #[serde(alias = "Common, basic utility functions and classes, providing low-level auxiliary functions unrelated to business logic")] + #[serde( + alias = "Common, basic utility functions and classes, providing low-level auxiliary functions unrelated to business logic" + )] Util, /// Configuration #[serde(alias = "configuration", alias = "Configuration")] @@ -145,16 +339,24 @@ pub enum CodePurpose { #[serde(alias = "Database component")] Database, /// Service API for external calls, providing calling capabilities based on HTTP, RPC, IPC and other protocols. 
- #[serde(alias = "Service API for external calls, providing calling capabilities based on HTTP, RPC, IPC and other protocols.")] + #[serde( + alias = "Service API for external calls, providing calling capabilities based on HTTP, RPC, IPC and other protocols." + )] Api, /// Controller component in MVC architecture, responsible for handling business logic - #[serde(alias = "Controller component in MVC architecture, responsible for handling business logic")] + #[serde( + alias = "Controller component in MVC architecture, responsible for handling business logic" + )] Controller, /// Service component in MVC architecture, responsible for handling business rules - #[serde(alias = "Service component in MVC architecture, responsible for handling business rules")] + #[serde( + alias = "Service component in MVC architecture, responsible for handling business rules" + )] Service, /// Collection of related code (functions, classes, resources) with clear boundaries and responsibilities - #[serde(alias = "Collection of related code (functions, classes, resources) with clear boundaries and responsibilities")] + #[serde( + alias = "Collection of related code (functions, classes, resources) with clear boundaries and responsibilities" + )] Module, /// Dependency library #[serde(alias = "library", alias = "package", alias = "Dependency library")] @@ -163,7 +365,11 @@ pub enum CodePurpose { #[serde(alias = "testing", alias = "tests", alias = "Test component")] Test, /// Documentation component - #[serde(alias = "documentation", alias = "docs", alias = "Documentation component")] + #[serde( + alias = "documentation", + alias = "docs", + alias = "Documentation component" + )] Doc, /// Data Access Layer component #[serde(alias = "Data Access Layer component")] @@ -172,10 +378,18 @@ pub enum CodePurpose { #[serde(alias = "Context component")] Context, /// command-line interface (CLI) commands or message/request handlers - #[serde(alias = "command-line interface (CLI) commands or 
message/request handlers", alias = "command-line interface (CLI) commands or message/request handlers")] + #[serde( + alias = "command-line interface (CLI) commands or message/request handlers", + alias = "command-line interface (CLI) commands or message/request handlers" + )] Command, /// Other uncategorized or unknown - #[serde(alias = "unknown", alias = "misc", alias = "miscellaneous", alias = "Other uncategorized or unknown")] + #[serde( + alias = "unknown", + alias = "misc", + alias = "miscellaneous", + alias = "Other uncategorized or unknown" + )] Other, } @@ -228,6 +442,89 @@ impl Default for CodePurpose { pub struct CodePurposeMapper; impl CodePurposeMapper { + pub fn map_from_raw(raw: &str) -> CodePurpose { + let normalized = raw + .to_lowercase() + .chars() + .filter(char::is_ascii_alphanumeric) + .collect::(); + + if normalized.is_empty() { + return CodePurpose::Other; + } + if normalized.contains("specificfeature") || normalized == "feature" { + return CodePurpose::SpecificFeature; + } + if normalized.contains("frontenduicomponent") || normalized == "widget" { + return CodePurpose::Widget; + } + if normalized.contains("frontenduipage") || normalized == "page" { + return CodePurpose::Page; + } + if normalized.contains("agent") { + return CodePurpose::Agent; + } + if normalized.contains("entry") { + return CodePurpose::Entry; + } + if normalized.contains("database") { + return CodePurpose::Database; + } + if normalized.contains("config") { + return CodePurpose::Config; + } + if normalized.contains("context") { + return CodePurpose::Context; + } + if normalized.contains("router") { + return CodePurpose::Router; + } + if normalized.contains("serviceapi") { + return CodePurpose::Api; + } + if normalized.contains("service") { + return CodePurpose::Service; + } + if normalized.contains("controller") { + return CodePurpose::Controller; + } + if normalized.contains("api") { + return CodePurpose::Api; + } + if normalized.contains("model") { + return 
CodePurpose::Model; + } + if normalized.contains("types") { + return CodePurpose::Types; + } + if normalized.contains("util") || normalized.contains("helper") { + return CodePurpose::Util; + } + if normalized.contains("tool") { + return CodePurpose::Tool; + } + if normalized.contains("module") { + return CodePurpose::Module; + } + if normalized.contains("dao") || normalized.contains("repository") { + return CodePurpose::Dao; + } + if normalized.contains("test") { + return CodePurpose::Test; + } + if normalized.contains("doc") { + return CodePurpose::Doc; + } + if normalized.contains("command") || normalized.contains("cli") { + return CodePurpose::Command; + } + if normalized.contains("library") || normalized.contains("package") || normalized == "lib" { + return CodePurpose::Lib; + } + + CodePurpose::Other + } + /// Intelligent mapping based on file path and name pub fn map_by_path_and_name(file_path: &str, file_name: &str) -> CodePurpose { let path_lower = file_path.to_lowercase(); @@ -383,28 +680,19 @@ mod tests { fn test_sql_file_classification() { // .sqlproj files should always be classified as Database assert_eq!( - CodePurposeMapper::map_by_path_and_name( - "/src/MyProject.sqlproj", - "MyProject.sqlproj" - ), + CodePurposeMapper::map_by_path_and_name("/src/MyProject.sqlproj", "MyProject.sqlproj"), CodePurpose::Database ); // .sql files should always be classified as Database assert_eq!( - CodePurposeMapper::map_by_path_and_name( - "/src/CreateTable.sql", - "CreateTable.sql" - ), + CodePurposeMapper::map_by_path_and_name("/src/CreateTable.sql", "CreateTable.sql"), CodePurpose::Database ); // Even in root directory assert_eq!( - CodePurposeMapper::map_by_path_and_name( - "/Schema.sql", - "Schema.sql" - ), + CodePurposeMapper::map_by_path_and_name("/Schema.sql", "Schema.sql"), CodePurpose::Database ); @@ -422,10 +710,7 @@ mod tests { fn test_sql_file_in_database_folder() { // SQL files in /database/ folder should still be Database assert_eq!( - 
CodePurposeMapper::map_by_path_and_name( - "/src/database/schema.sql", - "schema.sql" - ), + CodePurposeMapper::map_by_path_and_name("/src/database/schema.sql", "schema.sql"), CodePurpose::Database ); } @@ -434,10 +719,7 @@ mod tests { fn test_path_based_classification() { // Files in /database/ folder assert_eq!( - CodePurposeMapper::map_by_path_and_name( - "/src/database/connection.cs", - "connection.cs" - ), + CodePurposeMapper::map_by_path_and_name("/src/database/connection.cs", "connection.cs"), CodePurpose::Database ); @@ -450,4 +732,92 @@ mod tests { CodePurpose::Dao ); } + + #[test] + fn test_code_insight_deserialize_with_missing_fields() { + let payload = serde_json::json!({ + "code_dossier": { + "name": "chat-window.tsx", + "file_path": "src/chat-window.tsx" + }, + "interfaces": [ + { + "interface_type": "function" + } + ], + "dependencies": [ + { + "path": "react", + "is_external": true + } + ] + }); + + let insight: CodeInsight = serde_json::from_value(payload).expect( + "CodeInsight should support partial model outputs and fill defaults for missing fields", + ); + + assert_eq!(insight.code_dossier.name, "chat-window.tsx"); + assert_eq!( + insight.code_dossier.file_path, + PathBuf::from("src/chat-window.tsx") + ); + assert_eq!(insight.code_dossier.code_purpose, CodePurpose::Other); + assert!(insight.responsibilities.is_empty()); + assert_eq!(insight.interfaces[0].name, ""); + assert_eq!(insight.dependencies[0].name, ""); + } + + #[test] + fn test_code_insight_deserialize_with_specificfeature_variant() { + let payload = serde_json::json!({ + "code_dossier": { + "name": "connect-button.tsx", + "file_path": "src/connect-button.tsx", + "code_purpose": "Specificfeature" + } + }); + + let insight: CodeInsight = serde_json::from_value(payload).expect( + "CodeInsight should accept common model variant typo `Specificfeature` for code_purpose", + ); + + assert_eq!( + insight.code_dossier.code_purpose, + CodePurpose::SpecificFeature + ); + } + + #[test] + fn 
test_code_insight_deserialize_with_loose_schema_values() { + let payload = serde_json::json!({ + "code_dossier": { + "name": "use-toast.ts", + "file_path": "src/use-toast.ts", + "code_purpose": "widget" + }, + "detailed_description": { + "summary": "hook for toast state" + }, + "interfaces": [ + "State interface" + ], + "dependencies": [ + { + "name": {"module": "react"}, + "is_external": true + } + ] + }); + + let insight: CodeInsight = serde_json::from_value(payload) + .expect("CodeInsight should tolerate loose schema values from LLM output"); + + assert_eq!(insight.code_dossier.name, "use-toast.ts"); + assert_eq!(insight.detailed_description, "hook for toast state"); + assert_eq!(insight.interfaces.len(), 1); + assert_eq!(insight.interfaces[0].name, "State interface"); + assert_eq!(insight.dependencies.len(), 1); + assert_eq!(insight.dependencies[0].name, "react"); + } }