Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/docs_src/javascripts/performance.js
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,19 @@
};

// Initialize all performance features
/**
* Initializes various performance optimization features when the DOM is ready.
* @example
* init()
* // Logs "📊 Performance optimization features initialized"
* @param {void} None - This function does not take any parameters.
* @returns {void} This function does not return anything.
* @description
* - Sets up event listener to delay initialization until the DOM is fully loaded.
* - Initializes features like Performance, LazyLoading, Prefetching, ProgressiveEnhancement, and ErrorTracking.
* - Adds a loading progress indicator to the body of the document.
* - Ensures progress bar activation during page navigation.
*/
function init() {
// Wait for DOM to be ready
if (document.readyState === 'loading') {
Expand Down
41 changes: 41 additions & 0 deletions scripts/batch_compliance_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ def is_vcf_or_bcf(filename):
return ext.endswith(".vcf") or ext.endswith(".vcf.gz") or ext.endswith(".bcf")

def generate_markdown_report(results, directory):
"""Generate a markdown report based on compliance check results.
Parameters:
- results (List[Tuple[str, str, bool, str]]): A list of tuples containing the file name, tool used, validity, and error message.
- directory (str): The directory that was checked.
Returns:
- str: A markdown-formatted string representing the compliance report.
Processing Logic:
- Formats each result into a markdown table row, replacing pipe characters in error messages to avoid markdown misinterpretation.
- Marks validity with a check or cross emoji based on the boolean value."""
lines = ["# Batch Compliance Check Report\n",
f"Checked directory: `{directory}`\n",
"| File | Tool | Valid | Error |",
Expand All @@ -43,6 +52,17 @@ def generate_markdown_report(results, directory):

def generate_color_markdown_report(results, directory):
# Uses HTML <span> for color, which works in GitHub/HTML renderers
"""Generate a markdown report with colored pass/fail annotations based on compliance check results.
Parameters:
- results (list of tuples): A list where each tuple contains details about the compliance check (`filename`, `tool`, `validity`, `error_message`).
- directory (str): The directory that was checked.
Returns:
- str: A markdown-formatted string that represents the batch compliance check report.
Processing Logic:
- Builds a markdown table with headers and formatted data for each file and tool checked.
- Uses HTML `<span>` elements to apply color styling for pass and fail indicators.
- Replaces any pipe characters in the error messages to avoid markdown table misalignment.
- Appends a legend explaining the color codes used for pass and fail statuses."""
lines = ["# Batch Compliance Check Report\n",
f"Checked directory: `{directory}`\n",
"| File | Tool | Valid | Error |",
Expand All @@ -58,6 +78,17 @@ def generate_color_markdown_report(results, directory):
return "\n".join(lines)

def generate_html_report(results, directory):
"""Generates an HTML report summarizing the compliance checks performed on files.
Parameters:
- results (list of tuples): A list where each tuple contains file information from compliance checks. Each tuple consists of four items: file name (str), tool name (str), validation status (bool), and error message (str).
- directory (str): The directory path where the compliance checks were performed.
Returns:
- str: An HTML formatted string representing the compliance check report.
Processing Logic:
- Constructs an HTML document with a styled report table.
- Generates current date and time to include in the report.
- Formats validation status with PASS or FAIL indicators.
- Converts error message pipes ('|') into spaces for better readability."""
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
html = [
"<!DOCTYPE html>",
Expand Down Expand Up @@ -97,6 +128,16 @@ def generate_html_report(results, directory):
return "\n".join(html)

def main():
"""Batch VCF/BCF Compliance Checker main function.
Parameters:
- None
Returns:
- None
Processing Logic:
- Parses command-line arguments for directory, tool, output file, edge-case generation, notifications, and output format.
- Scans the specified directory for VCF/BCF files and validates them against the selected compliance tool.
- Handles edge-case generation if specified and outputs validation results in the chosen format.
- Saves the compliance report to stdout or a specified file, potentially notifying by saving to a specific folder if the notify option is selected."""
parser = argparse.ArgumentParser(description="Batch VCF/BCF Compliance Checker")
parser.add_argument("-d", "--directory", default="sample_data", help="Directory to scan for VCF/BCF files")
parser.add_argument("-t", "--tool", default=None, help="Compliance tool to use (bcftools, gatk, or as configured)")
Expand Down
19 changes: 19 additions & 0 deletions scripts/cli_enhanced_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,17 @@ class CacheManager:
"""

def __init__(self, cache_dir: str = ".validation-cache", ttl_hours: int = 24, max_size_mb: int = 100):
"""Initialize a cache manager with specified directory, time-to-live, and size constraints.
Parameters:
- cache_dir (str): Directory where cached data will be stored. Defaults to ".validation-cache".
- ttl_hours (int): Time-to-live for cached items in hours. Defaults to 24 hours.
- max_size_mb (int): Maximum cache size in megabytes. Defaults to 100MB.
Returns:
- None: This method initializes the cache manager; there is no return value.
Processing Logic:
- Converts ttl_hours to seconds for internal use.
- Ensures that the cache directory exists, creating it if necessary.
- Initializes cache hit/miss statistics counters."""
self.cache_dir = Path(cache_dir)
self.ttl_seconds = ttl_hours * 3600
self.max_size_bytes = max_size_mb * 1024 * 1024
Expand Down Expand Up @@ -568,6 +579,14 @@ class ValidationConfig:
"""

def __init__(self, config_file: Optional[str] = None):
"""Initializes the ValidationConfig class with default settings and optional configuration file.
Parameters:
- config_file (Optional[str]): The path to a configuration file to override default settings.
Returns:
- None: This constructor does not return any value.
Processing Logic:
- Initializes various default settings related to caching, performance, validation rules, and CI/CD.
- Loads additional configuration settings from the provided file, if specified."""
self.logger = logging.getLogger(__name__ + '.ValidationConfig')

# Default configuration
Expand Down
14 changes: 14 additions & 0 deletions src/vcf_agent/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,20 @@ def __init__(
ollama_base_url: Optional[str] = "http://localhost:11434",
memory_optimization: Optional[MemoryOptimizationConfig] = None
):
"""Initializes an instance of the class with configuration settings for model interaction.
Parameters:
- raw_mode (Optional[bool]): Indicates if the instance should operate in raw mode; default is None.
- model_provider (Literal["ollama", "openai", "cerebras"]): Specifies the provider of the model, defaults to "ollama".
- credentials_file (Optional[str]): Path to the credentials file, if applicable.
- reference_fasta (Optional[str]): Path to the reference FASTA file, if necessary.
- ollama_model_name (Optional[str]): Name of the Ollama model to use; default is "qwen3:4b".
- ollama_base_url (Optional[str]): Base URL for accessing Ollama model services, defaults to "http://localhost:11434".
- memory_optimization (Optional[MemoryOptimizationConfig]): Configuration for memory optimization, defaults to a new MemoryOptimizationConfig instance.
Returns:
- None: This is a constructor, therefore it does not return a value.
Processing Logic:
- Sets the model provider to a default of "ollama" if not specified.
- Initializes a memory optimization configuration if none is provided."""
self.raw_mode = raw_mode
self.model_provider = model_provider
self.credentials_file = credentials_file
Expand Down
89 changes: 89 additions & 0 deletions src/vcf_agent/enhanced_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,19 @@ def __init__(
tracing_config: Optional[TracingConfig] = None,
environment: Optional[str] = None
):
"""Initializes a tracer for monitoring service performance and memory optimization.
Parameters:
- service_name (str): Name of the service for which tracing is initialized.
- config (Optional[MemoryOptimizationConfig]): Configuration for optimizing memory usage, defaults to a standard configuration if not provided.
- tracing_config (Optional[TracingConfig]): Configuration for tracing, defaults to environment-based configuration if not provided.
- environment (Optional[str]): Environment setting for tracing, overrides default if specified.
Returns:
- None: This is a constructor method, it does not return a value.
Processing Logic:
- Initializes tracing configuration with environment setting if provided.
- Sets up tracer and sampling based on environmental configurations.
- Initializes performance tracking structures for operations.
- Sets logger to debug mode if tracing in debug mode is enabled."""
self.service_name = service_name
self.memory_config = config or MemoryOptimizationConfig()
self.tracing_config = tracing_config or TracingConfig.from_environment()
Expand Down Expand Up @@ -484,7 +497,30 @@ def ai_provider_span(self, provider: Union[str, AIProvider], model: str, operati
"""Decorator for AI provider operations."""
def decorator(func):
"""Wraps a function to add AI provider span context for tracking execution details.
Parameters:
- func (function): The function to be wrapped, which can be asynchronous or synchronous.
Returns:
- function: The wrapped version of the input function with added span context attributes.
Processing Logic:
- Determines if the function is synchronous or asynchronous and uses the appropriate wrapper.
- Establishes an AI provider span context to track execution details such as function name, arguments, and success status.
- Handles exceptions by setting success attribute to false, while successful executions set it to true."""
@functools.wraps(func)
async def async_wrapper(*args, **kwargs):
"""A wrapper for an asynchronous function that integrates tracing and error handling.
Parameters:
- *args: Arguments to be passed to the asynchronous function.
- **kwargs: Keyword arguments to be passed to the asynchronous function.
Captured from the enclosing scope (not passed to this wrapper):
- provider (Union[str, AIProvider]): The AI provider managing the span context.
- model (str): The model being used for the operation.
- operation (str): The operation being traced.
- func (Callable): The asynchronous function to be wrapped.
Returns:
- Any: The result returned by the asynchronous function.
Processing Logic:
- Executes the wrapped asynchronous function within a tracing span context.
- Sets attributes on the span based on function name, argument counts, and execution success.
- Raises any exception from the asynchronous function after logging it in the span."""
with self.ai_provider_span_context(provider, model, operation) as span:
# Add function-specific attributes
span.set_ai_attributes(
Expand All @@ -503,6 +539,16 @@ async def async_wrapper(*args, **kwargs):

@functools.wraps(func)
def sync_wrapper(*args, **kwargs):
"""Runs a synchronous function within an AI provider span context for monitoring and tracking.
Parameters:
- *args: Variable length argument list for the target function.
- **kwargs: Arbitrary keyword arguments for the target function.
Returns:
- Any: The result of the function `func` when called with provided arguments.
Processing Logic:
- The synchronous execution of the function is wrapped in a monitoring span context.
- Function metadata like name and argument count are logged in the span.
- Success or failure of the function execution is recorded as span attributes."""
with self.ai_provider_span_context(provider, model, operation) as span:
span.set_ai_attributes(
function_name=func.__name__,
Expand Down Expand Up @@ -531,7 +577,29 @@ def vcf_operation_span(self, operation: str):
"""Decorator for VCF processing operations."""
def decorator(func):
"""Apply a decorator to a function to perform context logging during its execution.
Parameters:
- func (callable): The function to be wrapped by the decorator.
Returns:
- callable: A wrapped function that logs its execution context including arguments and success status.
Processing Logic:
- Establish a context for the operation before function execution.
- Log function name, argument count, and keyword argument count.
- Execute the function within this context.
- Capture and log the success status or handle exceptions accordingly."""
@functools.wraps(func)
def wrapper(*args, **kwargs):
"""Wrapper function for executing a given function within a VCF operation context, recording attributes about the operation.
Parameters:
- *args: Variable length argument list passed to the wrapped function.
- **kwargs: Arbitrary keyword arguments passed to the wrapped function.
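- operation (str): Name of the VCF operation; captured from the enclosing `vcf_operation_span(operation)` call rather than passed to this wrapper, and handed to `self.vcf_operation_context`.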
Returns:
- any: Whatever the wrapped function returns upon successful execution.
Processing Logic:
- The function execution is wrapped within a context manager.
- VCF operation attributes such as function name and argument counts are recorded.
- Success or failure of the function invocation is captured in the context attributes.
- Raises any exception encountered during function execution after setting 'success' to False."""
with self.vcf_operation_context(operation) as span:
span.set_vcf_attributes(
function_name=func.__name__,
Expand All @@ -554,7 +622,28 @@ def memory_operation_span(self, operation: str):
"""Decorator for memory optimization operations."""
def decorator(func):
"""A decorator for function memory context management.
Parameters:
- func (callable): The function to be wrapped and managed within a memory context.
Returns:
- callable: The wrapped function with added memory context and attribute management.
Processing Logic:
- Opens a memory context before executing the function, capturing it in a span.
- Sets memory attributes such as function name and optimization level.
- Upon successful function execution, sets a success attribute in the span.
- On exception, sets a failure attribute and re-raises the exception."""
@functools.wraps(func)
def wrapper(*args, **kwargs):
"""Wrapper function to execute another function while managing memory context and capturing its attributes.
Parameters:
- *args: Positional arguments to pass to the `func`.
- **kwargs: Keyword arguments to pass to the `func`.
Returns:
- The return value of the executed function `func`.
Processing Logic:
- Establishes a memory context using `self.memory_context`.
- Sets attributes related to the function name and optimization level.
- Executes the given function, capturing success or failure state.
- Raises any exceptions encountered during execution."""
with self.memory_context(operation) as span:
span.set_memory_attributes(
function_name=func.__name__,
Expand Down
11 changes: 11 additions & 0 deletions src/vcf_agent/graph_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,17 @@ def link_variant_to_sample(conn: kuzu.Connection, sample_id: str, variant_id: st
raise

def execute_query(conn: kuzu.Connection, cypher_query: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
"""Executes a Cypher query using the Kuzu connection and returns the results as a list of dictionaries.
Parameters:
- conn (kuzu.Connection): The database connection object to execute the query.
- cypher_query (str): The Cypher query to execute.
- params (Optional[Dict[str, Any]]): Parameters to include in the query, defaults to None.
Returns:
- List[Dict[str, Any]]: A list of dictionaries representing the query results.
Processing Logic:
- Handles both single and list results from the query execution, focusing on the first query result in case of a list.
- Converts the query result to a Pandas DataFrame and then to a list of dictionaries.
- Contains error handling for unexpected types and ensures cleanup of resources."""
query_result_union: Optional[Union[kuzu.query_result.QueryResult, List[kuzu.query_result.QueryResult]]] = None
try:
print(f"Executing Cypher query: {cypher_query} with params: {params}")
Expand Down
11 changes: 11 additions & 0 deletions src/vcf_agent/lancedb_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,17 @@ class DimensionReducer:
Integrated directly into the embedding service.
"""
def __init__(self, target_dimensions: int = 768):
"""Initialize an object for dimensionality reduction using PCA.
Parameters:
- target_dimensions (int): Desired number of dimensions for the reduced dataset. Defaults to 768.
Returns:
- None: The function does not return any value. It sets up internal state for future processing.
Processing Logic:
- Sets up initial dimensions and checks for availability of scikit-learn.
- Initializes model attributes such as PCA model and scaler as None.
- Flags is_trained to False, indicating model needs training.
- Prepares to store training embeddings and specifies a minimum sample size for reliable PCA application.
- Issues a warning when scikit-learn is unavailable, showing dimension reduction feature will not work."""
self.target_dimensions = target_dimensions
self.original_dimensions = 1536
self.pca_model = None
Expand Down
Loading
Loading