From 05e4b466c80a19852196fbfcf8c2388cfe816d12 Mon Sep 17 00:00:00 2001 From: avrevic Date: Thu, 19 Feb 2026 17:22:24 +0100 Subject: [PATCH 01/10] Make functions_to_track.csv not mandatory for create --- README.md | 6 ++++-- src/commands/create.rs | 14 ++++++++++---- tests/structure_commands_test.rs | 15 ++++++++++----- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 405ad966..ad66c413 100644 --- a/README.md +++ b/README.md @@ -202,9 +202,11 @@ verilib-cli create --root custom/path | `--root ` | Custom structure root (default: `.verilib/structure`) | **Requirements:** -- `functions_to_track.csv` in project root - `scripts/analyze_verus_specs_proofs.py` script +**Optional:** +- `functions_to_track.csv` in project root — when absent, a minimal seed is used + ### `atomize` Enrich structure files with metadata from SCIP atoms. @@ -327,7 +329,7 @@ cd dalek-lite git checkout -b sl/structure # Step 1: Create structure files -# Creates .md stub files in .verilib/structure/ from functions_to_track.csv +# Creates .md stub files in .verilib/structure/ (from functions_to_track.csv if present) verilib-cli create # Step 2: Run atomization diff --git a/src/commands/create.rs b/src/commands/create.rs index cdb2e63a..8799c5be 100644 --- a/src/commands/create.rs +++ b/src/commands/create.rs @@ -30,9 +30,15 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu // NOTE: .gitignore creation is moved to the 'init' subcommand let tracked_path = project_root.join("functions_to_track.csv"); - if !tracked_path.exists() { - bail!("{} not found", tracked_path.display()); - } + let seed_path: PathBuf = if tracked_path.exists() { + tracked_path + } else { + // Optional: use minimal seed when functions_to_track.csv is absent + let fallback_seed = verilib_path.join("seed.csv"); + std::fs::write(&fallback_seed, "function,module,link\n") + .context("Failed to write fallback seed.csv")?; + fallback_seed + }; let tracked_output_path = 
verilib_path.join("tracked_functions.csv"); @@ -41,7 +47,7 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu execution_mode: ExecutionMode::Local, ..Default::default() }; - run_analyze_verus_specs_proofs(&project_root, &tracked_path, &tracked_output_path, &local_config)?; + run_analyze_verus_specs_proofs(&project_root, &seed_path, &tracked_output_path, &local_config)?; let tracked = read_tracked_csv(&tracked_output_path)?; let tracked = disambiguate_names(tracked); diff --git a/tests/structure_commands_test.rs b/tests/structure_commands_test.rs index fedc2bd6..1cfd7af8 100644 --- a/tests/structure_commands_test.rs +++ b/tests/structure_commands_test.rs @@ -424,19 +424,24 @@ mod create_tests { use super::*; #[test] - fn test_create_fails_without_functions_to_track_csv() { + fn test_create_uses_fallback_seed_without_functions_to_track_csv() { let temp_dir = TempDir::new().expect("Failed to create temp dir"); - // No functions_to_track.csv - should fail + // No functions_to_track.csv - create uses fallback seed and does not fail for that reason let output = run_command(&["create"], temp_dir.path()); - assert!(!output.status.success(), "Should fail without functions_to_track.csv"); let stderr = String::from_utf8_lossy(&output.stderr); assert!( - stderr.contains("functions_to_track.csv") && stderr.contains("not found"), - "Should report missing functions_to_track.csv: {}", + !(stderr.contains("functions_to_track.csv") && stderr.contains("not found")), + "Should NOT fail with 'functions_to_track.csv not found' (now optional): {}", stderr ); + // Fallback seed should be created when functions_to_track.csv is absent + let seed_path = temp_dir.path().join(".verilib").join("seed.csv"); + assert!( + seed_path.exists(), + "Fallback .verilib/seed.csv should be created when functions_to_track.csv is missing" + ); } #[test] From ed8538ce982e73bdf7a6ef094b21f66b97b0234e Mon Sep 17 00:00:00 2001 From: avrevic Date: Thu, 19 Feb 2026 18:52:36 +0100 Subject: 
[PATCH 02/10] Remove py file dependency, generate dummy config file if needed --- README.md | 6 +- src/commands/create.rs | 95 +++++++++++++++++++------------- src/structure/config.rs | 25 +++++---- tests/structure_commands_test.rs | 25 +++------ 4 files changed, 82 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index ad66c413..6cef3d6c 100644 --- a/README.md +++ b/README.md @@ -201,11 +201,9 @@ verilib-cli create --root custom/path |--------|-------------| | `--root ` | Custom structure root (default: `.verilib/structure`) | -**Requirements:** -- `scripts/analyze_verus_specs_proofs.py` script - **Optional:** -- `functions_to_track.csv` in project root — when absent, a minimal seed is used +- `scripts/analyze_verus_specs_proofs.py` — when present (in project or bundled), generates structure files; when absent, create still runs and sets up config with no structure files +- `functions_to_track.csv` in project root — when absent, all functions are tracked by default (when script runs) ### `atomize` Enrich structure files with metadata from SCIP atoms. 
diff --git a/src/commands/create.rs b/src/commands/create.rs index 8799c5be..44cbd07a 100644 --- a/src/commands/create.rs +++ b/src/commands/create.rs @@ -5,7 +5,7 @@ use crate::structure::{ parse_github_link, run_command, write_frontmatter, CommandConfig, ExecutionMode, StructureConfig, }; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use serde_json::{json, Value}; use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; @@ -30,26 +30,32 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu // NOTE: .gitignore creation is moved to the 'init' subcommand let tracked_path = project_root.join("functions_to_track.csv"); - let seed_path: PathBuf = if tracked_path.exists() { - tracked_path + let seed_path = if tracked_path.exists() { + Some(tracked_path) } else { - // Optional: use minimal seed when functions_to_track.csv is absent - let fallback_seed = verilib_path.join("seed.csv"); - std::fs::write(&fallback_seed, "function,module,link\n") - .context("Failed to write fallback seed.csv")?; - fallback_seed + // Default: track all functions when functions_to_track.csv is absent + None }; let tracked_output_path = verilib_path.join("tracked_functions.csv"); - // This command uses 'uv' which we expect to be local. 
+ // Script is optional: if absent or fails, create proceeds with no structure files let local_config = CommandConfig { execution_mode: ExecutionMode::Local, ..Default::default() }; - run_analyze_verus_specs_proofs(&project_root, &seed_path, &tracked_output_path, &local_config)?; - - let tracked = read_tracked_csv(&tracked_output_path)?; + let tracked = match try_run_analyze_verus_specs_proofs( + &project_root, + seed_path.as_deref(), + &tracked_output_path, + &local_config, + ) { + Some(path) => read_tracked_csv(&path)?, + None => { + println!("Skipping structure generation (analyze_verus_specs_proofs.py not run)"); + HashMap::new() + } + }; let tracked = disambiguate_names(tracked); let structure = tracked_to_structure(&tracked); @@ -61,57 +67,68 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu Ok(()) } -/// Run analyze_verus_specs_proofs.py CLI to generate tracked functions CSV. -fn run_analyze_verus_specs_proofs( +/// Path to the script bundled with verilib-cli (scripts/ relative to the executable). +fn cli_bundled_script_path() -> Option { + let exe = std::env::current_exe().ok()?; + let cli_root = exe.parent()?.parent()?; // target/release/verilib-cli -> verilib-cli root + let script = cli_root.join("scripts").join("analyze_verus_specs_proofs.py"); + script.exists().then_some(script) +} + +/// Optionally run analyze_verus_specs_proofs.py. Returns Some(output_path) on success, None if +/// script is absent or fails (create continues without structure files). 
+fn try_run_analyze_verus_specs_proofs( project_root: &Path, - seed_path: &Path, + seed_path: Option<&Path>, output_path: &Path, config: &CommandConfig, -) -> Result<()> { - let script_path = project_root +) -> Option { + let project_script = project_root .join("scripts") .join("analyze_verus_specs_proofs.py"); - if !script_path.exists() { - bail!("Script not found: {}", script_path.display()); - } + let script_path: PathBuf = project_script + .exists() + .then_some(project_script) + .or_else(cli_bundled_script_path)?; println!("Running analyze_verus_specs_proofs.py..."); - let seed_relative = seed_path.strip_prefix(project_root).unwrap_or(seed_path); let output_relative = output_path .strip_prefix(project_root) .unwrap_or(output_path); - // Ensure parent directory exists if let Some(parent) = output_path.parent() { - std::fs::create_dir_all(parent)?; + let _ = std::fs::create_dir_all(parent); + } + + let mut args: Vec = vec![ + "run".into(), + script_path.to_str()?.into(), + "--output".into(), + output_relative.to_str()?.into(), + ]; + if let Some(seed) = seed_path { + let seed_relative = seed.strip_prefix(project_root).unwrap_or(seed); + args.extend(["--seed".into(), seed_relative.to_str()?.into()]); } - let output = run_command( - "uv", - &[ - "run", - script_path.to_str().unwrap(), - "--seed", - seed_relative.to_str().unwrap(), - "--output", - output_relative.to_str().unwrap(), - ], - Some(project_root), - config, - )?; + let args_ref: Vec<&str> = args.iter().map(String::as_str).collect(); + let output = run_command("uv", &args_ref, Some(project_root), config).ok()?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error running analyze_verus_specs_proofs.py:\n{}", stderr); - bail!("analyze_verus_specs_proofs.py failed"); + eprintln!( + "Warning: analyze_verus_specs_proofs.py failed (skipping structure generation):\n{}", + stderr + ); + return None; } println!( "Generated tracked functions CSV at {}", 
output_path.display() ); - Ok(()) + Some(output_path.to_path_buf()) } /// Tracked function data from CSV. diff --git a/src/structure/config.rs b/src/structure/config.rs index 4f569523..3023a8f3 100644 --- a/src/structure/config.rs +++ b/src/structure/config.rs @@ -84,18 +84,27 @@ pub fn create_gitignore(verilib_path: &Path) -> Result<()> { Ok(()) } +/// Default config written when .verilib/config.json is missing. +const DEFAULT_CONFIG_JSON: &str = r#"{ + "docker-image": "ghcr.io/beneficial-ai-foundation/verilib-cli:latest", + "execution-mode": "local", + "repo": {}, + "structure-root": ".verilib/structure" +}"#; + impl ConfigPaths { /// Load config and compute all paths. - /// Requires structure-root to be present in config. + /// If config.json is missing, creates a default one and continues. pub fn load(project_root: &Path) -> Result { let verilib_path = project_root.join(".verilib"); let config_path = verilib_path.join("config.json"); if !config_path.exists() { - anyhow::bail!( - "{} not found. Run 'verilib-cli create' first.", - config_path.display() - ); + std::fs::create_dir_all(&verilib_path) + .context("Failed to create .verilib directory")?; + std::fs::write(&config_path, DEFAULT_CONFIG_JSON) + .context("Failed to write default config.json")?; + println!("Created default config at {}", config_path.display()); } let content = @@ -107,11 +116,7 @@ impl ConfigPaths { let structure_root_str = json .get("structure-root") .and_then(|v| v.as_str()) - .ok_or_else(|| { - anyhow::anyhow!( - "No 'structure-root' field in config.json. Run 'verilib-cli create' first." 
- ) - })?; + .unwrap_or(".verilib/structure"); let structure_root = project_root.join(structure_root_str); diff --git a/tests/structure_commands_test.rs b/tests/structure_commands_test.rs index 1cfd7af8..eeb34cae 100644 --- a/tests/structure_commands_test.rs +++ b/tests/structure_commands_test.rs @@ -424,10 +424,10 @@ mod create_tests { use super::*; #[test] - fn test_create_uses_fallback_seed_without_functions_to_track_csv() { + fn test_create_tracks_all_functions_without_functions_to_track_csv() { let temp_dir = TempDir::new().expect("Failed to create temp dir"); - // No functions_to_track.csv - create uses fallback seed and does not fail for that reason + // No functions_to_track.csv - create invokes script without --seed (tracks all functions) let output = run_command(&["create"], temp_dir.path()); let stderr = String::from_utf8_lossy(&output.stderr); @@ -436,12 +436,6 @@ mod create_tests { "Should NOT fail with 'functions_to_track.csv not found' (now optional): {}", stderr ); - // Fallback seed should be created when functions_to_track.csv is absent - let seed_path = temp_dir.path().join(".verilib").join("seed.csv"); - assert!( - seed_path.exists(), - "Fallback .verilib/seed.csv should be created when functions_to_track.csv is missing" - ); } #[test] @@ -481,18 +475,17 @@ mod error_handling_tests { use super::*; #[test] - fn test_commands_fail_without_config() { + fn test_commands_create_default_config_when_missing() { let temp_dir = TempDir::new().expect("Failed to create temp dir"); let verilib_dir = temp_dir.path().join(".verilib"); - fs::create_dir_all(&verilib_dir).expect("Failed to create .verilib dir"); - // No config.json - should fail - let output = run_command(&["atomize", "--no-probe"], temp_dir.path()); - assert!(!output.status.success()); - let stderr = String::from_utf8_lossy(&output.stderr); + // No .verilib at all - atomize creates default config and proceeds + run_command(&["atomize", "--no-probe"], temp_dir.path()); + // config.json should 
have been created (fails later on atoms.json) + let config_path = verilib_dir.join("config.json"); assert!( - stderr.contains("config.json not found") || stderr.contains("Run 'verilib-cli create'"), - "Should report missing config" + config_path.exists(), + "Default config.json should be created when missing" ); } From a3e754773fe2842d7e50318a306a509cab8e4e15 Mon Sep 17 00:00:00 2001 From: avrevic Date: Thu, 19 Feb 2026 19:25:03 +0100 Subject: [PATCH 03/10] Generate structure even with 0 stubs --- src/commands/atomize.rs | 2 ++ src/commands/create.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/commands/atomize.rs b/src/commands/atomize.rs index 84468673..f8d1aab8 100644 --- a/src/commands/atomize.rs +++ b/src/commands/atomize.rs @@ -84,6 +84,8 @@ fn generate_stubs( if let Some(parent) = stubs_path.parent() { std::fs::create_dir_all(parent)?; } + // Ensure structure root exists (create may have written 0 files) + std::fs::create_dir_all(structure_root)?; println!( "Running probe-verus stubify on {}...", diff --git a/src/commands/create.rs b/src/commands/create.rs index 44cbd07a..95c58779 100644 --- a/src/commands/create.rs +++ b/src/commands/create.rs @@ -229,6 +229,8 @@ fn generate_structure_files( structure: &HashMap, structure_root: &Path, ) -> Result<()> { + std::fs::create_dir_all(structure_root) + .context("Failed to create structure root directory")?; let mut created_count = 0; for (relative_path_str, metadata) in structure { From 1cc1ab40a38bc6987f6c4bf3c641c11bf85a65a5 Mon Sep 17 00:00:00 2001 From: avrevic Date: Thu, 19 Feb 2026 19:36:43 +0100 Subject: [PATCH 04/10] Atomize now works without stubs --- src/commands/atomize.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/commands/atomize.rs b/src/commands/atomize.rs index f8d1aab8..042744c9 100644 --- a/src/commands/atomize.rs +++ b/src/commands/atomize.rs @@ -72,6 +72,14 @@ pub async fn handle_atomize( Ok(()) } +/// Check if structure has any .md 
files (recursively). +fn has_md_files(structure_root: &Path) -> bool { + walkdir::WalkDir::new(structure_root) + .into_iter() + .filter_map(|e| e.ok()) + .any(|e| e.path().extension().map_or(false, |ext| ext == "md")) +} + /// Run probe-verus stubify to generate stubs.json from .md files. fn generate_stubs( project_root: &Path, @@ -79,14 +87,21 @@ fn generate_stubs( stubs_path: &Path, config: &CommandConfig, ) -> Result> { - require_probe_installed(config)?; - if let Some(parent) = stubs_path.parent() { std::fs::create_dir_all(parent)?; } // Ensure structure root exists (create may have written 0 files) std::fs::create_dir_all(structure_root)?; + // No .md files: stubify would fail; write empty stubs.json and continue + if !has_md_files(structure_root) { + println!("No .md structure files found; writing empty stubs.json"); + std::fs::write(stubs_path, "{}")?; + return Ok(HashMap::new()); + } + + require_probe_installed(config)?; + println!( "Running probe-verus stubify on {}...", structure_root.display() From 9f17d6ebda504fba50e35f4029082529759ef40c Mon Sep 17 00:00:00 2001 From: Nima Hamed Date: Thu, 19 Feb 2026 12:27:55 -0800 Subject: [PATCH 05/10] refactor: Replace deprecated python script with probe-verus and fix docker flow - Replaced usage in command with . - Updated command to respect configuration (fixing script not found errors). - Enhanced in to support preserving existing configuration fields. - Updated to support local Docker images by checking existence before pull. - Fixed ARM64 compatibility by removing hardcoded in runtime. - Added Python dependencies to Dockerfile. - Added CACHEBUST arg to Dockerfile and CI workflow to ensure fresh builds. - Updated README with instructions for building and using local Docker images. 
--- .github/workflows/docker-publish.yml | 4 +- Dockerfile | 1 + README.md | 19 +++++ src/commands/create.rs | 109 ++++++++++++++++----------- src/structure/config.rs | 23 ++++-- src/structure/executor.rs | 57 +++++++------- 6 files changed, 132 insertions(+), 81 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index b7c50370..ec07a83b 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -5,7 +5,7 @@ on: paths: - 'Dockerfile' - '.github/workflows/docker-publish.yml' - branches: [ "main" ] + branches: [ "master" ] tags: [ 'docker-v*' ] workflow_dispatch: @@ -56,6 +56,8 @@ jobs: context: . platforms: linux/amd64 push: ${{ github.event_name != 'pull_request' }} + build-args: | + CACHEBUST=${{ github.run_id }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha diff --git a/Dockerfile b/Dockerfile index 192c81c3..e3d2fd15 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,7 @@ RUN echo "n" | python3 verus_analyzer_installer.py RUN echo "n" | python3 scip_installer.py WORKDIR /build +ARG CACHEBUST=1 RUN git clone https://github.com/Beneficial-AI-Foundation/probe-verus.git WORKDIR /build/probe-verus diff --git a/README.md b/README.md index 8647f1a0..268356c0 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,25 @@ A command-line tool for managing Verilib repositories, verification structure fi 1. **Docker (Recommended)**: Runs `probe-verus` in a container with all dependencies pre-installed. This ensures a consistent environment and avoids local setup issues. You simply need Docker installed and running. 2. **Local**: Runs `probe-verus` directly on your host machine. This requires you to install `probe-verus` and all its dependencies (Rust, Verus, etc.) manually. 
+### Using a Local Docker Image + +If you want to use a locally built Docker image instead of pulling from the registry (e.g., for development or custom modifications), follow these steps: + +1. **Build the Docker image locally:** + ```bash + docker build --build-arg CACHEBUST=$(date +%s) -t verilib-cli:local . + ``` + +2. **Update your configuration:** + Edit `.verilib/config.json` in your project root and set the `docker-image` field: + ```json + { + "execution-mode": "docker", + "docker-image": "verilib-cli:local", + ... + } + ``` + During initialization (`verilib-cli init`), you will be prompted to choose your preferred execution mode. You can also change it later by editing the `.verilib/config.json` file. > **Note for Local Mode:** If you choose to run locally and encounter issues with missing dependencies or environment configuration, please refer to the [probe-verus repository](https://github.com/Beneficial-AI-Foundation/probe-verus) for installation instructions and troubleshooting. diff --git a/src/commands/create.rs b/src/commands/create.rs index 8799c5be..86a5b051 100644 --- a/src/commands/create.rs +++ b/src/commands/create.rs @@ -3,7 +3,7 @@ //! Initialize structure files from source analysis. 
use crate::structure::{ - parse_github_link, run_command, write_frontmatter, CommandConfig, ExecutionMode, StructureConfig, + parse_github_link, run_command, write_frontmatter, CommandConfig, ConfigPaths, ExecutionMode, StructureConfig, }; use anyhow::{bail, Context, Result}; use serde_json::{json, Value}; @@ -22,9 +22,9 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu .map(|r| r.to_string_lossy().to_string()) .unwrap_or_else(|| ".verilib/structure".to_string()); - // Write config file with ONLY structure-root field + // Write config file with ONLY structure-root field (add/update it, preserving others) let config = StructureConfig::new(&structure_root_relative); - let config_path = config.save(&project_root)?; + let config_path = config.save(&project_root, true)?; println!("Wrote config to {}", config_path.display()); // NOTE: .gitignore creation is moved to the 'init' subcommand @@ -42,12 +42,10 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu let tracked_output_path = verilib_path.join("tracked_functions.csv"); - // This command uses 'uv' which we expect to be local. - let local_config = CommandConfig { - execution_mode: ExecutionMode::Local, - ..Default::default() - }; - run_analyze_verus_specs_proofs(&project_root, &seed_path, &tracked_output_path, &local_config)?; + // Load configuration to determine execution mode + let config = ConfigPaths::load(&project_root)?; + + run_probe_verus_tracked_csv(&project_root, &seed_path, &tracked_output_path, &config.command_config)?; let tracked = read_tracked_csv(&tracked_output_path)?; let tracked = disambiguate_names(tracked); @@ -61,50 +59,73 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu Ok(()) } -/// Run analyze_verus_specs_proofs.py CLI to generate tracked functions CSV. -fn run_analyze_verus_specs_proofs( +/// Run probe-verus tracked-csv CLI to generate tracked functions CSV. 
+fn run_probe_verus_tracked_csv( project_root: &Path, - seed_path: &Path, + _seed_path: &Path, output_path: &Path, config: &CommandConfig, ) -> Result<()> { - let script_path = project_root - .join("scripts") - .join("analyze_verus_specs_proofs.py"); - if !script_path.exists() { - bail!("Script not found: {}", script_path.display()); - } - - println!("Running analyze_verus_specs_proofs.py..."); - - let seed_relative = seed_path.strip_prefix(project_root).unwrap_or(seed_path); - let output_relative = output_path - .strip_prefix(project_root) - .unwrap_or(output_path); + println!("Running probe-verus tracked-csv..."); - // Ensure parent directory exists if let Some(parent) = output_path.parent() { std::fs::create_dir_all(parent)?; } - let output = run_command( - "uv", - &[ - "run", - script_path.to_str().unwrap(), - "--seed", - seed_relative.to_str().unwrap(), - "--output", - output_relative.to_str().unwrap(), - ], - Some(project_root), - config, - )?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error running analyze_verus_specs_proofs.py:\n{}", stderr); - bail!("analyze_verus_specs_proofs.py failed"); + match config.execution_mode { + ExecutionMode::Local => { + + let output_relative = output_path + .strip_prefix(project_root) + .unwrap_or(output_path) + .to_string_lossy(); + + let output = run_command( + "probe-verus", + &[ + "tracked-csv", + ".", + "--output", + &output_relative, + ], + Some(project_root), + config, + )?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + eprintln!("Error running probe-verus tracked-csv:\n{}", stderr); + bail!("probe-verus tracked-csv failed"); + } + } + ExecutionMode::Docker => { + let workspace_mount = "/workspace"; + + let output_relative = output_path + .strip_prefix(project_root) + .unwrap_or(output_path) + .to_string_lossy(); + + let output_in_container = format!("{}/{}", workspace_mount, output_relative); + + let output = 
run_command( + "probe-verus", + &[ + "tracked-csv", + workspace_mount, + "--output", + &output_in_container + ], + Some(project_root), + config, + )?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + eprintln!("Error running probe-verus tracked-csv in Docker:\n{}", stderr); + bail!("probe-verus tracked-csv failed"); + } + } } println!( diff --git a/src/structure/config.rs b/src/structure/config.rs index c1c5d3bc..3595ddbc 100644 --- a/src/structure/config.rs +++ b/src/structure/config.rs @@ -46,15 +46,13 @@ impl StructureConfig { } } - /// Save config to .verilib/config.json - pub fn save(&self, project_root: &Path) -> Result { + /// Save config to .verilib/config.json. + pub fn save(&self, project_root: &Path, preserve_existing: bool) -> Result { let verilib_path = project_root.join(".verilib"); std::fs::create_dir_all(&verilib_path).context("Failed to create .verilib directory")?; let config_path = verilib_path.join("config.json"); - // If config exists, merge with existing content - // We read it into a Value to preserve other fields, but we should also respect our own fields. 
let mut json: serde_json::Value = if config_path.exists() { let existing = std::fs::read_to_string(&config_path) .context("Failed to read existing config.json")?; @@ -63,11 +61,20 @@ impl StructureConfig { serde_json::json!({}) }; - // Update fields + // Always update structure-root as that's the primary purpose of this save in 'create' json["structure-root"] = serde_json::Value::String(self.structure_root.clone()); - json["execution-mode"] = serde_json::to_value(&self.execution_mode).unwrap_or(serde_json::Value::Null); - json["docker-image"] = serde_json::Value::String(self.docker_image.clone()); - json["auto-validate-specs"] = serde_json::Value::Bool(self.auto_validate_specs); + + if !preserve_existing || json.get("execution-mode").is_none() { + json["execution-mode"] = serde_json::to_value(&self.execution_mode).unwrap_or(serde_json::Value::Null); + } + + if !preserve_existing || json.get("docker-image").is_none() { + json["docker-image"] = serde_json::Value::String(self.docker_image.clone()); + } + + if !preserve_existing || (json.get("auto-validate-specs").is_none() && json.get("auto_validate_specs").is_none()) { + json["auto-validate-specs"] = serde_json::Value::Bool(self.auto_validate_specs); + } let content = serde_json::to_string_pretty(&json).context("Failed to serialize config")?; std::fs::write(&config_path, content).context("Failed to write config.json")?; diff --git a/src/structure/executor.rs b/src/structure/executor.rs index 76b06b1f..af3d2699 100644 --- a/src/structure/executor.rs +++ b/src/structure/executor.rs @@ -64,33 +64,6 @@ fn run_local(program: &str, args: &[&str], cwd: Option<&Path>) -> Result Ok(output) } -fn ensure_image_pulled(image: &str) -> Result<()> { - let status = Command::new("docker") - .args(&["image", "inspect", image]) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status(); - - if let Ok(status) = status { - if status.success() { - return Ok(()); - } - } - - println!("Docker image {} not 
found locally. Pulling...", image); - - let status = Command::new("docker") - .args(&["pull", "--platform", "linux/amd64", image]) - .status() - .context(format!("Failed to pull docker image {}", image))?; - - if !status.success() { - anyhow::bail!("Failed to pull docker image {}", image); - } - - Ok(()) -} - fn run_docker( program: &str, args: &[&str], @@ -119,7 +92,6 @@ fn run_docker( let mut docker_args = vec![ "run", "--rm", - "--platform", "linux/amd64", "--entrypoint", program, "-u", &user_arg, "-v", @@ -145,3 +117,32 @@ fn run_docker( Ok(output) } + +fn ensure_image_pulled(image: &str) -> Result<()> { + // 1. Check if image exists locally (e.g. built locally) + let inspect = Command::new("docker") + .args(&["image", "inspect", image]) + .output(); + + if let Ok(output) = inspect { + if output.status.success() { + // Found locally, no need to pull + return Ok(()); + } + } + + // 2. Not found locally, try to pull from registry + println!("Docker image '{}' not found locally. Pulling...", image); + + let status = Command::new("docker") + .args(&["pull", "--platform", "linux/amd64", image]) + .status() + .context(format!("Failed to pull docker image {}", image))?; + + if !status.success() { + // We can't proceed if we don't have the image + anyhow::bail!("Failed to pull docker image {} (and not found locally)", image); + } + + Ok(()) +} From cba2ce388250f9a094c354c910e8d791c4edfa0c Mon Sep 17 00:00:00 2001 From: Nima Hamed Date: Thu, 19 Feb 2026 21:45:01 -0800 Subject: [PATCH 06/10] latest fixes --- Dockerfile | 25 +- functions_to_track.csv | 136 ++++ scripts/analyze_verus_specs_proofs.py | 877 ++++++++++++++++++++++++++ src/commands/create.rs | 228 ++++--- src/structure/executor.rs | 19 +- 5 files changed, 1195 insertions(+), 90 deletions(-) create mode 100644 functions_to_track.csv create mode 100644 scripts/analyze_verus_specs_proofs.py diff --git a/Dockerfile b/Dockerfile index e3d2fd15..32655d6b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,6 +32,12 @@ 
RUN git clone https://github.com/Beneficial-AI-Foundation/probe-verus.git WORKDIR /build/probe-verus RUN cargo install --path . +# Download scripts +WORKDIR /build +RUN git clone https://github.com/Beneficial-AI-Foundation/dalek-lite.git /build/dalek-lite && \ + cd /build/dalek-lite && \ + git checkout c36b395aa5526af7940d1db0f66ea60db4e3a157 + FROM --platform=linux/amd64 rust:slim-bookworm AS runtime RUN apt-get update && apt-get install -y \ @@ -42,6 +48,9 @@ RUN apt-get update && apt-get install -y \ python3-requests \ && rm -rf /var/lib/apt/lists/* +# Install uv python package manager +RUN pip3 install uv --break-system-packages + # Install specific Rust toolchain for Verus RUN rustup toolchain install 1.93.0 @@ -65,8 +74,22 @@ ENV PATH="/usr/local/verus-analyzer:${PATH}" COPY --from=builder /root/scip /usr/local/scip ENV PATH="/usr/local/scip:${PATH}" +# Copy scripts from builder +COPY --from=builder /build/dalek-lite/scripts /usr/local/bin/scripts +ENV PATH="/usr/local/bin/scripts:${PATH}" + +# Add local analysis script override +COPY scripts/analyze_verus_specs_proofs.py /usr/local/bin/scripts/analyze_verus_specs_proofs.py + +# Patch script to allow REPO_ROOT env var and generic crate name +RUN sed -i 's|repo_root = script_dir.parent|import os; repo_root = Path(os.environ.get("REPO_ROOT", script_dir.parent))|' /usr/local/bin/scripts/analyze_verus_specs_proofs.py && \ + sed -i 's|CRATE_NAME = "curve25519_dalek"|import os; CRATE_NAME = os.environ.get("CRATE_NAME", "curve25519_dalek")|' /usr/local/bin/scripts/analyze_verus_specs_proofs.py && \ + sed -i 's|CRATE_DIR = "curve25519-dalek"|CRATE_DIR = os.environ.get("CRATE_DIR", "curve25519-dalek")|' /usr/local/bin/scripts/analyze_verus_specs_proofs.py && \ + sed -i 's|skip_parts = {"curve25519-dalek", "src"}|skip_parts = {CRATE_DIR, "src"}|' /usr/local/bin/scripts/analyze_verus_specs_proofs.py && \ + sed -i 's|module_stripped = module.replace(f"{CRATE_NAME}::", "")|module_stripped = 
module.replace(f"{CRATE_NAME}::", "")|' /usr/local/bin/scripts/analyze_verus_specs_proofs.py + # Ensure permissions for copied tools -RUN chmod -R a+rx /usr/local/verus /usr/local/verus-analyzer /usr/local/scip +RUN chmod -R a+rx /usr/local/verus /usr/local/verus-analyzer /usr/local/scip /usr/local/bin/scripts # Setup workspace WORKDIR /workspace diff --git a/functions_to_track.csv b/functions_to_track.csv new file mode 100644 index 00000000..9c40a421 --- /dev/null +++ b/functions_to_track.csv @@ -0,0 +1,136 @@ +function,module,impl_block +parse,verilib-cli::structure::frontmatter, +FileStorage::delete_password,verilib-cli::storage::file, +create_repo_from_git_url,verilib-cli::commands::init, +handle_create,verilib-cli::commands::create, +save_config_from_response,verilib-cli::commands::deploy, +fetch_verifier_versions,verilib-cli::commands::deploy, +load_proofs_from_file,verilib-cli::commands::verify, +run_analyze_verus_specs_proofs,verilib-cli::commands::create, +cleanup_intermediate_files,verilib-cli::structure::probe, +run_command,verilib-cli::structure::utils, +generate_stubs,verilib-cli::commands::atomize, +FileStorage::set_password,verilib-cli::storage::file, +generate_probe_atoms,verilib-cli::commands::atomize, +handle_list,verilib-cli::commands::api, +get_credential_storage,verilib-cli::storage, +StructureConfig::save,verilib-cli::structure::config, +get_display_name,verilib-cli::structure::utils, +update_structure_files,verilib-cli::commands::atomize, +disambiguate_names,verilib-cli::commands::create, +collect_certifications,verilib-cli::commands::specify, +test_specify_check_only_reports_uncertified,verilib-cli::specify_tests, +handle_batch,verilib-cli::commands::api, +run_local,verilib-cli::structure::executor, +require_probe_installed,verilib-cli::structure::probe, +test_atomize_check_only_passes_when_stubs_match,verilib-cli::atomize_tests, +enrich_stubs,verilib-cli::commands::atomize, +write,verilib-cli::structure::frontmatter, 
+check_stubs_match,verilib-cli::commands::atomize, +check_admin_status,verilib-cli::commands::api, +handle_api_error,verilib-cli::download::error, +resolve_code_name_and_atom,verilib-cli::commands::atomize, +load_atoms_from_file,verilib-cli::commands::atomize, +print_platform_help,verilib-cli::storage, +test_specify_no_probe_loads_specs_from_file,verilib-cli::specify_tests, +test_commands_create_default_config_when_missing,verilib-cli::error_handling_tests, +test_atomize_update_stubs_updates_md_files,verilib-cli::atomize_tests, +tracked_to_structure,verilib-cli::commands::create, +wait_for_atomization,verilib-cli::download::client, +build_enriched_entry,verilib-cli::commands::atomize, +is_git_available,verilib-cli::commands::reclone, +get_platform_info,verilib-cli::storage, +run_command,verilib-cli::structure::executor, +ConfigPaths::load,verilib-cli::structure::config, +FileStorage::ensure_secure_file,verilib-cli::storage::file, +StructureConfig::new,verilib-cli::structure::config, +test_api_key_validation,verilib-cli::tests, +read_repo_id_from_config,verilib-cli::commands::deploy, +run_docker,verilib-cli::structure::executor, +update_stubs_with_verification,verilib-cli::commands::verify, +incorporate_spec_text,verilib-cli::commands::specify, +default_docker_image,verilib-cli::structure::executor, +test_verify_updates_stubs_with_verification_status,verilib-cli::verify_tests, +prompt_type,verilib-cli::commands::deploy, +encode_name,verilib-cli::structure::certs, +StorageType::should_use_file_storage,verilib-cli::storage::types, +handle_reclone,verilib-cli::commands::reclone, +validate_meta_file,verilib-cli::commands::api, +write_stubs_json,verilib-cli::commands::specify, +prompt_execution_mode,verilib-cli::commands::init, +handle_set,verilib-cli::commands::api, +init_required_msg,verilib-cli::constants, +generate_structure_files,verilib-cli::commands::create, +has_uncommitted_changes,verilib-cli::commands::reclone, 
+test_verify_fails_without_stubs_json,verilib-cli::error_handling_tests, +test_specify_check_only_passes_when_all_certified,verilib-cli::specify_tests, +run_probe_specify,verilib-cli::commands::specify, +StorageType::from_env,verilib-cli::storage::types, +test_verify_check_only_detects_failures,verilib-cli::verify_tests, +parse_github_link,verilib-cli::structure::utils, +test_verify_no_probe_loads_proofs_from_file,verilib-cli::verify_tests, +handle_api,verilib-cli::commands::api, +test_atomize_no_probe_loads_atoms_from_file,verilib-cli::atomize_tests, +check_all_certified,verilib-cli::commands::specify, +handle_init,verilib-cli::commands::init, +prompt_language,verilib-cli::commands::deploy, +handle_get,verilib-cli::commands::api, +save_config,verilib-cli::commands::init, +update_stubs_specification_status,verilib-cli::commands::specify, +CredentialStorageFactory::create,verilib-cli::CredentialStorageFactory Optional[str]: + """ + Extract the full impl block signature for a function. + + For example, if the function is inside: + impl Identity for ProjectivePoint { + Returns "Identity for ProjectivePoint" + + For trait implementations like: + impl<'a, 'b> Add<&'b Scalar> for &'a Scalar { + Returns "Add<&'b Scalar> for &'a Scalar" + + For inherent implementations: + impl Scalar { + Returns "Scalar" + """ + # Look backwards from fn_start to find the impl block + look_back = content[:fn_start] + lines = look_back.split("\n") + + # Search backwards for impl block + for i in range(len(lines) - 1, -1, -1): + line = lines[i].strip() + + # Stop at verus! 
blocks or module boundaries + if line.startswith("verus!") or line.startswith("mod "): + break + + # Look for impl blocks + if line.startswith("impl"): + # Extract everything between 'impl' and '{' + # Pattern: impl<...> TraitName<...> for TypeName { or impl TypeName { + match = re.search(r"impl\s*(?:<[^>]+>)?\s*(.+?)\s*\{", line) + if match: + impl_part = match.group(1).strip() + # Note: lifetime params are cleaned up later in normalize_impl() + return impl_part + + return None + + +@beartype +def extract_impl_context(content: str, fn_start: int) -> Optional[str]: + """ + Extract the impl block context for a function. + + For example, if the function is inside: + impl Identity for ProjectivePoint { + Returns "ProjectivePoint" + + For trait implementations like: + impl<'a, 'b> Add<&'b Scalar> for &'a Scalar { + Returns "Scalar" (the type being implemented for) + """ + # Look backwards from fn_start to find the impl block + look_back = content[:fn_start] + lines = look_back.split("\n") + + # Search backwards for impl block + for i in range(len(lines) - 1, -1, -1): + line = lines[i].strip() + + # Stop at verus! blocks or module boundaries + if line.startswith("verus!") or line.startswith("mod "): + break + + # Look for impl blocks + if line.startswith("impl"): + # Try to extract the type being implemented for + # Pattern 1: impl TraitName for TypeName { + match = re.search( + r"impl\s+(?:<[^>]+>)?\s*\w+\s+for\s+([&\']*)([A-Za-z0-9_]+)", line + ) + if match: + return match.group(2) + + # Pattern 2: impl TypeName { or impl<...> TypeName { + match = re.search(r"impl\s*(?:<[^>]+>)?\s+([A-Za-z0-9_]+)\s*\{", line) + if match: + return match.group(1) + + return None + + +@beartype +def extract_module_from_path(file_path: Path, repo_root: Path) -> str: + """ + Extract module path from file path. 
+ + Example: + curve25519-dalek/src/backend/serial/curve_models/mod.rs + -> curve25519_dalek::backend::serial::curve_models + + curve25519-dalek/src/scalar.rs + -> curve25519_dalek::scalar + """ + try: + # Get relative path from repo root + rel_path = file_path.relative_to(repo_root) + + # Remove curve25519-dalek/ prefix and src/ prefix + parts = list(rel_path.parts) + + # Skip "curve25519-dalek" and "src" + module_parts = [] + skip_parts = {"curve25519-dalek", "src"} + + for part in parts: + if part in skip_parts: + continue + # Remove .rs extension or mod.rs + if part.endswith(".rs"): + if part == "mod.rs": + continue + part = part[:-3] + module_parts.append(part.replace("-", "_")) + + # Prepend crate name + return "curve25519_dalek::" + "::".join(module_parts) + except Exception: + return "curve25519_dalek" + + +@beartype +def parse_function_in_file( + file_path: Path, function_name: str, old_link: str = "", impl_block: str = "" +) -> Tuple[bool, bool, bool, int, str]: + """ + Parse a Rust file to find a function and check if it has Verus specs and proofs. 
+ + Args: + file_path: Path to the Rust source file + function_name: Bare function name (without type prefix or signature) + old_link: Optional GitHub link with line number hint + impl_block: Optional impl block context for disambiguation (e.g., "Add for FieldElement51") + + Returns: + Tuple[bool, bool, bool, int, str]: (has_spec, has_proof, is_external_body, line_number, qualified_name) + - has_spec: True if function has requires or ensures clauses + - has_proof: True if has_spec and no assume in body + - is_external_body: True if function has #[verifier::external_body] + - line_number: Line number where the function is defined (1-indexed) + - qualified_name: Function name with impl context (e.g., "ProjectivePoint::identity") + """ + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + # Pattern to match function definitions + # Matches: fn function_name, pub fn function_name, pub const fn function_name, etc. + # This regex looks for function definitions + fn_pattern = ( + r"(?:pub\s+)?(?:const\s+)?fn\s+" + re.escape(function_name) + r"\s*[<\(]" + ) + + # Find all occurrences of the function + matches = list(re.finditer(fn_pattern, content)) + + # If we have an old link with a line number, use it to find the closest match + target_line = None + if old_link: + line_match = re.search(r"#L(\d+)", old_link) + if line_match: + target_line = int(line_match.group(1)) + + # If there are multiple matches and we have a target line, sort by proximity to target + if target_line and len(matches) > 1: + matches_with_lines = [] + for match in matches: + line_num = content[: match.start()].count("\n") + 1 + distance = abs(line_num - target_line) + matches_with_lines.append((distance, match)) + matches_with_lines.sort(key=lambda x: x[0]) + matches = [m[1] for m in matches_with_lines] + + # If we have impl_block for disambiguation and multiple matches, filter by impl context + if impl_block and len(matches) > 1: + filtered_matches = [] + for match in matches: 
+ fn_start = match.start() + found_impl_sig = extract_impl_signature(content, fn_start) + if found_impl_sig: + # Normalize both for comparison (remove lifetimes but preserve &) + def normalize_impl(s): + # Remove lifetime annotations while preserving & references + # Pattern 1: <'a> → "" (standalone lifetime generics like Deserialize<'de>) + # Pattern 2: 'a → "" (lifetimes in mixed contexts like &'b Scalar → &Scalar) + # Both patterns together ensure consistent normalization for matching + s = re.sub(r"<'\w+>", "", s) + s = re.sub(r"'\w+\b", "", s) + # Remove space after & to normalize "&'a Scalar" → "&Scalar" and "&Scalar" → "&Scalar" + s = re.sub(r"&\s+", "&", s) + return " ".join(s.split()) + + norm_found = normalize_impl(found_impl_sig) + norm_expected = normalize_impl(impl_block) + + # Check if they match exactly + if norm_found == norm_expected: + filtered_matches.append(match) + continue + + # Check for reference vs owned type distinction + # "Neg for &EdwardsPoint" vs "Neg for EdwardsPoint" + # Only applies when trait names match (e.g., both are "Neg") + if " for " in impl_block and " for " in norm_found: + expected_trait = impl_block.split(" for ")[0].strip() + found_trait = norm_found.split(" for ")[0].strip() + # Extract base trait name (e.g., "Neg" from "Neg", "From" stays as is) + expected_trait_base = expected_trait.split("<")[0] + found_trait_base = found_trait.split("<")[0] + + # Only do ref vs owned check if traits match and have no type params + # (e.g., Neg for &T vs Neg for T, not From vs From) + if ( + expected_trait_base == found_trait_base + and "<" not in expected_trait + ): + expected_type = impl_block.split(" for ")[-1].strip() + found_type = norm_found.split(" for ")[-1].strip() + expected_is_ref = expected_type.startswith("&") + found_is_ref = found_type.startswith("&") + if expected_is_ref == found_is_ref: + expected_base = expected_type.lstrip("&").strip() + found_base = found_type.lstrip("&").strip() + if expected_base == found_base: 
+ filtered_matches.append(match) + continue + if filtered_matches: + matches = filtered_matches + + # For each match, check if it has specs + for match in matches: + fn_start = match.start() + + # Check if this function is inside a comment block + # Look backwards for /* or // to see if we're in a comment + look_back_start = max(0, fn_start - 1000) + preceding_text = content[look_back_start:fn_start] + + # Check for /* ... */ block comments + last_block_open = preceding_text.rfind("/*") + last_block_close = preceding_text.rfind("*/") + if last_block_open > last_block_close: + # We're inside a block comment, skip this match + continue + + # Check for line comments - see if the function is on the same line as // + last_newline = preceding_text.rfind("\n") + if last_newline != -1: + line_start = preceding_text[last_newline:] + if "//" in line_start: + # We're on a line with a // comment, skip this match + continue + + # Find the function body by looking for the opening brace + # For Verus functions with complex specs, we need a more robust approach + # Look for the pattern that indicates the start of the function body + + # First, extract the entire function signature including specs + signature_end = fn_start + pos = fn_start + paren_depth = 0 + brace_depth = 0 + found_opening_paren = False + + # Find the end of the function signature (including specs) + # Skip characters inside comments to avoid counting parens/braces in comments + # (e.g., "// values are in [2^254, 2^255)" has a ) that would break paren tracking) + while pos < len(content): + char = content[pos] + + # Skip line comments (// ... \n) + if char == "/" and pos + 1 < len(content) and content[pos + 1] == "/": + newline = content.find("\n", pos) + pos = newline + 1 if newline != -1 else len(content) + continue + + # Skip block comments (/* ... 
*/) + if char == "/" and pos + 1 < len(content) and content[pos + 1] == "*": + end = content.find("*/", pos + 2) + pos = end + 2 if end != -1 else len(content) + continue + + if char == "(": + paren_depth += 1 + found_opening_paren = True + elif char == ")": + paren_depth -= 1 + elif char == "{": + if found_opening_paren and paren_depth == 0 and brace_depth == 0: + # We've found the first brace at the top level after seeing the parameter list + # This should be the function body opening brace + signature_end = pos + break + brace_depth += 1 + elif char == "}": + brace_depth -= 1 + + pos += 1 + + brace_pos = signature_end if signature_end > fn_start else -1 + + if brace_pos == -1: + # Might be a trait method declaration without body (ends with semicolon) + # Calculate line number for trait methods too + line_number = content[:fn_start].count("\n") + 1 + # Trait methods don't have specs/proofs, just return line number + # Try to extract impl context + impl_context = extract_impl_context(content, fn_start) + qualified_name = ( + f"{impl_context}::{function_name}" if impl_context else function_name + ) + return (False, False, False, line_number, qualified_name) + + # Look backwards from fn_start to find any attributes + # Attributes appear before the function definition + # Search backwards for the start of attributes (look for lines starting with #[) + lines_before = content[:fn_start].split("\n") + attr_lines = [] + + # Skip the last line if it's not empty (it contains the beginning of the fn line) + lines_to_check = ( + lines_before[:-1] + if lines_before and lines_before[-1].strip() + else lines_before + ) + + for line in reversed(lines_to_check): + stripped = line.strip() + if stripped.startswith("#["): + attr_lines.insert(0, line) + elif stripped and not stripped.startswith("//"): + # Stop at first non-attribute, non-comment line (including closing braces) + break + + attributes = "\n".join(attr_lines) + + # Extract the signature (between fn keyword and opening 
brace) + signature = content[fn_start:brace_pos] + + # Check for requires or ensures in the signature + # They must be the first non-whitespace on a line to avoid matching comments + has_requires = bool(re.search(r"^\s*requires\b", signature, re.MULTILINE)) + has_ensures = bool(re.search(r"^\s*ensures\b", signature, re.MULTILINE)) + has_spec = has_requires or has_ensures + + # Check for verifier::external attributes early (before checking has_spec) + has_verifier_external = bool(re.search(r"#\[verifier::external", attributes)) + + # Calculate line number for ALL functions (even those without specs) + # This ensures CSV links are always up-to-date + line_number = content[:fn_start].count("\n") + 1 + + # Extract impl context for qualified name + impl_context = extract_impl_context(content, fn_start) + qualified_name = ( + f"{impl_context}::{function_name}" if impl_context else function_name + ) + + # If neither specs nor external_body, return early with just the line number + if not has_spec and not has_verifier_external: + return (False, False, False, line_number, qualified_name) + + # Find the matching closing brace for the function body + brace_count = 1 + pos = brace_pos + 1 + body_end = brace_pos + + while pos < len(content) and brace_count > 0: + if content[pos] == "{": + brace_count += 1 + elif content[pos] == "}": + brace_count -= 1 + if brace_count == 0: + body_end = pos + break + pos += 1 + + # Find the next function to determine the region boundary + next_fn_pos = len(content) + next_fn_match = re.search( + r"(?:pub\s+)?(?:const\s+)?fn\s+", content[body_end + 1 :] + ) + if next_fn_match: + next_fn_pos = body_end + 1 + next_fn_match.start() + + # Extract the region from this fn to the next fn + fn_region = content[fn_start:next_fn_pos] + + # Additional check: assume anywhere in the fn region (between this fn and next fn) + # First, strip all block comments /* ... 
*/ from the region + fn_region_no_block_comments = re.sub( + r"/\*.*?\*/", "", fn_region, flags=re.DOTALL + ) + + # Check each line to exclude line comments + has_assume_in_region = False + for line in fn_region_no_block_comments.split("\n"): + # Skip lines that are comments (starting with //) + stripped = line.lstrip() + if stripped.startswith("//"): + continue + # Check if line contains assume (outside of comments) + # Remove inline comments first + code_part = line.split("//")[0] + if re.search(r"\bassume\b", code_part): + has_assume_in_region = True + break + + # In attributes: exec_allows_no_decreases_clause (external already checked above) + has_no_decreases = bool( + re.search(r"#\[verifier::exec_allows_no_decreases_clause\]", attributes) + ) + + has_proof = ( + has_spec + and not has_assume_in_region + and not has_verifier_external + and not has_no_decreases + ) + + # If function has external_body, treat it as having a spec (even if no requires/ensures) + has_spec_or_external = has_spec or has_verifier_external + + # Line number and qualified name were already calculated earlier + return ( + has_spec_or_external, + has_proof, + has_verifier_external, + line_number, + qualified_name, + ) + + # Probably what's happened is that we saw the function, but + # it doesn't have a spec yet + # Return line 0 if we couldn't find the function properly + # Try to get impl context even if function not found + impl_context = extract_impl_context(content, 0) if matches else None + qualified_name = ( + f"{impl_context}::{function_name}" if impl_context else function_name + ) + return (False, False, False, 0, qualified_name) + + +@beartype +def extract_file_path_from_link(link: str, src_dir: Path) -> Path: + """ + Extract the file path from a GitHub link. 
+ + Example: + https://github.com/dalek-cryptography/curve25519-dalek/tree/curve25519-4.1.3/curve25519-dalek/src/window.rs#L232 + -> src_dir/window.rs + + """ + assert link + + # Extract path after /src/ + match = re.search(r"/blob/([^/]+)/([^#]+)", link) + + assert match, link + relative_path = match.group(2) + + file_path = src_dir / relative_path + assert file_path.exists() + return file_path + + +@beartype +def discover_function_in_module( + qualified_func: str, module: str, src_dir: Path, impl_block: str = "" +) -> Optional[Tuple[Path, int, str]]: + """ + Discover a function in the codebase given its qualified name and module. + + Args: + qualified_func: Function name, possibly qualified (e.g., "FieldElement51::mul(args)" or "mul") + module: Module path (e.g., "curve25519_dalek::backend::serial::u64::field") + src_dir: Root source directory + impl_block: Optional impl block context for disambiguation (e.g., "Add for FieldElement51") + + Returns: + Tuple of (file_path, line_number, github_link) if found, None otherwise + """ + # Convert module path to file path + # e.g., "curve25519_dalek::backend::serial::u64::field" -> "curve25519-dalek/src/backend/serial/u64/field.rs" + print(f" Looking for module: {module} in src_dir: {src_dir}") + + # Logic to normalize the module path (strip crate name if present) + if "::" in module: + parts = module.split("::") + # Heuristic: if the first part looks like a crate name (and doesn't match CRATE_NAME, or does), strip it + # This is because probe-verus output usually includes the crate name + print(f" Stripping crate prefix {parts[0]}") + module_stripped = "::".join(parts[1:]) + elif module == CRATE_NAME: + module_stripped = "" + else: + module_stripped = module + + module_parts = module_stripped.split("::") if module_stripped else [] + + # Handle special module cases (non-standard file locations) + possible_paths = [] + + # 1. Standard structure: src_dir/CRATE_DIR/src/... 
+ base_path_1 = src_dir / CRATE_DIR / "src" + if module_parts: + possible_paths.append(base_path_1 / "/".join(module_parts[:-1]) / f"{module_parts[-1]}.rs") + possible_paths.append(base_path_1 / "/".join(module_parts) / "mod.rs") + + # 2. Try simple structure: src_dir/src/... (if we are already inside the crate) + base_path_2 = src_dir / "src" + if module_parts: + possible_paths.append(base_path_2 / "/".join(module_parts[:-1]) / f"{module_parts[-1]}.rs") + possible_paths.append(base_path_2 / "/".join(module_parts) / "mod.rs") + + # 3. Try module-root structure: src_dir/module_part0/src/... (monorepo style?) + if len(module_parts) > 1: + base_path_3 = src_dir / module_parts[0] / "src" + sub_parts = module_parts[1:] + possible_paths.append(base_path_3 / "/".join(sub_parts[:-1]) / f"{sub_parts[-1]}.rs") + possible_paths.append(base_path_3 / "/".join(sub_parts) / "mod.rs") + + # Extract just the function name (without Type:: prefix and signature if present) + func_part = ( + qualified_func.split("::")[-1] if "::" in qualified_func else qualified_func + ) + # Strip signature if present: "method(args)" -> "method" + func_name = func_part.split("(")[0] if "(" in func_part else func_part + + for file_path in possible_paths: + print(f" Checking file: {file_path}") + if not file_path.exists(): + print(f" File does not exist: {file_path}") + continue + + # Try to find the function in this file, passing impl_block for disambiguation + result = parse_function_in_file(file_path, func_name, "", impl_block) + has_spec, has_proof, is_external_body, line_number, found_qualified = result + if line_number > 0: + # Generate GitHub link + relative_path = file_path.relative_to(src_dir) + github_link = f"https://github.com/{GITHUB_REPO}/blob/main/{relative_path}#L{line_number}" + return (file_path, line_number, github_link) + + return None + + +@beartype +def analyze_functions( + seed_path: Path, src_dir: Path +) -> Dict[str, Tuple[bool, bool, bool, int, str, str, str, str]]: + """ + 
Analyze all functions in the seed file and check their Verus status. + + Args: + seed_path: Path to functions_to_track.csv (function, module, impl_block columns) + src_dir: Root source directory + + Returns: + Dict mapping function_module_impl_key to (has_spec, has_proof, is_external_body, line_number, github_link, qualified_name, module_name, impl_block) + """ + # Read the seed CSV to get function names and modules + with open(seed_path, "r") as f: + reader = csv.DictReader(f) + rows = [row for row in reader] + + results = {} + + # Keep track of functions we've seen to detect macro-generated duplicates + seen_functions = {} + + for row in rows: + qualified_func = row["function"] + module = row["module"] + impl_block = row.get("impl_block", "") # New column for disambiguation + + # Extract just the function name for searching + # Handle new format: "Type::method(signature)" -> extract "method" + func_part = ( + qualified_func.split("::")[-1] if "::" in qualified_func else qualified_func + ) + # Strip signature if present: "method(args)" -> "method" + func_name = func_part.split("(")[0] if "(" in func_part else func_part + + print(f"Discovering: {qualified_func} in {module}") + + # Discover the function in the codebase + discovery = discover_function_in_module( + qualified_func, module, src_dir, impl_block + ) + if not discovery: + print(" WARNING: Function not found in codebase, skipping") + continue + + target_file, line_number, github_link = discovery + + # Re-analyze to get full details (spec, proof, etc.) 
+ result = parse_function_in_file(target_file, func_name, github_link, impl_block) + has_spec, has_proof, is_external_body, line_number, found_qualified = result + if line_number == 0: + print(" WARNING: Could not re-analyze function, skipping") + continue + + # Check for macro-generated duplicates (same file, same function name, same line, same impl_block) + func_key = f"{target_file}::{found_qualified}::{line_number}::{impl_block}" + if func_key in seen_functions: + print( + f" Skipping duplicate (macro-generated): {found_qualified} at line {line_number}" + ) + continue + seen_functions[func_key] = True + + # Use a unique key for results (function + module + impl_block) + result_key = f"{qualified_func}::{module}::{impl_block}" + results[result_key] = ( + has_spec, + has_proof, + is_external_body, + line_number, + github_link, # Use the link we generated during discovery + qualified_func, # Use the qualified name from seed file + module, # Use the module from seed file + impl_block, # Include impl_block for disambiguation + ) + ext_marker = " [external_body]" if is_external_body else "" + print(f" Found: {qualified_func} in module {module}") + print(f" spec={has_spec}, proof={has_proof}{ext_marker}, line={line_number}") + + return results + + +@beartype +def update_links_to_main_branch(csv_path: Path): + """Update all GitHub links in CSV to use 'main' branch instead of specific commit hash. + + Only updates links if the file exists on main to avoid 404 errors. 
+ """ + try: + print("Updating links to use 'main' branch...") + + # Read the CSV + rows = [] + updated_count = 0 + skipped_count = 0 + + with open(csv_path, "r") as f: + reader = csv.DictReader(f) + fieldnames = reader.fieldnames + for row in reader: + # Update the link to use 'main' branch + old_link = row["link"] + if old_link: + # Extract file path from GitHub link + # Pattern: https://github.com/.../blob/{HASH}/path/to/file.rs#L123 + match = re.search(r"/blob/[a-f0-9]+/(.+?)(?:#|$)", old_link) + if match: + file_path = match.group(1) + # Remove line number if present + file_path = file_path.split("#")[0] + + # Check if file exists on main branch + try: + result = subprocess.run( + ["git", "cat-file", "-e", f"main:{file_path}"], + cwd=csv_path.parent.parent, # repo root + capture_output=True, + timeout=5, + ) + + if result.returncode == 0: + # File exists on main, safe to update link + new_link = re.sub( + r"/blob/[a-f0-9]+/", "/blob/main/", old_link + ) + row["link"] = new_link + updated_count += 1 + else: + # File doesn't exist on main, keep original link + skipped_count += 1 + except (subprocess.TimeoutExpired, subprocess.SubprocessError): + # If git check fails, keep original link to be safe + skipped_count += 1 + else: + # Couldn't parse link, keep original + pass + rows.append(row) + + # Write back to CSV + with open(csv_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + + print(f"✓ Updated {updated_count} links to use 'main' branch") + if skipped_count > 0: + print(f" Skipped {skipped_count} links (file not on main branch)") + except Exception as e: + print(f"Warning: Could not update links: {e}") + print("Continuing with existing links...") + + +@beartype +def update_csv( + csv_path: Path, results: Dict[str, Tuple[bool, bool, bool, int, str, str, str, str]] +): + """Generate the CSV file with Verus analysis results.""" + # CSV format + fieldnames = ["function", "module", 
"link", "has_spec", "has_proof"] + + # Build rows from results + new_rows = [] + for result_key, ( + has_spec, + has_proof, + is_external_body, + line_number, + github_link, + qualified_name, + module_name, + impl_block, + ) in results.items(): + new_row = { + "function": qualified_name, + "module": module_name, + "link": github_link, + # Use "ext" for functions with external_body, "yes" for regular specs + "has_spec": "ext" if is_external_body else ("yes" if has_spec else ""), + "has_proof": "yes" if has_proof else "", + } + new_rows.append(new_row) + + # Write the CSV + with open(csv_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(new_rows) + + print(f"\nCSV file updated: {csv_path}") + + # Print summary + spec_count = sum( + 1 for _, (has_spec, _, _, _, _, _, _, _) in results.items() if has_spec + ) + proof_count = sum( + 1 for _, (_, has_proof, _, _, _, _, _, _) in results.items() if has_proof + ) + external_count = sum( + 1 + for _, (has_spec, _, is_ext, _, _, _, _, _) in results.items() + if has_spec and is_ext + ) + full_spec_count = spec_count - external_count + total = len(results) + + print("\nSummary:") + print(f" Total functions: {total}") + print( + f" With Verus specs: {spec_count} ({round(spec_count * 100 / total) if total > 0 else 0}%)" + ) + print(f" - Full specs: {full_spec_count}") + print(f" - External body: {external_count}") + print( + f" With complete proofs: {proof_count} ({round(proof_count * 100 / total) if total > 0 else 0}%)" + ) + + +@beartype +def main(): + import os + # Parse command-line arguments + parser = argparse.ArgumentParser( + description="Generate CSV with Verus specs/proofs status from seed file" + ) + parser.add_argument( + "--seed", + type=str, + default="functions_to_track.csv", + help="Seed file name (default: functions_to_track.csv)", + ) + parser.add_argument( + "--output", + type=str, + default="outputs/curve25519_functions.csv", + 
help="Output CSV file path (default: outputs/curve25519_functions.csv)",
+    )
+    args = parser.parse_args()
+
+    # Set up paths
+    script_dir = Path(__file__).parent
+
+    # Check if REPO_ROOT env var is set (e.g. from Docker)
+    if 'REPO_ROOT' in os.environ:
+        repo_root = Path(os.environ["REPO_ROOT"])
+        print(f"Using REPO_ROOT from environment: {repo_root}")
+    else:
+        repo_root = script_dir.parent # Go up one level from scripts/ to repo root
+        print(f"Using calculated REPO_ROOT: {repo_root}")
+
+    seed_path = repo_root / args.seed
+    output_path = repo_root / args.output
+    src_dir = repo_root
+
+    # Check seed file exists
+    if not seed_path.exists():
+        print(f"Error: Seed file not found at {seed_path}")
+        # In Docker, paths might be mounted differently. List files for debugging
+        print(f"Listing files in {repo_root}:")
+        try:
+            for p in repo_root.iterdir():
+                print(f" {p}")
+        except Exception as e:
+            print(f" Error listing files: {e}")
+        return
+
+    if not src_dir.exists():
+        print(f"Error: Source directory not found at {src_dir}")
+        return
+
+    print(f"Reading seed file: {seed_path}")
+    print(f"Will generate CSV at: {output_path}")
+    print(f"Searching in: {src_dir}")
+    print()
+
+    # Analyze functions from seed file
+    results = analyze_functions(seed_path, src_dir)
+
+    # Generate output CSV
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    update_csv(output_path, results)
+
+
+if __name__ == "__main__":
+    import os  # main() imports os only locally; needed at module scope for the checks below
+    # NOTE(review): REPO_ROOT is consumed inside main(); no module-level override is needed here
+
+    if os.environ.get("CRATE_NAME"):
+        CRATE_NAME = os.environ.get("CRATE_NAME")
+
+    if os.environ.get("CRATE_DIR"):
+        CRATE_DIR = os.environ.get("CRATE_DIR")
+
+    main()
diff --git a/src/commands/create.rs b/src/commands/create.rs
index 6997416e..5c4c10b2 100644
--- a/src/commands/create.rs
+++ b/src/commands/create.rs
@@ -5,7 +5,7 @@ use crate::structure::{
     parse_github_link, run_command, write_frontmatter, CommandConfig, ConfigPaths,
     ExecutionMode, StructureConfig,
 };
-use anyhow::{Context, Result};
+use anyhow::{bail, 
Context, Result}; use serde_json::{json, Value}; use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; @@ -22,27 +22,37 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu .map(|r| r.to_string_lossy().to_string()) .unwrap_or_else(|| ".verilib/structure".to_string()); - // Write config file with ONLY structure-root field (add/update it, preserving others) + // Write config file with ONLY structure-root field let config = StructureConfig::new(&structure_root_relative); let config_path = config.save(&project_root, true)?; println!("Wrote config to {}", config_path.display()); // NOTE: .gitignore creation is moved to the 'init' subcommand + let config = ConfigPaths::load(&project_root)?; + let tracked_path = project_root.join("functions_to_track.csv"); - let seed_path = if tracked_path.exists() { - Some(tracked_path) - } else { - // Default: track all functions when functions_to_track.csv is absent - None - }; + if !tracked_path.exists() { + println!("functions_to_track.csv not found, generating from atomize..."); + crate::commands::atomize::handle_atomize( + project_root.clone(), + false, + false, + false, + ) + .await?; + + let atoms_path = verilib_path.join("atoms.json"); + if atoms_path.exists() { + generate_functions_to_track_csv(&atoms_path, &tracked_path)?; + } else { + bail!("Failed to generate atoms.json for functions_to_track.csv"); + } + } let tracked_output_path = verilib_path.join("tracked_functions.csv"); - // Load configuration to determine execution mode - let config = ConfigPaths::load(&project_root)?; - - run_probe_verus_tracked_csv(&project_root, &seed_path, &tracked_output_path, &config.command_config)?; + run_analyze_verus_specs_proofs(&project_root, &tracked_path, &tracked_output_path, &config.command_config)?; let tracked = read_tracked_csv(&tracked_output_path)?; let tracked = disambiguate_names(tracked); @@ -56,93 +66,143 @@ pub async fn handle_create(project_root: PathBuf, root: Option) -> Resu Ok(()) 
} -/// Run probe-verus tracked-csv CLI to generate tracked functions CSV. -fn run_probe_verus_tracked_csv( +/// Run analyze_verus_specs_proofs.py CLI to generate tracked functions CSV. +fn run_analyze_verus_specs_proofs( project_root: &Path, - _seed_path: &Path, + seed_path: &Path, output_path: &Path, config: &CommandConfig, ) -> Result<()> { - println!("Running probe-verus tracked-csv..."); + let script_name = "analyze_verus_specs_proofs.py"; + let script_path = if matches!(config.execution_mode, ExecutionMode::Docker) { + let workspace_script = PathBuf::from("/workspace/scripts").join(script_name); + + if project_root.join("scripts").join(script_name).exists() { + workspace_script + } else { + PathBuf::from("/usr/local/bin/scripts").join(script_name) + } + } else { + let path = project_root.join("scripts").join(script_name); + if !path.exists() { + bail!("Script not found locally: {}", path.display()); + } + path + }; + println!("Running {}...", script_name); + + let seed_arg = if matches!(config.execution_mode, ExecutionMode::Docker) { + seed_path.strip_prefix(project_root).unwrap_or(seed_path).to_string_lossy().to_string() + } else { + seed_path.to_string_lossy().to_string() + }; + + let output_arg = if matches!(config.execution_mode, ExecutionMode::Docker) { + output_path.strip_prefix(project_root).unwrap_or(output_path).to_string_lossy().to_string() + } else { + output_path.to_string_lossy().to_string() + }; + + // Ensure parent directory exists (locally) if let Some(parent) = output_path.parent() { - let _ = std::fs::create_dir_all(parent); + std::fs::create_dir_all(parent)?; } + + let (seed_flag, output_flag) = if matches!(config.execution_mode, ExecutionMode::Docker) { + ( + format!("/workspace/{}", seed_arg), + format!("/workspace/{}", output_arg), + ) + } else { + (seed_arg.clone(), output_arg.clone()) + }; - let mut args: Vec = vec![ - "run".into(), - script_path.to_str()?.into(), - "--output".into(), - output_relative.to_str()?.into(), + let 
script_path_str = script_path.to_string_lossy(); + let args = vec![ + "run", + &script_path_str, + "--seed", + &seed_flag, + "--output", + &output_flag, ]; - if let Some(seed) = seed_path { - let seed_relative = seed.strip_prefix(project_root).unwrap_or(seed); - args.extend(["--seed".into(), seed_relative.to_str()?.into()]); - } - - match config.execution_mode { - ExecutionMode::Local => { - - let output_relative = output_path - .strip_prefix(project_root) - .unwrap_or(output_path) - .to_string_lossy(); - - let output = run_command( - "probe-verus", - &[ - "tracked-csv", - ".", - "--output", - &output_relative, - ], - Some(project_root), - config, - )?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error running probe-verus tracked-csv:\n{}", stderr); - bail!("probe-verus tracked-csv failed"); - } - } - ExecutionMode::Docker => { - let workspace_mount = "/workspace"; - - let output_relative = output_path - .strip_prefix(project_root) - .unwrap_or(output_path) - .to_string_lossy(); - - let output_in_container = format!("{}/{}", workspace_mount, output_relative); - - let output = run_command( - "probe-verus", - &[ - "tracked-csv", - workspace_mount, - "--output", - &output_in_container - ], - Some(project_root), - config, - )?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error running probe-verus tracked-csv in Docker:\n{}", stderr); - bail!("probe-verus tracked-csv failed"); - } - } + + let output = run_command( + "uv", + &args, + Some(project_root), + config, + )?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + eprintln!("Error running {}:\n{}", script_name, stderr); + bail!("{} failed", script_name); } + + let stdout = String::from_utf8_lossy(&output.stdout); + println!("{} output:\n{}", script_name, stdout); println!( "Generated tracked functions CSV at {}", output_path.display() ); - 
Some(output_path.to_path_buf()) + Ok(()) +} + +/// Generate functions_to_track.csv from atoms.json +fn generate_functions_to_track_csv(atoms_path: &Path, output_path: &Path) -> Result<()> { + let file = std::fs::File::open(atoms_path)?; + let reader = std::io::BufReader::new(file); + let atoms: HashMap = serde_json::from_reader(reader)?; + + let mut wtr = csv::Writer::from_path(output_path)?; + wtr.write_record(&["function", "module", "impl_block"])?; + + for (key, val) in atoms { + if !key.starts_with("probe:") { + continue; + } + + let parts: Vec<&str> = key.split('/').collect(); + if parts.len() < 3 { continue; } + + let project_part = parts[0]; + let project_name = project_part.strip_prefix("probe:").unwrap_or(project_part); + + let func_part = parts.last().unwrap(); + + let function = val.get("display-name") + .and_then(|v| v.as_str()) + .unwrap_or(func_part) + .to_string() + "()"; + + if parts.len() <= 2 { + continue; + } + + let dir_parts = &parts[2..parts.len()-1]; + if dir_parts.is_empty() { + continue; + } + + let mut rev_parts: Vec<&str> = dir_parts.to_vec(); + rev_parts.reverse(); + + let mut module_parts = vec![project_name]; + module_parts.extend(rev_parts); + let module = module_parts.join("::"); + + wtr.write_record(&[&function, &module, ""])?; + } + + wtr.flush()?; + println!("Generated functions_to_track.csv at {}", output_path.display()); + Ok(()) } + /// Tracked function data from CSV. 
#[derive(Debug, Clone)] struct TrackedFunction { @@ -241,8 +301,6 @@ fn generate_structure_files( structure: &HashMap, structure_root: &Path, ) -> Result<()> { - std::fs::create_dir_all(structure_root) - .context("Failed to create structure root directory")?; let mut created_count = 0; for (relative_path_str, metadata) in structure { @@ -274,4 +332,4 @@ fn generate_structure_files( structure_root.display() ); Ok(()) -} +} \ No newline at end of file diff --git a/src/structure/executor.rs b/src/structure/executor.rs index af3d2699..49373c23 100644 --- a/src/structure/executor.rs +++ b/src/structure/executor.rs @@ -92,8 +92,20 @@ fn run_docker( let mut docker_args = vec![ "run", "--rm", - "--entrypoint", program, - "-u", &user_arg, + "--entrypoint", + program, + "-u", + &user_arg, + "-e", + "HOME=/tmp", + "-e", + "XDG_CACHE_HOME=/tmp/.cache", + "-e", + "UV_CACHE_DIR=/tmp/.uv-cache", + "-e", + "UV_PYTHON_INSTALL_DIR=/tmp/.python", + "-e", + "REPO_ROOT=/workspace", "-v", ]; @@ -101,8 +113,7 @@ fn run_docker( docker_args.push(&mount_arg); docker_args.extend_from_slice(&[ - "--tmpfs", "/tmp", - "--tmpfs", "/home/tooluser/.cache", + "--tmpfs", "/tmp:exec,mode=777", "--security-opt=no-new-privileges", "-w", "/workspace", image, From c3baf801325ff902f380f729940a11670fb45577 Mon Sep 17 00:00:00 2001 From: Nima Hamed Date: Thu, 19 Feb 2026 21:45:52 -0800 Subject: [PATCH 07/10] 0.2.0-beta --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9ecee2e2..da920354 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2282,7 +2282,7 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "verilib-cli" -version = "0.1.9" +version = "0.2.0-beta" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 8d7335ab..08199287 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "verilib-cli" -version = "0.1.9" +version = "0.2.0-beta" 
edition = "2021" description = "A command-line tool for managing Verilib repositories and API interactions" homepage = "https://github.com/Beneficial-AI-Foundation/verilib-cli" From 5b4387cda0df23365fd158b9ca4bf870c9fd664c Mon Sep 17 00:00:00 2001 From: Nima Hamed Date: Thu, 19 Feb 2026 22:16:12 -0800 Subject: [PATCH 08/10] remove git ignore --- src/commands/init.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/init.rs b/src/commands/init.rs index f522df1d..57111a02 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -263,7 +263,7 @@ fn save_config( fs::write(&config_path, &config_json).context("Failed to write config.json file")?; // Create .gitignore for generated files - create_gitignore(&verilib_path)?; + // create_gitignore(&verilib_path)?; Ok(()) } From bd62843ec78cf30ce9b4032b4b30a460eebb0936 Mon Sep 17 00:00:00 2001 From: Nima Hamed Date: Thu, 19 Feb 2026 22:16:38 -0800 Subject: [PATCH 09/10] 0.2.0-beta.1 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da920354..4f30de22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2282,7 +2282,7 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "verilib-cli" -version = "0.2.0-beta" +version = "0.2.0-beta.1" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 08199287..9080bea9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "verilib-cli" -version = "0.2.0-beta" +version = "0.2.0-beta.1" edition = "2021" description = "A command-line tool for managing Verilib repositories and API interactions" homepage = "https://github.com/Beneficial-AI-Foundation/verilib-cli" From b00d7ff58f029ba2a00cc6c51704e150d3cbc7bd Mon Sep 17 00:00:00 2001 From: avrevic Date: Fri, 20 Feb 2026 14:21:27 +0100 Subject: [PATCH 10/10] Remove file not in use --- scripts/analyze_verus_specs_proofs.py | 877 
-------------------------- 1 file changed, 877 deletions(-) delete mode 100644 scripts/analyze_verus_specs_proofs.py diff --git a/scripts/analyze_verus_specs_proofs.py b/scripts/analyze_verus_specs_proofs.py deleted file mode 100644 index 03e25918..00000000 --- a/scripts/analyze_verus_specs_proofs.py +++ /dev/null @@ -1,877 +0,0 @@ - -#!/usr/bin/env -S uv run --script -# /// script -# requires-python = ">=3.11" -# dependencies = [ -# "beartype", -# ] -# /// -""" -Analyze Verus specs and proofs in curve25519-dalek source code. -Updates the CSV to mark which functions have Verus specs and/or proofs. -""" - -import argparse -import csv -import re -import os -import subprocess -from pathlib import Path -from typing import Dict, Tuple, Optional -from beartype import beartype - -# Constants -CRATE_NAME = "curve25519_dalek" -CRATE_DIR = "curve25519-dalek" -GITHUB_REPO = "Beneficial-AI-Foundation/dalek-lite" - -# Special module mappings (modules with non-standard file locations) -SPECIAL_MODULES = { - "build": lambda src_dir: src_dir / CRATE_DIR / "build.rs", -} - - -@beartype -def extract_impl_signature(content: str, fn_start: int) -> Optional[str]: - """ - Extract the full impl block signature for a function. - - For example, if the function is inside: - impl Identity for ProjectivePoint { - Returns "Identity for ProjectivePoint" - - For trait implementations like: - impl<'a, 'b> Add<&'b Scalar> for &'a Scalar { - Returns "Add<&'b Scalar> for &'a Scalar" - - For inherent implementations: - impl Scalar { - Returns "Scalar" - """ - # Look backwards from fn_start to find the impl block - look_back = content[:fn_start] - lines = look_back.split("\n") - - # Search backwards for impl block - for i in range(len(lines) - 1, -1, -1): - line = lines[i].strip() - - # Stop at verus! 
blocks or module boundaries - if line.startswith("verus!") or line.startswith("mod "): - break - - # Look for impl blocks - if line.startswith("impl"): - # Extract everything between 'impl' and '{' - # Pattern: impl<...> TraitName<...> for TypeName { or impl TypeName { - match = re.search(r"impl\s*(?:<[^>]+>)?\s*(.+?)\s*\{", line) - if match: - impl_part = match.group(1).strip() - # Note: lifetime params are cleaned up later in normalize_impl() - return impl_part - - return None - - -@beartype -def extract_impl_context(content: str, fn_start: int) -> Optional[str]: - """ - Extract the impl block context for a function. - - For example, if the function is inside: - impl Identity for ProjectivePoint { - Returns "ProjectivePoint" - - For trait implementations like: - impl<'a, 'b> Add<&'b Scalar> for &'a Scalar { - Returns "Scalar" (the type being implemented for) - """ - # Look backwards from fn_start to find the impl block - look_back = content[:fn_start] - lines = look_back.split("\n") - - # Search backwards for impl block - for i in range(len(lines) - 1, -1, -1): - line = lines[i].strip() - - # Stop at verus! blocks or module boundaries - if line.startswith("verus!") or line.startswith("mod "): - break - - # Look for impl blocks - if line.startswith("impl"): - # Try to extract the type being implemented for - # Pattern 1: impl TraitName for TypeName { - match = re.search( - r"impl\s+(?:<[^>]+>)?\s*\w+\s+for\s+([&\']*)([A-Za-z0-9_]+)", line - ) - if match: - return match.group(2) - - # Pattern 2: impl TypeName { or impl<...> TypeName { - match = re.search(r"impl\s*(?:<[^>]+>)?\s+([A-Za-z0-9_]+)\s*\{", line) - if match: - return match.group(1) - - return None - - -@beartype -def extract_module_from_path(file_path: Path, repo_root: Path) -> str: - """ - Extract module path from file path. 
- - Example: - curve25519-dalek/src/backend/serial/curve_models/mod.rs - -> curve25519_dalek::backend::serial::curve_models - - curve25519-dalek/src/scalar.rs - -> curve25519_dalek::scalar - """ - try: - # Get relative path from repo root - rel_path = file_path.relative_to(repo_root) - - # Remove curve25519-dalek/ prefix and src/ prefix - parts = list(rel_path.parts) - - # Skip "curve25519-dalek" and "src" - module_parts = [] - skip_parts = {"curve25519-dalek", "src"} - - for part in parts: - if part in skip_parts: - continue - # Remove .rs extension or mod.rs - if part.endswith(".rs"): - if part == "mod.rs": - continue - part = part[:-3] - module_parts.append(part.replace("-", "_")) - - # Prepend crate name - return "curve25519_dalek::" + "::".join(module_parts) - except Exception: - return "curve25519_dalek" - - -@beartype -def parse_function_in_file( - file_path: Path, function_name: str, old_link: str = "", impl_block: str = "" -) -> Tuple[bool, bool, bool, int, str]: - """ - Parse a Rust file to find a function and check if it has Verus specs and proofs. 
- - Args: - file_path: Path to the Rust source file - function_name: Bare function name (without type prefix or signature) - old_link: Optional GitHub link with line number hint - impl_block: Optional impl block context for disambiguation (e.g., "Add for FieldElement51") - - Returns: - Tuple[bool, bool, bool, int, str]: (has_spec, has_proof, is_external_body, line_number, qualified_name) - - has_spec: True if function has requires or ensures clauses - - has_proof: True if has_spec and no assume in body - - is_external_body: True if function has #[verifier::external_body] - - line_number: Line number where the function is defined (1-indexed) - - qualified_name: Function name with impl context (e.g., "ProjectivePoint::identity") - """ - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - - # Pattern to match function definitions - # Matches: fn function_name, pub fn function_name, pub const fn function_name, etc. - # This regex looks for function definitions - fn_pattern = ( - r"(?:pub\s+)?(?:const\s+)?fn\s+" + re.escape(function_name) + r"\s*[<\(]" - ) - - # Find all occurrences of the function - matches = list(re.finditer(fn_pattern, content)) - - # If we have an old link with a line number, use it to find the closest match - target_line = None - if old_link: - line_match = re.search(r"#L(\d+)", old_link) - if line_match: - target_line = int(line_match.group(1)) - - # If there are multiple matches and we have a target line, sort by proximity to target - if target_line and len(matches) > 1: - matches_with_lines = [] - for match in matches: - line_num = content[: match.start()].count("\n") + 1 - distance = abs(line_num - target_line) - matches_with_lines.append((distance, match)) - matches_with_lines.sort(key=lambda x: x[0]) - matches = [m[1] for m in matches_with_lines] - - # If we have impl_block for disambiguation and multiple matches, filter by impl context - if impl_block and len(matches) > 1: - filtered_matches = [] - for match in matches: 
- fn_start = match.start() - found_impl_sig = extract_impl_signature(content, fn_start) - if found_impl_sig: - # Normalize both for comparison (remove lifetimes but preserve &) - def normalize_impl(s): - # Remove lifetime annotations while preserving & references - # Pattern 1: <'a> → "" (standalone lifetime generics like Deserialize<'de>) - # Pattern 2: 'a → "" (lifetimes in mixed contexts like &'b Scalar → &Scalar) - # Both patterns together ensure consistent normalization for matching - s = re.sub(r"<'\w+>", "", s) - s = re.sub(r"'\w+\b", "", s) - # Remove space after & to normalize "&'a Scalar" → "&Scalar" and "&Scalar" → "&Scalar" - s = re.sub(r"&\s+", "&", s) - return " ".join(s.split()) - - norm_found = normalize_impl(found_impl_sig) - norm_expected = normalize_impl(impl_block) - - # Check if they match exactly - if norm_found == norm_expected: - filtered_matches.append(match) - continue - - # Check for reference vs owned type distinction - # "Neg for &EdwardsPoint" vs "Neg for EdwardsPoint" - # Only applies when trait names match (e.g., both are "Neg") - if " for " in impl_block and " for " in norm_found: - expected_trait = impl_block.split(" for ")[0].strip() - found_trait = norm_found.split(" for ")[0].strip() - # Extract base trait name (e.g., "Neg" from "Neg", "From" stays as is) - expected_trait_base = expected_trait.split("<")[0] - found_trait_base = found_trait.split("<")[0] - - # Only do ref vs owned check if traits match and have no type params - # (e.g., Neg for &T vs Neg for T, not From vs From) - if ( - expected_trait_base == found_trait_base - and "<" not in expected_trait - ): - expected_type = impl_block.split(" for ")[-1].strip() - found_type = norm_found.split(" for ")[-1].strip() - expected_is_ref = expected_type.startswith("&") - found_is_ref = found_type.startswith("&") - if expected_is_ref == found_is_ref: - expected_base = expected_type.lstrip("&").strip() - found_base = found_type.lstrip("&").strip() - if expected_base == found_base: 
- filtered_matches.append(match) - continue - if filtered_matches: - matches = filtered_matches - - # For each match, check if it has specs - for match in matches: - fn_start = match.start() - - # Check if this function is inside a comment block - # Look backwards for /* or // to see if we're in a comment - look_back_start = max(0, fn_start - 1000) - preceding_text = content[look_back_start:fn_start] - - # Check for /* ... */ block comments - last_block_open = preceding_text.rfind("/*") - last_block_close = preceding_text.rfind("*/") - if last_block_open > last_block_close: - # We're inside a block comment, skip this match - continue - - # Check for line comments - see if the function is on the same line as // - last_newline = preceding_text.rfind("\n") - if last_newline != -1: - line_start = preceding_text[last_newline:] - if "//" in line_start: - # We're on a line with a // comment, skip this match - continue - - # Find the function body by looking for the opening brace - # For Verus functions with complex specs, we need a more robust approach - # Look for the pattern that indicates the start of the function body - - # First, extract the entire function signature including specs - signature_end = fn_start - pos = fn_start - paren_depth = 0 - brace_depth = 0 - found_opening_paren = False - - # Find the end of the function signature (including specs) - # Skip characters inside comments to avoid counting parens/braces in comments - # (e.g., "// values are in [2^254, 2^255)" has a ) that would break paren tracking) - while pos < len(content): - char = content[pos] - - # Skip line comments (// ... \n) - if char == "/" and pos + 1 < len(content) and content[pos + 1] == "/": - newline = content.find("\n", pos) - pos = newline + 1 if newline != -1 else len(content) - continue - - # Skip block comments (/* ... 
*/) - if char == "/" and pos + 1 < len(content) and content[pos + 1] == "*": - end = content.find("*/", pos + 2) - pos = end + 2 if end != -1 else len(content) - continue - - if char == "(": - paren_depth += 1 - found_opening_paren = True - elif char == ")": - paren_depth -= 1 - elif char == "{": - if found_opening_paren and paren_depth == 0 and brace_depth == 0: - # We've found the first brace at the top level after seeing the parameter list - # This should be the function body opening brace - signature_end = pos - break - brace_depth += 1 - elif char == "}": - brace_depth -= 1 - - pos += 1 - - brace_pos = signature_end if signature_end > fn_start else -1 - - if brace_pos == -1: - # Might be a trait method declaration without body (ends with semicolon) - # Calculate line number for trait methods too - line_number = content[:fn_start].count("\n") + 1 - # Trait methods don't have specs/proofs, just return line number - # Try to extract impl context - impl_context = extract_impl_context(content, fn_start) - qualified_name = ( - f"{impl_context}::{function_name}" if impl_context else function_name - ) - return (False, False, False, line_number, qualified_name) - - # Look backwards from fn_start to find any attributes - # Attributes appear before the function definition - # Search backwards for the start of attributes (look for lines starting with #[) - lines_before = content[:fn_start].split("\n") - attr_lines = [] - - # Skip the last line if it's not empty (it contains the beginning of the fn line) - lines_to_check = ( - lines_before[:-1] - if lines_before and lines_before[-1].strip() - else lines_before - ) - - for line in reversed(lines_to_check): - stripped = line.strip() - if stripped.startswith("#["): - attr_lines.insert(0, line) - elif stripped and not stripped.startswith("//"): - # Stop at first non-attribute, non-comment line (including closing braces) - break - - attributes = "\n".join(attr_lines) - - # Extract the signature (between fn keyword and opening 
brace) - signature = content[fn_start:brace_pos] - - # Check for requires or ensures in the signature - # They must be the first non-whitespace on a line to avoid matching comments - has_requires = bool(re.search(r"^\s*requires\b", signature, re.MULTILINE)) - has_ensures = bool(re.search(r"^\s*ensures\b", signature, re.MULTILINE)) - has_spec = has_requires or has_ensures - - # Check for verifier::external attributes early (before checking has_spec) - has_verifier_external = bool(re.search(r"#\[verifier::external", attributes)) - - # Calculate line number for ALL functions (even those without specs) - # This ensures CSV links are always up-to-date - line_number = content[:fn_start].count("\n") + 1 - - # Extract impl context for qualified name - impl_context = extract_impl_context(content, fn_start) - qualified_name = ( - f"{impl_context}::{function_name}" if impl_context else function_name - ) - - # If neither specs nor external_body, return early with just the line number - if not has_spec and not has_verifier_external: - return (False, False, False, line_number, qualified_name) - - # Find the matching closing brace for the function body - brace_count = 1 - pos = brace_pos + 1 - body_end = brace_pos - - while pos < len(content) and brace_count > 0: - if content[pos] == "{": - brace_count += 1 - elif content[pos] == "}": - brace_count -= 1 - if brace_count == 0: - body_end = pos - break - pos += 1 - - # Find the next function to determine the region boundary - next_fn_pos = len(content) - next_fn_match = re.search( - r"(?:pub\s+)?(?:const\s+)?fn\s+", content[body_end + 1 :] - ) - if next_fn_match: - next_fn_pos = body_end + 1 + next_fn_match.start() - - # Extract the region from this fn to the next fn - fn_region = content[fn_start:next_fn_pos] - - # Additional check: assume anywhere in the fn region (between this fn and next fn) - # First, strip all block comments /* ... 
*/ from the region - fn_region_no_block_comments = re.sub( - r"/\*.*?\*/", "", fn_region, flags=re.DOTALL - ) - - # Check each line to exclude line comments - has_assume_in_region = False - for line in fn_region_no_block_comments.split("\n"): - # Skip lines that are comments (starting with //) - stripped = line.lstrip() - if stripped.startswith("//"): - continue - # Check if line contains assume (outside of comments) - # Remove inline comments first - code_part = line.split("//")[0] - if re.search(r"\bassume\b", code_part): - has_assume_in_region = True - break - - # In attributes: exec_allows_no_decreases_clause (external already checked above) - has_no_decreases = bool( - re.search(r"#\[verifier::exec_allows_no_decreases_clause\]", attributes) - ) - - has_proof = ( - has_spec - and not has_assume_in_region - and not has_verifier_external - and not has_no_decreases - ) - - # If function has external_body, treat it as having a spec (even if no requires/ensures) - has_spec_or_external = has_spec or has_verifier_external - - # Line number and qualified name were already calculated earlier - return ( - has_spec_or_external, - has_proof, - has_verifier_external, - line_number, - qualified_name, - ) - - # Probably what's happened is that we saw the function, but - # it doesn't have a spec yet - # Return line 0 if we couldn't find the function properly - # Try to get impl context even if function not found - impl_context = extract_impl_context(content, 0) if matches else None - qualified_name = ( - f"{impl_context}::{function_name}" if impl_context else function_name - ) - return (False, False, False, 0, qualified_name) - - -@beartype -def extract_file_path_from_link(link: str, src_dir: Path) -> Path: - """ - Extract the file path from a GitHub link. 
- - Example: - https://github.com/dalek-cryptography/curve25519-dalek/tree/curve25519-4.1.3/curve25519-dalek/src/window.rs#L232 - -> src_dir/window.rs - - """ - assert link - - # Extract path after /src/ - match = re.search(r"/blob/([^/]+)/([^#]+)", link) - - assert match, link - relative_path = match.group(2) - - file_path = src_dir / relative_path - assert file_path.exists() - return file_path - - -@beartype -def discover_function_in_module( - qualified_func: str, module: str, src_dir: Path, impl_block: str = "" -) -> Optional[Tuple[Path, int, str]]: - """ - Discover a function in the codebase given its qualified name and module. - - Args: - qualified_func: Function name, possibly qualified (e.g., "FieldElement51::mul(args)" or "mul") - module: Module path (e.g., "curve25519_dalek::backend::serial::u64::field") - src_dir: Root source directory - impl_block: Optional impl block context for disambiguation (e.g., "Add for FieldElement51") - - Returns: - Tuple of (file_path, line_number, github_link) if found, None otherwise - """ - # Convert module path to file path - # e.g., "curve25519_dalek::backend::serial::u64::field" -> "curve25519-dalek/src/backend/serial/u64/field.rs" - print(f" Looking for module: {module} in src_dir: {src_dir}") - - # Logic to normalize the module path (strip crate name if present) - if "::" in module: - parts = module.split("::") - # Heuristic: if the first part looks like a crate name (and doesn't match CRATE_NAME, or does), strip it - # This is because probe-verus output usually includes the crate name - print(f" Stripping crate prefix {parts[0]}") - module_stripped = "::".join(parts[1:]) - elif module == CRATE_NAME: - module_stripped = "" - else: - module_stripped = module - - module_parts = module_stripped.split("::") if module_stripped else [] - - # Handle special module cases (non-standard file locations) - possible_paths = [] - - # 1. Standard structure: src_dir/CRATE_DIR/src/... 
- base_path_1 = src_dir / CRATE_DIR / "src" - if module_parts: - possible_paths.append(base_path_1 / "/".join(module_parts[:-1]) / f"{module_parts[-1]}.rs") - possible_paths.append(base_path_1 / "/".join(module_parts) / "mod.rs") - - # 2. Try simple structure: src_dir/src/... (if we are already inside the crate) - base_path_2 = src_dir / "src" - if module_parts: - possible_paths.append(base_path_2 / "/".join(module_parts[:-1]) / f"{module_parts[-1]}.rs") - possible_paths.append(base_path_2 / "/".join(module_parts) / "mod.rs") - - # 3. Try module-root structure: src_dir/module_part0/src/... (monorepo style?) - if len(module_parts) > 1: - base_path_3 = src_dir / module_parts[0] / "src" - sub_parts = module_parts[1:] - possible_paths.append(base_path_3 / "/".join(sub_parts[:-1]) / f"{sub_parts[-1]}.rs") - possible_paths.append(base_path_3 / "/".join(sub_parts) / "mod.rs") - - # Extract just the function name (without Type:: prefix and signature if present) - func_part = ( - qualified_func.split("::")[-1] if "::" in qualified_func else qualified_func - ) - # Strip signature if present: "method(args)" -> "method" - func_name = func_part.split("(")[0] if "(" in func_part else func_part - - for file_path in possible_paths: - print(f" Checking file: {file_path}") - if not file_path.exists(): - print(f" File does not exist: {file_path}") - continue - - # Try to find the function in this file, passing impl_block for disambiguation - result = parse_function_in_file(file_path, func_name, "", impl_block) - has_spec, has_proof, is_external_body, line_number, found_qualified = result - if line_number > 0: - # Generate GitHub link - relative_path = file_path.relative_to(src_dir) - github_link = f"https://github.com/{GITHUB_REPO}/blob/main/{relative_path}#L{line_number}" - return (file_path, line_number, github_link) - - return None - - -@beartype -def analyze_functions( - seed_path: Path, src_dir: Path -) -> Dict[str, Tuple[bool, bool, bool, int, str, str, str, str]]: - """ - 
Analyze all functions in the seed file and check their Verus status. - - Args: - seed_path: Path to functions_to_track.csv (function, module, impl_block columns) - src_dir: Root source directory - - Returns: - Dict mapping function_module_impl_key to (has_spec, has_proof, is_external_body, line_number, github_link, qualified_name, module_name, impl_block) - """ - # Read the seed CSV to get function names and modules - with open(seed_path, "r") as f: - reader = csv.DictReader(f) - rows = [row for row in reader] - - results = {} - - # Keep track of functions we've seen to detect macro-generated duplicates - seen_functions = {} - - for row in rows: - qualified_func = row["function"] - module = row["module"] - impl_block = row.get("impl_block", "") # New column for disambiguation - - # Extract just the function name for searching - # Handle new format: "Type::method(signature)" -> extract "method" - func_part = ( - qualified_func.split("::")[-1] if "::" in qualified_func else qualified_func - ) - # Strip signature if present: "method(args)" -> "method" - func_name = func_part.split("(")[0] if "(" in func_part else func_part - - print(f"Discovering: {qualified_func} in {module}") - - # Discover the function in the codebase - discovery = discover_function_in_module( - qualified_func, module, src_dir, impl_block - ) - if not discovery: - print(" WARNING: Function not found in codebase, skipping") - continue - - target_file, line_number, github_link = discovery - - # Re-analyze to get full details (spec, proof, etc.) 
        result = parse_function_in_file(target_file, func_name, github_link, impl_block)
        has_spec, has_proof, is_external_body, line_number, found_qualified = result
        # line_number == 0 is the parser's "not found" sentinel.
        if line_number == 0:
            print("  WARNING: Could not re-analyze function, skipping")
            continue

        # Check for macro-generated duplicates (same file, same function name,
        # same line, same impl_block): macro expansion can surface one source
        # function several times.
        func_key = f"{target_file}::{found_qualified}::{line_number}::{impl_block}"
        if func_key in seen_functions:
            print(
                f"  Skipping duplicate (macro-generated): {found_qualified} at line {line_number}"
            )
            continue
        seen_functions[func_key] = True

        # Use a unique key for results (function + module + impl_block)
        result_key = f"{qualified_func}::{module}::{impl_block}"
        results[result_key] = (
            has_spec,
            has_proof,
            is_external_body,
            line_number,
            github_link,  # Use the link we generated during discovery
            qualified_func,  # Use the qualified name from seed file
            module,  # Use the module from seed file
            impl_block,  # Include impl_block for disambiguation
        )
        ext_marker = " [external_body]" if is_external_body else ""
        print(f"  Found: {qualified_func} in module {module}")
        print(f"    spec={has_spec}, proof={has_proof}{ext_marker}, line={line_number}")

    return results


@beartype
def update_links_to_main_branch(csv_path: Path):
    """Update all GitHub links in CSV to use 'main' branch instead of specific commit hash.

    Only updates links if the file exists on main to avoid 404 errors.

    Args:
        csv_path: CSV file whose "link" column is rewritten in place.

    Best-effort: any unexpected failure is reported and the CSV is left as-is.
    """
    try:
        print("Updating links to use 'main' branch...")

        # Read the CSV
        rows = []
        updated_count = 0
        skipped_count = 0

        with open(csv_path, "r") as f:
            reader = csv.DictReader(f)
            fieldnames = reader.fieldnames
            for row in reader:
                # Update the link to use 'main' branch
                old_link = row["link"]
                if old_link:
                    # Extract file path from GitHub link
                    # Pattern: https://github.com/.../blob/{HASH}/path/to/file.rs#L123
                    # (the [a-f0-9]+ segment only matches lowercase hex hashes,
                    # so already-"main" links are left untouched)
                    match = re.search(r"/blob/[a-f0-9]+/(.+?)(?:#|$)", old_link)
                    if match:
                        file_path = match.group(1)
                        # Remove line number if present
                        file_path = file_path.split("#")[0]

                        # Check if file exists on main branch.
                        # `git cat-file -e` exits 0 iff the object exists.
                        try:
                            result = subprocess.run(
                                ["git", "cat-file", "-e", f"main:{file_path}"],
                                # NOTE(review): assumes csv_path sits exactly one
                                # directory below the repo root (e.g. outputs/)
                                # so parent.parent is the repo — confirm for
                                # other layouts.
                                cwd=csv_path.parent.parent,  # repo root
                                capture_output=True,
                                timeout=5,
                            )

                            if result.returncode == 0:
                                # File exists on main, safe to update link
                                new_link = re.sub(
                                    r"/blob/[a-f0-9]+/", "/blob/main/", old_link
                                )
                                row["link"] = new_link
                                updated_count += 1
                            else:
                                # File doesn't exist on main, keep original link
                                skipped_count += 1
                        except (subprocess.TimeoutExpired, subprocess.SubprocessError):
                            # If git check fails, keep original link to be safe
                            skipped_count += 1
                    else:
                        # Couldn't parse link, keep original
                        pass
                rows.append(row)

        # Write back to CSV
        with open(csv_path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

        print(f"✓ Updated {updated_count} links to use 'main' branch")
        if skipped_count > 0:
            print(f"  Skipped {skipped_count} links (file not on main branch)")
    except Exception as e:
        # Deliberate best-effort: link rewriting must never abort the pipeline.
        print(f"Warning: Could not update links: {e}")
        print("Continuing with existing links...")


@beartype
def update_csv(
    csv_path: Path, results: Dict[str, Tuple[bool, bool, bool, int, str, str, str, str]]
):
    """Generate the CSV file with Verus analysis results.

    Args:
        csv_path: Output CSV path (overwritten).
        results: Mapping from result key to
            (has_spec, has_proof, is_external_body, line_number, github_link,
             qualified_name, module_name, impl_block).
    """
    # CSV format
    fieldnames = ["function", "module",
"link", "has_spec", "has_proof"] - - # Build rows from results - new_rows = [] - for result_key, ( - has_spec, - has_proof, - is_external_body, - line_number, - github_link, - qualified_name, - module_name, - impl_block, - ) in results.items(): - new_row = { - "function": qualified_name, - "module": module_name, - "link": github_link, - # Use "ext" for functions with external_body, "yes" for regular specs - "has_spec": "ext" if is_external_body else ("yes" if has_spec else ""), - "has_proof": "yes" if has_proof else "", - } - new_rows.append(new_row) - - # Write the CSV - with open(csv_path, "w", newline="") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(new_rows) - - print(f"\nCSV file updated: {csv_path}") - - # Print summary - spec_count = sum( - 1 for _, (has_spec, _, _, _, _, _, _, _) in results.items() if has_spec - ) - proof_count = sum( - 1 for _, (_, has_proof, _, _, _, _, _, _) in results.items() if has_proof - ) - external_count = sum( - 1 - for _, (has_spec, _, is_ext, _, _, _, _, _) in results.items() - if has_spec and is_ext - ) - full_spec_count = spec_count - external_count - total = len(results) - - print("\nSummary:") - print(f" Total functions: {total}") - print( - f" With Verus specs: {spec_count} ({round(spec_count * 100 / total) if total > 0 else 0}%)" - ) - print(f" - Full specs: {full_spec_count}") - print(f" - External body: {external_count}") - print( - f" With complete proofs: {proof_count} ({round(proof_count * 100 / total) if total > 0 else 0}%)" - ) - - -@beartype -def main(): - import os - # Parse command-line arguments - parser = argparse.ArgumentParser( - description="Generate CSV with Verus specs/proofs status from seed file" - ) - parser.add_argument( - "--seed", - type=str, - default="functions_to_track.csv", - help="Seed file name (default: functions_to_track.csv)", - ) - parser.add_argument( - "--output", - type=str, - default="outputs/curve25519_functions.csv", - 
help="Output CSV file path (default: outputs/curve25519_functions.csv)", - ) - args = parser.parse_args() - - # Set up paths - script_dir = Path(__file__).parent - - # Check if REPO_ROOT env var is set (e.g. from Docker) - if 'REPO_ROOT' in os.environ: - repo_root = Path(os.environ["REPO_ROOT"]) - print(f"Using REPO_ROOT from environment: {repo_root}") - else: - repo_root = script_dir.parent # Go up one level from scripts/ to repo root - print(f"Using calculated REPO_ROOT: {repo_root}") - - seed_path = repo_root / args.seed - output_path = repo_root / args.output - src_dir = repo_root - - # Check seed file exists - if not seed_path.exists(): - print(f"Error: Seed file not found at {seed_path}") - # In Docker, paths might be mounted differently. List files for debugging - print(f"Listing files in {repo_root}:") - try: - for p in repo_root.iterdir(): - print(f" {p}") - except Exception as e: - print(f" Error listing files: {e}") - return - - if not src_dir.exists(): - print(f"Error: Source directory not found at {src_dir}") - return - - print(f"Reading seed file: {seed_path}") - print(f"Will generate CSV at: {output_path}") - print(f"Searching in: {src_dir}") - print() - - # Analyze functions from seed file - results = analyze_functions(seed_path, src_dir) - - # Generate output CSV - output_path.parent.mkdir(parents=True, exist_ok=True) - update_csv(output_path, results) - - -if __name__ == "__main__": - if os.environ.get("REPO_ROOT"): - pass - - if os.environ.get("CRATE_NAME"): - CRATE_NAME = os.environ.get("CRATE_NAME") - - if os.environ.get("CRATE_DIR"): - CRATE_DIR = os.environ.get("CRATE_DIR") - - main()