From 5c262568fe7d5e04bccef98ac154a1de962cd07d Mon Sep 17 00:00:00 2001 From: Reiase Date: Thu, 1 Jan 2026 20:31:33 +0800 Subject: [PATCH 1/5] Update dioxus dependencies and streamline build environment setup - Bump dioxus, dioxus-router, and dioxus-web versions to 0.7.2 in Cargo.toml for improved features and stability. - Remove unnecessary installation commands for cargo-binstall and trunk in the GitHub Actions setup, simplifying the build environment configuration. --- .github/actions/setup-build-env/action.yml | 4 +--- web/Cargo.toml | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/actions/setup-build-env/action.yml b/.github/actions/setup-build-env/action.yml index 9030f2e..333e4e4 100644 --- a/.github/actions/setup-build-env/action.yml +++ b/.github/actions/setup-build-env/action.yml @@ -65,9 +65,7 @@ runs: test -e ~/.cargo/bin/cargo-zigbuild || cargo install cargo-zigbuild test -e ~/.cargo/bin/rnr || cargo install rnr test -e ~/.cargo/bin/cargo-nextest || cargo install cargo-nextest - test -e ~/.cargo/bin/cargo-binstall || cargo install cargo-binstall - test -e ~/.cargo/bin/dx || cargo binstall dioxus-cli@0.7.0 -y - test -e ~/.cargo/bin/trunk || cargo install trunk --locked + test -e ~/.cargo/bin/dx || cargo install dioxus-cli@0.7.0 -y - name: Install Python Build Dependencies if: inputs.install-python-deps == 'true' diff --git a/web/Cargo.toml b/web/Cargo.toml index a17c9b4..c4f5571 100644 --- a/web/Cargo.toml +++ b/web/Cargo.toml @@ -7,9 +7,9 @@ edition = "2021" [dependencies] # Dioxus dependencies -dioxus = { version = "0.7", features = ["web"] } -dioxus-router = "0.7" -dioxus-web = "0.7" +dioxus = { version = "0.7.2", features = ["web"] } +dioxus-router = "0.7.2" +dioxus-web = "0.7.2" # Serialization serde = { version = "1.0", features = ["derive"] } From 392c2c6afef6fcd713ae0b77901bb443aa235c6a Mon Sep 17 00:00:00 2001 From: Reiase Date: Thu, 1 Jan 2026 20:43:21 +0800 Subject: [PATCH 2/5] Update documentation and improve build environment setup - Updated the dioxus-cli version to 0.7.2 in the GitHub Actions setup for enhanced stability. - Revised navigation structure in MkDocs configuration to improve clarity and accessibility of documentation. - Added new sections in the documentation to highlight the advantages of Probing and its core technical features. - Translated new documentation content into Chinese to support a broader audience. --- .github/actions/setup-build-env/action.yml | 2 +- docs/mkdocs.yml | 36 ++-- docs/src/index.md | 65 ++++++ docs/src/index.zh.md | 65 ++++++ docs/src/why-probing.md | 234 +++++++++++++++++++++ docs/src/why-probing.zh.md | 234 +++++++++++++++++++++ 6 files changed, 620 insertions(+), 16 deletions(-) create mode 100644 docs/src/why-probing.md create mode 100644 docs/src/why-probing.zh.md diff --git a/.github/actions/setup-build-env/action.yml b/.github/actions/setup-build-env/action.yml index 333e4e4..9e1e117 100644 --- a/.github/actions/setup-build-env/action.yml +++ b/.github/actions/setup-build-env/action.yml @@ -65,7 +65,7 @@ runs: test -e ~/.cargo/bin/cargo-zigbuild || cargo install cargo-zigbuild test -e ~/.cargo/bin/rnr || cargo install rnr test -e ~/.cargo/bin/cargo-nextest || cargo install cargo-nextest - test -e ~/.cargo/bin/dx || cargo install dioxus-cli@0.7.0 -y + test -e ~/.cargo/bin/dx || cargo install dioxus-cli@0.7.2 - name: Install Python Build Dependencies if: inputs.install-python-deps == 'true' diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5e81628..9532480 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -80,22 +80,25 @@ plugins: site_name: "Probing 文档" nav_translations: Home: 首页 - Installation: 安装指南 + Getting Started: 入门指南 + Why Probing: 为什么选择 Probing + Installation: 安装 Quick Start: 快速开始 User Guide: 用户指南 SQL Analytics: SQL 分析 Memory Analysis: 内存分析 Debugging: 调试指南 Troubleshooting: 常见问题 + Examples: 示例 + Training Debugging: 训练调试 + Memory Leak: 内存泄漏 + Performance Analysis: 性能分析 Design: 设计文档 Architecture: 系统架构 Profiling: 性能分析 Distributed: 分布式 Extensibility: 扩展机制 - Examples: 示例 - Training Debugging: 训练调试 - Memory Leak: 内存泄漏 - Performance Analysis: 性能分析 + Reference: 参考 API Reference: API 参考 Versions: 版本兼容性 Contributing: 贡献指南 @@ -111,14 +114,21 @@ plugins: nav: - Home: index.md - - Installation: installation.md - - Quick Start: quickstart.md + - Getting Started: + - Why Probing: why-probing.md + - Installation: installation.md + - Quick Start: quickstart.md - User Guide: - guide/index.md - SQL Analytics: guide/sql-analytics.md - Memory Analysis: guide/memory-analysis.md - Debugging: guide/debugging.md - Troubleshooting: guide/troubleshooting.md + - Examples: + - examples/index.md + - Training Debugging: examples/training-debugging.md + - Memory Leak: examples/memory-leak.md + - Performance Analysis: examples/performance-analysis.md - Design: - design/index.md - Architecture: design/architecture.md @@ -126,14 +136,10 @@ nav: - Debugging: design/debugging.md - Distributed: design/distributed.md - Extensibility: design/extensibility.md - - Examples: - - examples/index.md - - Training Debugging: examples/training-debugging.md - - Memory Leak: examples/memory-leak.md - - Performance Analysis: examples/performance-analysis.md - - API Reference: api-reference.md - - Versions: versions.md - - Contributing: contributing.md + - Reference: + - API Reference: api-reference.md + - Versions: versions.md + - Contributing: contributing.md extra: generator: false diff --git a/docs/src/index.md b/docs/src/index.md index f5dc49d..9b856c5 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -11,6 +11,68 @@ hide: toc **Probing** is a dynamic performance profiler for distributed AI applications. +## 🎯 Why Probing? + +### Pain Points of Traditional Profilers + +| Problem | Traditional Approach | Probing Solution | +|---------|---------------------|------------------| +| **Code modification required** | Add logging, timers, decorators | ✅ Dynamic injection, zero code changes | +| **Fixed report formats** | Predefined tables and charts | ✅ SQL queries, custom analysis | +| **Service restart needed** | Must stop and restart | ✅ Runtime attachment | +| **High learning curve** | Different syntax per tool | ✅ Familiar SQL + Python | +| **Distributed is hard** | Analyze each node separately | ✅ Unified cross-node view | + +### Core Technical Advantages + +=== "🔧 Dynamic Probe Injection" + + Professional-grade code injection based on ptrace: + + - No source code modification required + - Supports x86_64 and aarch64 architectures + - Complete state save and restore mechanism + - Production-safe implementation + +=== "📊 SQL Query Engine" + + Built on Apache DataFusion: + + - Standard SQL syntax, no new language to learn + - Millisecond query response + - Complex aggregations, window functions + - Plugin-based data source extension + +=== "🐍 Remote REPL" + + Execute Python directly in target process: + + - Inspect any variable or object + - Modify runtime state in real-time + - No need to stop training jobs + - Full Python environment access + +=== "🌐 Distributed Support" + + Native multi-node support: + + - Unified cross-node queries + - Automatic process discovery + - Communication latency analysis + - Cluster-wide performance view + +## 🔄 Comparison with Alternatives + +| Feature | Probing | py-spy | Perfetto | torch.profiler | +|:--------|:-------:|:------:|:--------:|:--------------:| +| **Zero Intrusion** | ✅ | ✅ | ❌ | ❌ | +| **Dynamic Injection** | ✅ | ❌ | ❌ | ❌ | +| **SQL Queries** | ✅ | ❌ | ❌ | ❌ | +| **Remote REPL** | ✅ | ❌ | ❌ | ❌ | +| **Distributed Support** | ✅ | ❌ | ✅ | ⚠️ | +| **AI Framework Integration** | ✅ | ❌ | ⚠️ | ✅ | +| **Web UI** | ✅ | ❌ | ✅ | ✅ | + ## Key Features - **Zero Intrusion** - Attach to running processes without code changes @@ -30,6 +92,9 @@ probing -t inject # Query performance data probing -t query "SELECT * FROM python.torch_trace LIMIT 10" + +# Remote REPL debugging +probing -t repl ``` ## Use Cases diff --git a/docs/src/index.zh.md b/docs/src/index.zh.md index facff99..1c25767 100644 --- a/docs/src/index.zh.md +++ b/docs/src/index.zh.md @@ -11,6 +11,68 @@ hide: toc **Probing** 是一个面向分布式 AI 应用的动态性能分析器。 +## 🎯 为什么选择 Probing? + +### 传统 Profiler 的痛点 + +| 问题 | 传统方案 | Probing 方案 | +|------|----------|--------------| +| **需要代码修改** | 添加日志、计时器、装饰器 | ✅ 动态注入,零代码修改 | +| **固定报告格式** | 预设的表格和图表 | ✅ SQL 查询,自定义分析 | +| **需要重启服务** | 必须停止再启动 | ✅ 运行时附加 | +| **学习成本高** | 各工具语法不同 | ✅ 熟悉的 SQL + Python | +| **分布式困难** | 各节点独立分析 | ✅ 跨节点统一视图 | + +### 核心技术优势 + +=== "🔧 动态探针注入" + + 基于 ptrace 的专业级代码注入技术: + + - 无需修改目标程序源码 + - 支持 x86_64 和 aarch64 架构 + - 完整的状态保存与恢复机制 + - 生产环境安全可用 + +=== "📊 SQL 查询引擎" + + 基于 Apache DataFusion 构建: + + - 标准 SQL 语法,无需学习新语言 + - 毫秒级查询响应 + - 支持复杂聚合、窗口函数 + - 插件式数据源扩展 + +=== "🐍 远程 REPL" + + 直接在目标进程中执行 Python: + + - 检查任意变量和对象 + - 实时修改运行状态 + - 无需停止训练任务 + - 完整的 Python 环境 + +=== "🌐 分布式支持" + + 原生支持多节点场景: + + - 统一的跨节点查询 + - 自动进程发现 + - 通信延迟分析 + - 集群级性能视图 + +## 🔄 竞品对比 + +| 特性 | Probing | py-spy | Perfetto | torch.profiler | +|:-----|:-------:|:------:|:--------:|:--------------:| +| **零侵入** | ✅ | ✅ | ❌ | ❌ | +| **动态注入** | ✅ | ❌ | ❌ | ❌ | +| **SQL 查询** | ✅ | ❌ | ❌ | ❌ | +| **远程 REPL** | ✅ | ❌ | ❌ | ❌ | +| **分布式支持** | ✅ | ❌ | ✅ | ⚠️ | +| **AI 框架集成** | ✅ | ❌ | ⚠️ | ✅ | +| **Web UI** | ✅ | ❌ | ✅ | ✅ | + ## 核心特性 - **零侵入** - 无需修改代码即可附加到运行中的进程 @@ -30,6 +92,9 @@ probing -t inject # 查询性能数据 probing -t query "SELECT * FROM python.torch_trace LIMIT 10" + +# 远程 REPL 调试 +probing -t repl ``` ## 使用场景 diff --git a/docs/src/why-probing.md b/docs/src/why-probing.md new file mode 100644 index 0000000..2b08d23 --- /dev/null +++ b/docs/src/why-probing.md @@ -0,0 +1,234 @@ +# Why Probing? + +Probing is a dynamic performance analysis tool designed specifically for AI applications. This document details Probing's core technical advantages and design philosophy. + +## Design Philosophy + +### Zero Intrusion Principle + +Traditional profiling tools typically require code modifications: + +```python +# ❌ Traditional approach: requires code changes +import logging +import time + +def train_step(model, data): + start = time.time() + logging.info("Starting train step") + + loss = model(data) + + logging.info(f"Train step took {time.time() - start:.3f}s") + return loss +``` + +Probing takes a completely different approach: + +```bash +# ✅ Probing approach: zero code changes +probing -t inject +probing -t query "SELECT * FROM python.torch_trace" +``` + +### SQL-Driven Analysis + +Why SQL instead of fixed reports? + +| Fixed Reports | SQL Queries | +|---------------|-------------| +| Predefined formats | Flexible custom queries | +| Export then process | Real-time interactive analysis | +| Hard to drill down | Aggregate on any dimension | +| Learn proprietary syntax | Universal SQL skills | + +```sql +-- Example: Find the 10 most time-consuming operations +SELECT + operation_name, + AVG(duration_ms) as avg_duration, + COUNT(*) as call_count +FROM python.torch_trace +WHERE timestamp > now() - interval '5 minutes' +GROUP BY operation_name +ORDER BY avg_duration DESC +LIMIT 10 +``` + +## Core Technical Advantages + +### 1. Dynamic Probe Injection + +#### Technical Implementation + +Probing uses the Linux ptrace system call for code injection: + +``` +Target Process Probing CLI + │ │ + ▼ ▼ +┌─────────┐ 1. ptrace attach ┌─────────┐ +│ Running │ ◄──────────────────────│ Tracer │ +│ Process │ │ │ +└─────────┘ └─────────┘ + │ │ + ▼ ▼ +┌─────────┐ 2. Inject shellcode ┌─────────┐ +│ Paused │ ◄──────────────────────│ Inject │ +│ │ │ │ +└─────────┘ └─────────┘ + │ │ + ▼ ▼ +┌─────────┐ 3. Call dlopen ┌─────────┐ +│ Load │ ◄──────────────────────│ Execute │ +│ Library │ │ │ +└─────────┘ └─────────┘ + │ │ + ▼ ▼ +┌─────────┐ 4. Resume execution ┌─────────┐ +│ Resume │ ◄──────────────────────│ Detach │ +│ + Probe │ │ │ +└─────────┘ └─────────┘ +``` + +#### Safety Guarantees + +- **Complete state preservation**: Save all registers and overwritten memory before injection +- **Atomic operations**: Full rollback on injection failure +- **Permission checks**: Only process owner can inject +- **Memory alignment**: Ensure 16-byte stack pointer alignment (x86-64 ABI) + +### 2. DataFusion-Based Query Engine + +#### Why DataFusion? + +| Feature | DataFusion | Custom Engine | +|---------|------------|---------------| +| Development cost | Low | High | +| SQL compatibility | Complete | Partial | +| Performance optimization | Mature | Needs accumulation | +| Community support | Active | None | +| Arrow integration | Native | Needs adaptation | + +#### Plugin Architecture + +```rust +/// Plugin trait definition +pub trait Plugin { + fn name(&self) -> String; + fn kind(&self) -> PluginType; + fn namespace(&self) -> String; + fn register_table(&self, ...) -> Result<()>; +} +``` + +Built-in plugins: + +- `python.backtrace` - Python call stack +- `python.torch_trace` - PyTorch operation tracing +- `python.memory` - Memory usage statistics +- `system.process` - Process information + +### 3. Remote REPL + +#### How It Works + +``` +┌──────────────┐ ┌──────────────┐ +│ CLI REPL │ HTTP │ Target Process│ +│ │ ─────► │ │ +│ >>> expr │ │ Python │ +│ │ ◄───── │ Interpreter │ +│ result │ JSON │ Exec+Return │ +└──────────────┘ └──────────────┘ +``` + +#### Use Cases + +```python +# Connect to running process +probing -t repl + +>>> # Inspect model parameters +>>> model = [m for m in gc.get_objects() if isinstance(m, torch.nn.Module)][0] +>>> print(sum(p.numel() for p in model.parameters())) +125000000 + +>>> # Check GPU memory +>>> print(torch.cuda.memory_allocated() / 1e9, "GB") +12.5 GB + +>>> # Check current loss +>>> print(loss.item()) +0.0234 +``` + +### 4. Multi-Version Python Support + +Probing supports all Python versions from 3.4 to 3.13: + +``` +Python Version │ Frame Structure │ Support Status +──────────────┼──────────────────┼──────────────── +3.4 - 3.10 │ PyFrameObject │ ✅ +3.11 │ _PyCFrame │ ✅ +3.12 │ _PyCFrame │ ✅ +3.13+ │ current_frame │ ✅ +``` + +This is achieved through version-specific bindings to Python's internal structures. + +## Performance Characteristics + +| Metric | Target | Measured | +|--------|--------|----------| +| CPU overhead | < 5% | ~2-3% | +| Memory overhead | < 50MB | ~30MB | +| Query latency | < 10ms | ~5ms | +| Injection time | < 100ms | ~50ms | + +## Detailed Comparison with Alternatives + +### vs py-spy + +| Dimension | Probing | py-spy | +|-----------|---------|--------| +| Core function | Full analysis platform | Sampling profiler | +| Data querying | SQL | Fixed format | +| Code execution | REPL support | Not supported | +| Distributed | Native support | Not supported | +| Use case | AI training debugging | General Python | + +### vs torch.profiler + +| Dimension | Probing | torch.profiler | +|-----------|---------|----------------| +| Code intrusion | None | Required | +| Runtime attach | Supported | Not supported | +| Query flexibility | SQL | Fixed API | +| Non-PyTorch support | Yes | No | + +### vs Perfetto + +| Dimension | Probing | Perfetto | +|-----------|---------|----------| +| Focus | AI applications | System tracing | +| Deployment complexity | Low | High | +| Python integration | Native | Limited | +| Learning curve | Low | High | + +## Summary + +Probing's unique value lies in integrating three powerful capabilities: + +1. **Dynamic Injection** - No code changes, runtime attachment +2. **SQL Queries** - Flexible data analysis capabilities +3. **Remote REPL** - Real-time interactive debugging + +This combination makes Probing particularly suitable for: + +- 🔬 AI researchers debugging training issues +- 🛠️ Framework developers analyzing performance bottlenecks +- 🏭 MLOps engineers monitoring production environments + +[Get Started with Probing →](quickstart.md) diff --git a/docs/src/why-probing.zh.md b/docs/src/why-probing.zh.md new file mode 100644 index 0000000..85546bc --- /dev/null +++ b/docs/src/why-probing.zh.md @@ -0,0 +1,234 @@ +# 为什么选择 Probing? + +Probing 是一个专为 AI 应用设计的动态性能分析工具。本文详细介绍 Probing 的核心技术优势和设计理念。 + +## 设计理念 + +### 零侵入原则 + +传统的性能分析工具通常需要修改代码: + +```python +# ❌ 传统方式:需要修改代码 +import logging +import time + +def train_step(model, data): + start = time.time() + logging.info("Starting train step") + + loss = model(data) + + logging.info(f"Train step took {time.time() - start:.3f}s") + return loss +``` + +Probing 的方式完全不同: + +```bash +# ✅ Probing 方式:零代码修改 +probing -t inject +probing -t query "SELECT * FROM python.torch_trace" +``` + +### SQL 驱动分析 + +为什么选择 SQL 而不是固定报告? + +| 传统报告 | SQL 查询 | +|----------|----------| +| 预设的固定格式 | 灵活的自定义查询 | +| 需要导出后处理 | 实时交互分析 | +| 难以深入钻取 | 任意维度聚合 | +| 学习专有语法 | 通用 SQL 技能 | + +```sql +-- 示例:找出最耗时的 10 个操作 +SELECT + operation_name, + AVG(duration_ms) as avg_duration, + COUNT(*) as call_count +FROM python.torch_trace +WHERE timestamp > now() - interval '5 minutes' +GROUP BY operation_name +ORDER BY avg_duration DESC +LIMIT 10 +``` + +## 核心技术优势 + +### 1. 动态探针注入 + +#### 技术实现 + +Probing 使用 Linux ptrace 系统调用实现代码注入: + +``` +目标进程 Probing CLI + │ │ + ▼ ▼ +┌─────────┐ 1. ptrace attach ┌─────────┐ +│ Running │ ◄──────────────────────│ Tracer │ +│ Process │ │ │ +└─────────┘ └─────────┘ + │ │ + ▼ ▼ +┌─────────┐ 2. 注入 shellcode ┌─────────┐ +│ Paused │ ◄──────────────────────│ Inject │ +│ │ │ │ +└─────────┘ └─────────┘ + │ │ + ▼ ▼ +┌─────────┐ 3. 调用 dlopen ┌─────────┐ +│ Load │ ◄──────────────────────│ Execute │ +│ Library │ │ │ +└─────────┘ └─────────┘ + │ │ + ▼ ▼ +┌─────────┐ 4. 恢复执行 ┌─────────┐ +│ Resume │ ◄──────────────────────│ Detach │ +│ + Probe │ │ │ +└─────────┘ └─────────┘ +``` + +#### 安全保证 + +- **状态完整保存**:注入前保存所有寄存器和被覆盖的内存 +- **原子性操作**:注入失败时完全回滚 +- **权限检查**:仅允许进程所有者注入 +- **内存对齐**:确保栈指针 16 字节对齐(x86-64 ABI) + +### 2. 基于 DataFusion 的查询引擎 + +#### 为什么选择 DataFusion? + +| 特性 | DataFusion | 自研引擎 | +|------|------------|----------| +| 开发成本 | 低 | 高 | +| SQL 兼容性 | 完整 | 部分 | +| 性能优化 | 成熟 | 需积累 | +| 社区支持 | 活跃 | 无 | +| Arrow 集成 | 原生 | 需适配 | + +#### 插件架构 + +```rust +/// 插件 trait 定义 +pub trait Plugin { + fn name(&self) -> String; + fn kind(&self) -> PluginType; + fn namespace(&self) -> String; + fn register_table(&self, ...) -> Result<()>; +} +``` + +内置插件: + +- `python.backtrace` - Python 调用栈 +- `python.torch_trace` - PyTorch 操作追踪 +- `python.memory` - 内存使用统计 +- `system.process` - 进程信息 + +### 3. 远程 REPL + +#### 工作原理 + +``` +┌──────────────┐ ┌──────────────┐ +│ CLI REPL │ HTTP │ 目标进程 │ +│ │ ─────► │ │ +│ >>> expr │ │ Python 解释器│ +│ │ ◄───── │ │ +│ result │ JSON │ 执行 + 返回 │ +└──────────────┘ └──────────────┘ +``` + +#### 使用场景 + +```python +# 连接到运行中的进程 +probing -t repl + +>>> # 检查模型参数 +>>> model = [m for m in gc.get_objects() if isinstance(m, torch.nn.Module)][0] +>>> print(sum(p.numel() for p in model.parameters())) +125000000 + +>>> # 检查 GPU 内存 +>>> print(torch.cuda.memory_allocated() / 1e9, "GB") +12.5 GB + +>>> # 检查当前 loss +>>> print(loss.item()) +0.0234 +``` + +### 4. Python 多版本支持 + +Probing 支持 Python 3.4 到 3.13 的所有版本: + +``` +Python 版本 │ Frame 结构 │ 支持状态 +──────────────┼───────────────┼────────── +3.4 - 3.10 │ PyFrameObject │ ✅ +3.11 │ _PyCFrame │ ✅ +3.12 │ _PyCFrame │ ✅ +3.13+ │ current_frame │ ✅ +``` + +这通过针对每个 Python 版本的内部结构 bindings 实现。 + +## 性能特征 + +| 指标 | 目标值 | 实际测量 | +|------|--------|----------| +| CPU 开销 | < 5% | ~2-3% | +| 内存开销 | < 50MB | ~30MB | +| 查询延迟 | < 10ms | ~5ms | +| 注入时间 | < 100ms | ~50ms | + +## 与竞品的详细对比 + +### vs py-spy + +| 维度 | Probing | py-spy | +|------|---------|--------| +| 核心功能 | 完整分析平台 | 采样 profiler | +| 数据查询 | SQL | 固定格式 | +| 代码执行 | REPL 支持 | 不支持 | +| 分布式 | 原生支持 | 不支持 | +| 适用场景 | AI 训练调试 | 通用 Python | + +### vs torch.profiler + +| 维度 | Probing | torch.profiler | +|------|---------|----------------| +| 代码侵入 | 无 | 需要 | +| 运行时附加 | 支持 | 不支持 | +| 查询灵活性 | SQL | 固定 API | +| 非 PyTorch 支持 | 支持 | 不支持 | + +### vs Perfetto + +| 维度 | Probing | Perfetto | +|------|---------|----------| +| 侧重点 | AI 应用 | 系统追踪 | +| 部署复杂度 | 低 | 高 | +| Python 集成 | 原生 | 有限 | +| 学习曲线 | 低 | 高 | + +## 总结 + +Probing 的独特价值在于将三个强大能力整合在一起: + +1. **动态注入** - 无需修改代码,运行时附加 +2. **SQL 查询** - 灵活的数据分析能力 +3. **远程 REPL** - 实时交互调试 + +这种组合使得 Probing 特别适合: + +- 🔬 AI 研究人员调试训练问题 +- 🛠️ 框架开发者分析性能瓶颈 +- 🏭 MLOps 工程师监控生产环境 + +[开始使用 Probing →](quickstart.zh.md) From 3434c2ae7be0cb57dcb51d16d1611635ea5013d0 Mon Sep 17 00:00:00 2001 From: Reiase Date: Fri, 2 Jan 2026 00:09:28 +0800 Subject: [PATCH 3/5] Enhance CI workflows by adding build tools installation - Added installation of `binutils` in both `pypi.yml` and `test.yml` workflows to ensure necessary build tools are available. - Updated the test workflow to run `pytest` directly, improving clarity and consistency in test execution. - Minor documentation updates in `why-probing.md` and its Chinese counterpart to maintain alignment with recent changes. --- .github/workflows/pypi.yml | 10 ++++++++++ .github/workflows/test.yml | 12 +++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index db2f5da..c4cab6c 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -31,6 +31,11 @@ jobs: install-cargo-tools: 'true' install-python-deps: 'false' + - name: Install build tools + run: | + sudo apt-get update + sudo apt-get install -y binutils + - name: Build frontend run: make web/dist @@ -90,6 +95,11 @@ jobs: install-python-deps: 'false' install-cargo-tools: 'true' + - name: Install build tools + run: | + sudo apt-get update + sudo apt-get install -y binutils + - name: Build frontend run: make web/dist diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cc2678e..d20a80c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,11 @@ jobs: run: | pip install pytest pytest-cov coverage + - name: Install build tools + run: | + sudo apt-get update + sudo apt-get install -y binutils + - name: Build package run: | make ZIG=1 wheel @@ -88,6 +93,11 @@ jobs: - name: Cache Rust artifacts uses: Swatinem/rust-cache@v2 + - name: Install build tools + run: | + sudo apt-get update + sudo apt-get install -y binutils + - name: Build web frontend run: | # Ensure wasm32 target is available (should already be installed by setup-build-env) @@ -247,4 +257,4 @@ jobs: - name: Run Python Tests env: PROBING: "1" - run: maturin develop && pytest tests + run: pytest tests From 30b71122f9a52beef53932a496ad44a404a118f6 Mon Sep 17 00:00:00 2001 From: Reiase Date: Fri, 2 Jan 2026 01:09:01 +0800 Subject: [PATCH 4/5] Enhance cache key strategy in GitHub Actions setup - Updated the cache key in the setup-build-env action to include OS, action.yml hash, Python version, Rust toolchain, and dioxus-cli version for improved cache management. - Added detailed comments explaining the cache key components to enhance clarity and maintainability. --- .github/actions/setup-build-env/action.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-build-env/action.yml b/.github/actions/setup-build-env/action.yml index 9e1e117..a00b358 100644 --- a/.github/actions/setup-build-env/action.yml +++ b/.github/actions/setup-build-env/action.yml @@ -56,7 +56,18 @@ runs: ~/.cargo/registry/cache/ ~/.cargo/git/db/ ~/zig - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + # Cache key includes: + # - OS: different OSes have different binaries + # - action.yml hash: invalidates cache when action changes + # - Python version: different Python versions may need different tools + # - Rust toolchain: nightly toolchain version + # - Tool versions: dioxus-cli version (update when tool versions change) + # - Cargo.lock: invalidates when dependencies change + key: ${{ runner.os }}-setup-${{ hashFiles('.github/actions/setup-build-env/action.yml') }}-python-${{ inputs.python-version }}-rust-nightly-dx-0.7.2-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-setup-${{ hashFiles('.github/actions/setup-build-env/action.yml') }}-python-${{ inputs.python-version }}-rust-nightly-dx-0.7.2-cargo- + ${{ runner.os }}-setup-${{ hashFiles('.github/actions/setup-build-env/action.yml') }}-cargo- + ${{ runner.os }}-cargo- - name: Install cargo tools if: inputs.install-cargo-tools == 'true' From 13117a39c628983bb6c86f9f4791695d0b0f3487 Mon Sep 17 00:00:00 2001 From: Reiase Date: Fri, 2 Jan 2026 01:21:42 +0800 Subject: [PATCH 5/5] Add installation of system build tools for Linux in setup-build-env action - Introduced a step to install `binutils` in the setup-build-env action specifically for Linux runners, ensuring necessary build tools are available. - Removed redundant installation steps from pypi.yml and test.yml workflows to streamline the CI process. --- .github/actions/setup-build-env/action.yml | 7 +++++++ .github/workflows/pypi.yml | 10 ---------- .github/workflows/test.yml | 10 ---------- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/.github/actions/setup-build-env/action.yml b/.github/actions/setup-build-env/action.yml index a00b358..9c45809 100644 --- a/.github/actions/setup-build-env/action.yml +++ b/.github/actions/setup-build-env/action.yml @@ -46,6 +46,13 @@ runs: - name: Install Zig toolchain uses: mlugg/setup-zig@v2 + - name: Install system build tools (Linux) + if: runner.os == 'Linux' + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y binutils + - name: Cache dependencies id: cache-deps uses: actions/cache@v4 diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index c4cab6c..db2f5da 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -31,11 +31,6 @@ jobs: install-cargo-tools: 'true' install-python-deps: 'false' - - name: Install build tools - run: | - sudo apt-get update - sudo apt-get install -y binutils - - name: Build frontend run: make web/dist @@ -95,11 +90,6 @@ jobs: install-python-deps: 'false' install-cargo-tools: 'true' - - name: Install build tools - run: | - sudo apt-get update - sudo apt-get install -y binutils - - name: Build frontend run: make web/dist diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d20a80c..873ea1b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,11 +48,6 @@ jobs: run: | pip install pytest pytest-cov coverage - - name: Install build tools - run: | - sudo apt-get update - sudo apt-get install -y binutils - - name: Build package run: | make ZIG=1 wheel @@ -93,11 +88,6 @@ jobs: - name: Cache Rust artifacts uses: Swatinem/rust-cache@v2 - - name: Install build tools - run: | - sudo apt-get update - sudo apt-get install -y binutils - - name: Build web frontend run: | # Ensure wasm32 target is available (should already be installed by setup-build-env)