[package]
name = "shimmy"
version = "1.5.6"
edition = "2021"
license = "MIT"
description = "Lightweight sub-20MB Ollama alternative with native SafeTensors support. No Python dependencies, 2x faster loading. Now with GitHub Spec-Kit integration for systematic development."
homepage = "https://github.com/Michael-A-Kuykendall/shimmy"
repository = "https://github.com/Michael-A-Kuykendall/shimmy"
readme = "README.md"
keywords = ["llm", "local-ai", "inference", "server", "api"]
categories = ["command-line-utilities", "web-programming::http-server"]
authors = ["Michael A. Kuykendall <michaelallenkuykendall@gmail.com>"]
exclude = [
    "docs-internal/*",
    "test-models/*",
    "target/*",
    "target-minimal/*",
    "release-artifacts/*",
    "libs/*",
    "shimmy-vscode/*",
    "tests/*",
    ".*",
    "*.sh",
    "*.ps1",
    "*.py",
    "*.log",
    "*.bat",
    "*.exe",
    ".github/*",
    ".claude/*",
    ".internal/*",
    "deployment_failures.log",
    "shimmy-tui-*/*",
    "SHIMMY_TUI_*",
    "shimmy-tui-*",
    "benchmark_results.json",
    "Dockerfile*",
    "ROADMAP.md",
    "DCO.md",
    "CODE_OF_CONDUCT.md",
    "CONTRIBUTING.md",
    "SPONSORS.md",
    "assets/*",
    "benches/*",
    "docs/*",
    "shimmy-*/*",
    "shimmy-*.exe",
    "shimmy",
    "shimmy.exe",
    "spec-kit-env/*",
    "specs/*",
    "memory/*"
]
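
# To verify what actually ships in the published crate after these excludes
# (standard Cargo command, shown here as an illustrative usage note):
#   cargo package --list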

[features]
default = ["huggingface", "llama"] # macOS ARM64 i8mm issues fixed via forked llama-cpp-2

# Engine backends
llama = ["dep:llama-cpp-2"]
huggingface = [] # Python integration, no additional Rust deps
mlx = [] # Apple MLX integration for Metal GPU acceleration on Apple Silicon

# GPU acceleration backends for llama.cpp
llama-cuda = ["llama", "llama-cpp-2/cuda"] # NVIDIA CUDA GPU acceleration
llama-vulkan = ["llama"] # Vulkan GPU acceleration (cross-platform)
llama-opencl = ["llama"] # OpenCL GPU acceleration (AMD, Intel, etc.)

# Convenience feature sets
fast = ["huggingface"] # Fast compilation - no C++ deps
full = ["huggingface", "llama", "mlx"] # Full compilation - includes all backends
gpu = ["huggingface", "llama-cuda", "llama-vulkan", "llama-opencl"] # GPU-optimized build
apple = ["huggingface", "mlx"] # Apple Silicon optimized - MLX + HuggingFace
coverage = ["huggingface"] # Coverage testing - minimal deps for faster builds
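
# Illustrative builds of the feature sets above (standard Cargo flags;
# `--no-default-features` drops the default llama backend where noted):
#   cargo build --release --no-default-features --features fast   # quick build, no C++ deps
#   cargo build --release --features gpu                          # all GPU backends
#   cargo build --release --no-default-features --features apple  # Apple Silicon (MLX)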

[dependencies]
anyhow = "1"
axum = { version = "0.7", features = ["http1","json","ws"] }
async-trait = "0.1"
bytes = "1"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4", features = ["derive"] }
futures-util = "0.3"
lazy_static = "1.5"
memmap2 = "0.9"
minijinja = { version = "2", features = ["loader"] }
parking_lot = "0.12"
rand = "0.8"
safetensors = "0.4"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sys-info = "0.9"
sysinfo = "0.30"
tempfile = "3"
thiserror = "1"
tokio = { version = "1", features = ["macros","rt-multi-thread","signal","process","fs"] }
tokio-stream = "0.1"
tracing = "0.1"
tracing-subscriber = { version = "0.3.20", features = ["env-filter"] }
uuid = { version = "1", features = ["v4", "serde"] }
dirs = "5.0"
reqwest = { version = "0.11", features = ["json", "rustls-tls"], default-features = false }

# llama.cpp bindings (optional)
llama-cpp-2 = { version = "0.1.118", optional = true, default-features = false }

# Patch in the forked llama-cpp-2 with the macOS ARM64 i8mm compatibility fix
[patch.crates-io]
llama-cpp-2 = { git = "https://github.com/Michael-A-Kuykendall/llama-cpp-rs.git", branch = "fix-macos-arm64-i8mm", package = "llama-cpp-2" }
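
# To confirm the patched fork is the version actually resolved
# (standard Cargo command, shown as an illustrative check):
#   cargo tree -i llama-cpp-2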

[dev-dependencies]
tokio-tungstenite = "0.20"
criterion = { version = "0.5", features = ["html_reports"] }

# Mock-testing infrastructure (tempfile and rand are also in the main dependencies)
tempfile = "3" # For creating temporary test directories
rand = "0.8" # For randomized testing scenarios

[profile.release]
lto = true
codegen-units = 1
opt-level = "z" # Optimize for binary size

# Optimize build times for development
[profile.dev]
opt-level = 1
debug = true

# Faster builds for dependencies
[profile.dev.package."*"]
opt-level = 2
debug = false

# Benchmark configuration (harness = false lets Criterion supply its own main)
[[bench]]
name = "model_loading"
harness = false

[[bench]]
name = "generation_performance"
harness = false
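
# Run the Criterion benches (standard Cargo usage; bench sources are
# assumed to live at benches/<name>.rs, Cargo's default location):
#   cargo bench --bench model_loading
#   cargo bench --bench generation_performance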