Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit (hold Shift + click to select a range)
17cb812
Add benchmarks
RussellCanfield Oct 2, 2025
29e0f8d
feat:Anthropic Agent adapter
nitink23 Oct 9, 2025
6321d36
fix:AnthropicAdapter
nitink23 Oct 9, 2025
6c19161
feat: Oracle tools
nitink23 Oct 9, 2025
e4efe02
Chore:cleanup runtime tools
nitink23 Oct 9, 2025
749e893
chore: clean up agent adapter
nitink23 Oct 9, 2025
35a5908
chore:clean TS files
nitink23 Oct 10, 2025
67f5dfd
chore:cleaned up imports
nitink23 Oct 13, 2025
0c30fe8
fix:tsc-->tsx
nitink23 Oct 15, 2025
26e00a2
fix:cli typescript fix
nitink23 Oct 15, 2025
3daf20b
chore: add tsx dev dependency
nitink23 Oct 15, 2025
7cdcfcf
fix:CLI parser
nitink23 Oct 16, 2025
01f64c9
Merge pull request #3 from ZephyrCloudIO/feature/tool-calls-and-oracle
nitink23 Oct 17, 2025
9bbfffc
Feat:database
nitink23 Oct 18, 2025
e9d9585
feat: SQLite setup
nitink23 Oct 18, 2025
4c6ee88
fix: dependencies
nitink23 Oct 20, 2025
dc8cd5d
feat: UI changes
nitink23 Oct 21, 2025
fe5c318
feat: multi-select enabled
nitink23 Oct 22, 2025
9be71b3
feat: ze-bench ui fixes
nitink23 Oct 23, 2025
8a2858b
adding rsbuild project
zackarychapple Oct 23, 2025
f544223
reports pulling from db file
zackarychapple Oct 24, 2025
7acc7dc
reports working
zackarychapple Oct 24, 2025
4dc2161
initial reports done
zackarychapple Oct 24, 2025
9a93ef9
feat: stable version
nitink23 Oct 24, 2025
932b1fc
fix: dev-server start issue solved
nitink23 Oct 24, 2025
0aec9d1
fix: database persistence
nitink23 Oct 24, 2025
da63c99
fix: database persistence
nitink23 Oct 24, 2025
060b8d0
fix: added and changed docs
nitink23 Oct 24, 2025
23e8312
fix: openrouter-working
nitink23 Oct 27, 2025
cf8cf0d
feat:openrouter-anymodel
nitink23 Oct 27, 2025
aa8ee30
fix/report website update
nitink23 Oct 27, 2025
599ae15
Merge branch 'main' into fix/openrouter
nitink23 Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions benchmark-report/src/routes/agents.tsx
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import { createFileRoute } from '@tanstack/react-router'
import { useDatabase } from '@/DatabaseProvider'
import { useEffect, useState } from 'react'
import { ChartContainer, ChartTooltip, ChartTooltipContent } from '@/components/ui/chart'
import { Bar, BarChart, CartesianGrid, XAxis, YAxis, Legend } from 'recharts'

export const Route = createFileRoute('/agents')({
component: AgentsPage,
Expand Down Expand Up @@ -103,8 +101,10 @@ function AgentsPage() {
{agentStats.map((stat, idx) => (
<div key={idx} className="rounded-lg border bg-card p-6 shadow-sm hover:shadow-md transition-shadow">
<div className="mb-4">
<h3 className="text-lg font-semibold">{stat.model || 'Unknown Model'}</h3>
<p className="text-sm text-muted-foreground">{stat.agent}</p>
<h3 className="text-lg font-semibold">
{stat.model ? stat.model : `${stat.agent} (no model)`}
</h3>
<p className="text-sm text-muted-foreground">{stat.agent} agent</p>
</div>
<div className="space-y-3">
<div className="flex justify-between items-center">
Expand Down Expand Up @@ -171,8 +171,10 @@ function AgentsPage() {
{idx === 0 ? '🥇' : idx === 1 ? '🥈' : idx === 2 ? '🥉' : idx + 1}
</div>
<div className="flex-1">
<div className="font-medium">{stat.model || 'Unknown Model'}</div>
<div className="text-sm text-muted-foreground">{stat.agent}</div>
<div className="font-medium">
{stat.model ? stat.model : `${stat.agent} (no model)`}
</div>
<div className="text-sm text-muted-foreground">{stat.agent} agent</div>
</div>
<div className="text-right">
<div className="font-bold">{stat.avgScore.toFixed(2)}</div>
Expand Down
8 changes: 6 additions & 2 deletions benchmark-report/src/routes/batches.$batchId.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -538,13 +538,17 @@ function BatchDetailsPage() {
{agentPerformance.map((agent, index) => {
const rankDisplay = index < 3 ? `#${index + 1}` : `${index + 1}.`
const scoreColor = getScoreColor(agent.avgWeightedScore)
const modelStr = agent.model && agent.model !== 'default' ? ` [${agent.model}]` : ''

return (
<div key={`${agent.agent}-${agent.model}`} className="flex items-center justify-between p-3 border rounded-lg">
<div className="flex items-center gap-3">
<span className="font-mono text-sm w-8">{rankDisplay}</span>
<span className="font-semibold">{agent.agent}{modelStr}</span>
<span className="font-semibold">
{agent.model ? agent.model : agent.agent}
</span>
{agent.model && (
<span className="text-sm text-muted-foreground">({agent.agent})</span>
)}
</div>
<div className="flex items-center gap-6">
<span className="text-sm text-muted-foreground">
Expand Down
4 changes: 3 additions & 1 deletion benchmark-report/src/routes/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,9 @@ function Dashboard() {
topPerformers.map((performer, idx) => (
<div key={idx} className="flex items-center justify-between p-3 rounded-lg bg-muted/50">
<div className="flex-1">
<div className="font-medium">{performer.model || 'Unknown Model'}</div>
<div className="font-medium">
{performer.model ? performer.model : `${performer.agent} (no model)`}
</div>
<div className="text-sm text-muted-foreground">{performer.agent} agent</div>
</div>
<div className="text-right mr-4">
Expand Down
4 changes: 2 additions & 2 deletions benchmark-report/src/routes/runs.index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,11 @@ function RunsPage() {
{run.tier}
</span>
<span>•</span>
<span>{run.agent}</span>
<span>{run.model ? run.model : run.agent}</span>
{run.model && (
<>
<span>•</span>
<span className="truncate">{run.model}</span>
<span className="text-xs text-muted-foreground">{run.agent}</span>
</>
)}
{run.batchId && (
Expand Down
Loading
Loading