Skip to content

Commit

Permalink
creating more experiments and created dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
cykj40 committed Jan 16, 2025
1 parent dd7e8b5 commit 5570a53
Show file tree
Hide file tree
Showing 11 changed files with 585 additions and 65 deletions.
18 changes: 12 additions & 6 deletions dashboard/src/app.css
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
.app {
max-width: 800px;
padding: 20px;
max-width: 1000px;
margin: 0 auto;
padding: 2rem;
}

.controls {
margin: 2rem 0;
margin: 20px 0;
}

.graph {
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

select {
margin-left: 1rem;
padding: 0.5rem;
font-size: 1rem;
padding: 8px;
margin-left: 10px;
}
27 changes: 20 additions & 7 deletions dashboard/src/app.tsx
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
import { useState } from 'react'
import { useEffect, useState } from 'react'
import ExperimentGraph from './components/ExperimentGraph'
import resultsData from '../../results.json'
import { Results } from '../../types'
import type { Results } from '../../types'
import './App.css'

const App = () => {
const results = resultsData as Results
const [selectedExperiment, setSelectedExperiment] = useState(
results.experiments[0].name
)
const [results, setResults] = useState<Results>({ experiments: [] })
const [selectedExperiment, setSelectedExperiment] = useState<string>('')

useEffect(() => {
// Fetch results from the results.json file
fetch('/results.json')
.then(res => res.json())
.then(data => {
console.log('Loaded results:', data) // Debug log
setResults(data)
})
.catch(err => {
console.error('Failed to load results:', err)
})
}, [])

console.log('Current state:', { results, selectedExperiment }) // Debug log

const currentExperiment = results.experiments.find(
(exp) => exp.name === selectedExperiment
Expand All @@ -32,6 +44,7 @@ const App = () => {
value={selectedExperiment}
onChange={(e) => setSelectedExperiment(e.target.value)}
>
<option value="">Select an experiment</option>
{results.experiments.map((exp) => (
<option key={exp.name} value={exp.name}>
{exp.name}
Expand Down
43 changes: 28 additions & 15 deletions dashboard/src/components/ExperimentGraph.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,47 @@ import {
Tooltip,
Legend,
} from 'recharts'
import type { Experiment, Set } from '../../../types'
import type { Experiment } from '../../../types'

interface Props {
experiment: Experiment
}

const ExperimentGraph = ({ experiment }: Props) => {
const data = experiment.sets.map((set: Set, index: number) => ({
const data = experiment.sets.map((set, index) => ({
name: `Run ${index + 1}`,
score: set.score,
date: new Date(set.createdAt).toLocaleDateString(),
runs: set.runs.length
}))

console.log('Graph data:', data) // Debug log

if (data.length === 0) {
return <div>No data available for this experiment</div>
}

return (
<div className="graph">
<h2>{experiment.name}</h2>
<LineChart width={800} height={400} data={data}>
<CartesianGrid strokeDasharray="3 3" />
<XAxis dataKey="name" />
<YAxis domain={[0, 1]} />
<Tooltip />
<Legend />
<Line
type="monotone"
dataKey="score"
stroke="#8884d8"
activeDot={{ r: 8 }}
/>
</LineChart>
<div style={{ overflowX: 'auto' }}>
<LineChart width={800} height={400} data={data}>
<CartesianGrid strokeDasharray="3 3" />
<XAxis dataKey="name" />
<YAxis domain={[0, 1]} />
<Tooltip
formatter={(value: number) => value.toFixed(2)}
labelFormatter={(label) => `${label}`}
/>
<Legend />
<Line
type="monotone"
dataKey="score"
stroke="#8884d8"
activeDot={{ r: 8 }}
/>
</LineChart>
</div>
</div>
)
}
Expand Down
8 changes: 7 additions & 1 deletion dashboard/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,11 @@ import react from '@vitejs/plugin-react'

// https://vite.dev/config/
export default defineConfig({
plugins: [react()]
plugins: [react()],
server: {
watch: {
usePolling: true,
},
},
publicDir: '../', // This will allow access to files in the parent directory
})
17 changes: 0 additions & 17 deletions evals/dadJoke.eval.ts

This file was deleted.

45 changes: 45 additions & 0 deletions evals/experiments/allTools.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import 'dotenv/config'
import { runLLM } from '../../src/llm'
import { redditToolDefinition } from '../../src/tools/reddit'
import { dadJokeToolDefinition } from '../../src/tools/dadJoke'
import { generateImageToolDefinition } from '../../src/tools/generateImage'
import { runEval } from '../lib/evalTools'
import { ToolCallMatch } from '../lib/scorers'

/**
 * Builds the assistant message used as the `expected` value of an eval case:
 * an assistant turn with no text content that carries exactly one function
 * tool call.
 *
 * @param toolName - Name of the function the tool call should reference.
 * @returns An assistant message whose single tool call uses the fixed id
 *   `'test-id'` and the empty JSON object string `'{}'` as its arguments.
 */
const createToolCallMessage = (toolName: string) => {
  // The id and arguments are fixed placeholder values; only the name varies.
  const expectedCall = {
    id: 'test-id',
    type: 'function' as const,
    function: { name: toolName, arguments: '{}' },
  }

  return {
    role: 'assistant' as const,
    content: null,
    tool_calls: [expectedCall],
  }
}

/**
 * Runs the `allTools` experiment.
 *
 * Each prompt is sent to `runLLM` as a single user message with the reddit,
 * dad-joke and generate-image tool definitions all supplied together. The
 * expected value for every case is an assistant tool-call message naming the
 * tool that matches the prompt, and results are scored with `ToolCallMatch`.
 *
 * @returns Whatever `runEval` returns for this experiment.
 */
export const allToolsEval = () => {
  // One case per tool: the prompt, and the tool call it is expected to produce.
  const cases = [
    {
      input: 'find me something interesting on reddit',
      expected: createToolCallMessage(redditToolDefinition.function.name),
    },
    {
      input: 'tell me a dad joke',
      expected: createToolCallMessage(dadJokeToolDefinition.function.name),
    },
    {
      input: 'generate an image of a sunset',
      expected: createToolCallMessage(generateImageToolDefinition.function.name),
    },
  ]

  return runEval('allTools', {
    task: (input) =>
      runLLM({
        messages: [{ role: 'user', content: input }],
        tools: [
          redditToolDefinition,
          dadJokeToolDefinition,
          generateImageToolDefinition,
        ],
      }),
    data: cases,
    scorers: [ToolCallMatch],
  })
}
36 changes: 36 additions & 0 deletions evals/experiments/dadJoke.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import 'dotenv/config'

import { runLLM } from '../../src/llm'
import { dadJokeToolDefinition } from '../../src/tools/dadJoke'
import { runEval } from '../evalTools'
import { ToolCallMatch } from '../scorers'

/**
 * Builds the assistant message used as the `expected` value of an eval case:
 * an assistant turn with no text content that carries exactly one function
 * tool call.
 *
 * @param toolName - Name of the function the tool call should reference.
 * @returns An assistant message whose single tool call uses the fixed id
 *   `'test-id'` and the empty JSON object string `'{}'` as its arguments.
 */
const createToolCallMessage = (toolName: string) => {
  // The id and arguments are fixed placeholder values; only the name varies.
  const expectedCall = {
    id: 'test-id',
    type: 'function' as const,
    function: { name: toolName, arguments: '{}' },
  }

  return {
    role: 'assistant' as const,
    content: null,
    tool_calls: [expectedCall],
  }
}

/**
 * Runs the `dadJoke` experiment.
 *
 * The prompt is sent to `runLLM` as a single user message with only the
 * dad-joke tool definition supplied. The expected value is an assistant
 * tool-call message naming that tool, and results are scored with
 * `ToolCallMatch`.
 *
 * @returns Whatever `runEval` returns for this experiment.
 */
export const dadJokeEval = () => {
  const cases = [
    {
      input: 'Tell me a funny dad joke',
      expected: createToolCallMessage(dadJokeToolDefinition.function.name),
    },
  ]

  return runEval('dadJoke', {
    task: (input) =>
      runLLM({
        messages: [{ role: 'user', content: input }],
        tools: [dadJokeToolDefinition],
      }),
    data: cases,
    scorers: [ToolCallMatch],
  })
}
40 changes: 40 additions & 0 deletions evals/experiments/generateImage.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import 'dotenv/config'

import { runLLM } from '../../src/llm'
import { generateImageToolDefinition } from '../../src/tools/generateImage'
import { runEval } from '../lib/evalTools'
import { ToolCallMatch } from '../lib/scorers'

/**
 * Builds the assistant message used as the `expected` value of an eval case:
 * an assistant turn with no text content that carries exactly one function
 * tool call.
 *
 * @param toolName - Name of the function the tool call should reference.
 * @returns An assistant message whose single tool call uses the fixed id
 *   `'test-id'` and the empty JSON object string `'{}'` as its arguments.
 */
const createToolCallMessage = (toolName: string) => {
  // The id and arguments are fixed placeholder values; only the name varies.
  const expectedCall = {
    id: 'test-id',
    type: 'function' as const,
    function: { name: toolName, arguments: '{}' },
  }

  return {
    role: 'assistant' as const,
    content: null,
    tool_calls: [expectedCall],
  }
}

/**
 * Runs the `generateImage` experiment.
 *
 * Each prompt is sent to `runLLM` as a single user message with only the
 * generate-image tool definition supplied. Both cases expect an assistant
 * tool-call message naming that tool, and results are scored with
 * `ToolCallMatch`.
 *
 * @returns Whatever `runEval` returns for this experiment.
 */
export const generateImageEval = () => {
  // Both prompts are expected to resolve to the same tool call.
  const cases = [
    {
      input: 'Generate an image of a sunset',
      expected: createToolCallMessage(generateImageToolDefinition.function.name),
    },
    {
      input: 'take a photo of the sunset',
      expected: createToolCallMessage(generateImageToolDefinition.function.name),
    },
  ]

  return runEval('generateImage', {
    task: (input) =>
      runLLM({
        messages: [{ role: 'user', content: input }],
        tools: [generateImageToolDefinition],
      }),
    data: cases,
    scorers: [ToolCallMatch],
  })
}
44 changes: 29 additions & 15 deletions evals/lib/evalTools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,31 +98,45 @@ export const saveSet = async (

export const runEval = async <T = any>(
experiment: string,
{
task,
data,
scorers,
}: {
task: (input: any) => Promise<T | TaskResult<T>>;
data: { input: any; expected?: T; reference?: string | string[] }[];
scorers: Scorer<T, any>[];
config: {
task: (input: any) => Promise<T | TaskResult<T>>
data: { input: any; expected?: T; reference?: string | string[] }[]
scorers: Scorer<T, any>[]
}
) => {
const results = await Promise.all(
data.map(async ({ input, expected, reference }) => {
const result = await task(input)
config.data.map(async ({ input, expected, reference }) => {
const result = await config.task(input)
console.log('Run result:', {
input,
output: result,
expected,
scores: await Promise.all(
config.scorers.map(async (scorer) => {
const score = await scorer({
input,
output: result,
expected,
reference,
})
return score
})
)
})

let context: string | string[] | undefined
let output: any
let output: T

if (result && typeof result === 'object' && 'context' in result) {
context = result.context
output = result.response ?? result.output ?? result
const taskResult = result as TaskResult<T>
context = taskResult.context
output = taskResult.output ?? result as T
} else {
output = result
output = result as T
}

const scores = await Promise.all(
scorers.map(async (scorer) => {
config.scorers.map(async (scorer) => {
const score = await scorer({
input,
output,
Expand Down
10 changes: 8 additions & 2 deletions evals/run.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { redditEval } from './reddit'
import { dadJokeEval } from './dadJoke.eval.ts'
import { dadJokeEval } from './experiments/dadJoke.eval'
import { generateImageEval } from './experiments/generateImage.eval'
import { allToolsEval } from './experiments/allTools.eval'

const evalName = process.argv[2]

Expand All @@ -8,9 +10,13 @@ if (!evalName) {
process.exit(1)
}

const evals: Record<string, () => Promise<void>> = {
type EvalFn = () => Promise<any>

const evals: Record<string, EvalFn> = {
reddit: redditEval,
dadJoke: dadJokeEval,
generateImage: generateImageEval,
allTools: allToolsEval,
}

const selectedEval = evals[evalName]
Expand Down
Loading

0 comments on commit 5570a53

Please sign in to comment.