Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
150242f
added test data for agent test mode
satti-hari-krishna-reddy Dec 29, 2025
c6c6e80
Add mock handler for agent decision testing
satti-hari-krishna-reddy Dec 29, 2025
0f3b236
Add fuzzy URL matching for mock tool calls
satti-hari-krishna-reddy Dec 29, 2025
bf09aa2
feat: added a handler function to run all test agent usecases
satti-hari-krishna-reddy Dec 29, 2025
71650a6
feat: added a test mode for the run agent decision which intercepts t…
satti-hari-krishna-reddy Dec 29, 2025
7ccf97c
handle missing action results gracefully in test mode
satti-hari-krishna-reddy Dec 29, 2025
9ca856a
feat: integrate LLM explanations for failed test cases
satti-hari-krishna-reddy Dec 30, 2025
1d23490
moved all the functions to single file
satti-hari-krishna-reddy Dec 30, 2025
43878e4
refactored the agent test related structs
satti-hari-krishna-reddy Jan 6, 2026
9c6012e
added more helper functions
satti-hari-krishna-reddy Jan 6, 2026
3aed372
Remove the llm dependence and refactor the code to use structs instea…
satti-hari-krishna-reddy Jan 6, 2026
d77acb9
simplified the comments
satti-hari-krishna-reddy Jan 6, 2026
81342fa
fixed agent context loss in test mode by reading from cache
satti-hari-krishna-reddy Jan 7, 2026
fc30d04
cleaned up unused code and unnecessary comments
satti-hari-krishna-reddy Jan 7, 2026
176b963
cleanup: moved the test runner code to different repo
satti-hari-krishna-reddy Jan 8, 2026
8ef7052
implemented getting mock data via cache
satti-hari-krishna-reddy Jan 8, 2026
f67e777
implemented a cache-based system for passing mock tool call data from…
satti-hari-krishna-reddy Jan 8, 2026
1b3f4c3
cleanup
satti-hari-krishna-reddy Jan 8, 2026
4500fc6
add fallback to filepath method when cache isn't found
satti-hari-krishna-reddy Jan 8, 2026
53ff1d5
removed the testcase json file to clean up
satti-hari-krishna-reddy Jan 8, 2026
390766d
rename to agent_mock.go
satti-hari-krishna-reddy Jan 8, 2026
079d2d1
reduced the TTL to 10 mins
satti-hari-krishna-reddy Jan 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
353 changes: 353 additions & 0 deletions agent_mock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,353 @@
package shuffle

import (
"context"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"log"
"net/url"
"os"
"path/filepath"
)

// RunAgentDecisionMockHandler resolves an agent decision against mock data
// instead of executing the real tool call.
//
// It looks up a mock response via GetMockSingulResponse using the execution ID
// and the decision's fields, decodes it as a SchemalessOutput, and returns the
// payload found in its RawResponse field:
//   - a string is returned as its bytes,
//   - a []byte is returned as-is,
//   - a JSON object is re-encoded with one-space indentation,
//   - anything else (or a failed re-encode) falls back to the full mock response.
//
// Returns the body, an empty string, the decision's tool name, and an error.
// When decoding the mock response fails, the undecoded response is returned
// together with the error.
func RunAgentDecisionMockHandler(execution WorkflowExecution, decision AgentDecision) ([]byte, string, string, error) {
	execID := execution.ExecutionId
	log.Printf("[DEBUG][%s] Mock handler called for tool=%s, action=%s", execID, decision.Tool, decision.Action)

	response, err := GetMockSingulResponse(execID, decision.Fields)
	if err != nil {
		log.Printf("[ERROR][%s] Failed to get mock response: %s", execID, err)
		return nil, "", decision.Tool, err
	}

	var outputMapped SchemalessOutput
	if err = json.Unmarshal(response, &outputMapped); err != nil {
		log.Printf("[ERROR][%s] Failed to unmarshal mock response: %s", execID, err)
		return response, "", decision.Tool, err
	}

	// Default to the whole mock response; narrow it to raw_response when that
	// field holds something we know how to turn into bytes.
	body := response
	switch raw := outputMapped.RawResponse.(type) {
	case string:
		body = []byte(raw)
	case []byte:
		body = raw
	case map[string]interface{}:
		indented, marshalErr := json.MarshalIndent(raw, "", " ")
		if marshalErr != nil {
			// Best effort: keep the full response as the body.
			log.Printf("[ERROR][%s] Failed to marshal raw response: %s", execID, marshalErr)
		} else {
			body = indented
		}
	}

	log.Printf("[DEBUG][%s] Returning mock response for %s (success=%v, response_size=%d bytes)",
		execID, decision.Tool, outputMapped.Success, len(body))

	return body, "", decision.Tool, nil
}

// GetMockSingulResponse returns the mock response matching the given request
// fields for an execution.
//
// Mock tool calls are read from the cache entry "agent_mock_<executionId>"
// when GetCache succeeds. Otherwise the function falls back to file-based
// mocks, which requires both AGENT_TEST_DATA_PATH and AGENT_TEST_USE_CASE to
// be set; the use case file is then loaded with loadUseCaseData.
//
// An error is returned when the cached value is not a byte slice, when the
// cached JSON cannot be decoded, when the required environment variables are
// missing, when the use case file cannot be loaded, or when no tool call
// matches the request.
func GetMockSingulResponse(executionId string, fields []Valuereplace) ([]byte, error) {
	ctx := context.Background()
	mockCacheKey := fmt.Sprintf("agent_mock_%s", executionId)
	cache, err := GetCache(ctx, mockCacheKey)

	if err == nil {
		// Checked assertion: an unexpected cache value must surface as an
		// error instead of panicking the caller.
		cacheData, ok := cache.([]uint8)
		if !ok {
			log.Printf("[ERROR][%s] Cached mock data has unexpected type %T", executionId, cache)
			return nil, fmt.Errorf("cached mock data for execution %s has unexpected type %T", executionId, cache)
		}

		log.Printf("[DEBUG][%s] Using cached mock data (%d bytes)", executionId, len(cacheData))

		var toolCalls []MockToolCall
		err = json.Unmarshal(cacheData, &toolCalls)
		if err != nil {
			log.Printf("[ERROR][%s] Failed to unmarshal cached mock data: %s", executionId, err)
			return nil, fmt.Errorf("failed to unmarshal cached mock data: %w", err)
		}

		return GetMockResponseFromToolCalls(toolCalls, fields)
	}

	// Cache lookup failed: fall back to mocks stored on disk.
	testDataPath := os.Getenv("AGENT_TEST_DATA_PATH")
	if testDataPath == "" {
		return nil, fmt.Errorf("no mock data in cache for execution %s and AGENT_TEST_DATA_PATH not set", executionId)
	}

	log.Printf("[DEBUG][%s] Cache miss, using file-based mocks from: %s", executionId, testDataPath)

	useCase := os.Getenv("AGENT_TEST_USE_CASE")
	if useCase == "" {
		return nil, errors.New("AGENT_TEST_USE_CASE not set")
	}

	useCaseData, err := loadUseCaseData(useCase)
	if err != nil {
		return nil, err
	}

	return GetMockResponseFromToolCalls(useCaseData.ToolCalls, fields)
}

// GetMockResponseFromToolCalls finds the mock tool call that matches the
// request described by fields and returns its response encoded as JSON.
//
// Matching is driven by the "url" field:
//  1. Tool calls whose URL equals the request URL (see urlsEqual) are collected.
//  2. If none are equal, the most similar URL is used, provided its similarity
//     score is at least 0.80.
//  3. If several are equal, the first one whose stored fields all match the
//     request fields wins; if none does, the first candidate is returned.
//
// An error is returned when the fields contain no URL, when the URL cannot be
// parsed, or when neither an exact nor a sufficiently similar match exists.
func GetMockResponseFromToolCalls(toolCalls []MockToolCall, fields []Valuereplace) ([]byte, error) {
	requestURL := extractFieldValue(fields, "url")
	if requestURL == "" {
		return nil, errors.New("no URL found in request fields")
	}

	log.Printf("[DEBUG] Looking for mock data with URL: %s", requestURL)

	reqURLParsed, err := url.Parse(requestURL)
	if err != nil {
		log.Printf("[ERROR] Invalid request URL %s: %v", requestURL, err)
		return nil, fmt.Errorf("invalid request URL: %w", err)
	}

	var candidates []MockToolCall
	for _, tc := range toolCalls {
		if urlsEqual(reqURLParsed, tc.URL) {
			candidates = append(candidates, tc)
		}
	}

	switch len(candidates) {
	case 0:
		// No exact matches: fall back to fuzzy matching. The result is returned
		// directly so it is never reported as an exact match in the logs.
		log.Printf("[DEBUG] No exact match, trying fuzzy matching...")
		bestMatch, score := findBestFuzzyMatch(reqURLParsed, toolCalls)
		if score < 0.80 {
			return nil, fmt.Errorf("no mock data found for URL: %s (best match: %.1f%%)", requestURL, score*100)
		}
		log.Printf("[INFO] Found fuzzy match with %.1f%% similarity: %s", score*100, bestMatch.URL)
		return marshalResponse(bestMatch.Response)
	case 1:
		log.Printf("[DEBUG] Found exact match for URL: %s", requestURL)
		return marshalResponse(candidates[0].Response)
	}

	// Multiple URL matches: use the request fields to disambiguate.
	log.Printf("[DEBUG] Found %d candidates for URL, comparing fields...", len(candidates))
	for _, candidate := range candidates {
		if fieldsMatch(fields, candidate.Fields) {
			log.Printf("[DEBUG] Found exact match based on fields")
			return marshalResponse(candidate.Response)
		}
	}

	// No candidate agrees on all fields: return the first one with a warning.
	log.Printf("[WARNING] No exact field match found, returning first candidate")
	return marshalResponse(candidates[0].Response)
}

// urlsEqual reports whether the already-parsed request URL and the stored URL
// string refer to the same endpoint with the same query.
//
// Scheme, host and path must be identical. Query parameters are compared
// independent of key order, but the values of a repeated key must appear in
// the same order. A stored URL that fails to parse is logged and treated as
// not equal.
func urlsEqual(req *url.URL, stored string) bool {
	parsed, parseErr := url.Parse(stored)
	if parseErr != nil {
		log.Printf("[WARN] Invalid stored URL %s: %v", stored, parseErr)
		return false
	}

	sameLocation := req.Scheme == parsed.Scheme &&
		req.Host == parsed.Host &&
		req.Path == parsed.Path
	if !sameLocation {
		return false
	}

	got, want := req.Query(), parsed.Query()
	// Same key count plus "every request key exists in stored" implies the
	// key sets are identical.
	if len(got) != len(want) {
		return false
	}

	for name, gotVals := range got {
		wantVals, present := want[name]
		if !present || len(gotVals) != len(wantVals) {
			return false
		}
		for idx := range gotVals {
			if gotVals[idx] != wantVals[idx] {
				return false
			}
		}
	}
	return true
}

// loadUseCaseData locates "<useCase>.json" and decodes it into MockUseCaseData.
//
// Directories are searched in this order, and the first one containing the
// file wins:
//  1. the directory named by AGENT_TEST_DATA_PATH, when set,
//  2. "agent_test_data" relative to the working directory,
//  3. "../shuffle-shared/agent_test_data",
//  4. "../../shuffle-shared/agent_test_data",
//  5. "<home>/Documents/shuffle-shared/agent_test_data", when the home
//     directory can be determined.
//
// An error is returned when the file is not found in any of these locations,
// cannot be read, or does not contain valid JSON. Read and parse errors wrap
// the underlying error so callers can inspect it with errors.Is / errors.As.
func loadUseCaseData(useCase string) (*MockUseCaseData, error) {
	possiblePaths := []string{}

	if envPath := os.Getenv("AGENT_TEST_DATA_PATH"); envPath != "" {
		possiblePaths = append(possiblePaths, envPath)
	}

	possiblePaths = append(possiblePaths,
		"agent_test_data",
		"../shuffle-shared/agent_test_data",
		"../../shuffle-shared/agent_test_data",
	)

	if homeDir, err := os.UserHomeDir(); err == nil {
		possiblePaths = append(possiblePaths, filepath.Join(homeDir, "Documents", "shuffle-shared", "agent_test_data"))
	}

	var filePath string
	var foundPath string

	for _, basePath := range possiblePaths {
		testPath := filepath.Join(basePath, fmt.Sprintf("%s.json", useCase))
		if _, err := os.Stat(testPath); err == nil {
			filePath = testPath
			foundPath = basePath
			break
		}
	}

	if filePath == "" {
		return nil, fmt.Errorf("could not find test data file %s.json in any of these paths: %v", useCase, possiblePaths)
	}

	log.Printf("[DEBUG] Loading use case data from: %s", filePath)

	data, err := ioutil.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to read use case file %s: %w", filePath, err)
	}

	var useCaseData MockUseCaseData
	err = json.Unmarshal(data, &useCaseData)
	if err != nil {
		return nil, fmt.Errorf("failed to parse use case data: %w", err)
	}

	log.Printf("[DEBUG] Loaded use case '%s' with %d tool calls from %s", useCaseData.UseCase, len(useCaseData.ToolCalls), foundPath)

	return &useCaseData, nil
}

// extractFieldValue returns the Value of the first field whose Key equals key,
// or an empty string when no such field exists.
func extractFieldValue(fields []Valuereplace, key string) string {
	for idx := range fields {
		if fields[idx].Key != key {
			continue
		}
		return fields[idx].Value
	}
	return ""
}

// fieldsMatch reports whether every stored field is present in the request
// fields with an identical value.
//
// The check is one-directional: request fields that are not in storedFields
// are ignored, so an empty storedFields always matches. When a key occurs more
// than once in requestFields, the last occurrence is the one compared.
func fieldsMatch(requestFields []Valuereplace, storedFields map[string]string) bool {
	// Index the request by key for constant-time lookups.
	lookup := make(map[string]string)
	for idx := range requestFields {
		lookup[requestFields[idx].Key] = requestFields[idx].Value
	}

	for name, want := range storedFields {
		got, present := lookup[name]
		if present && got == want {
			continue
		}
		return false
	}

	return true
}

// marshalResponse encodes a mock response map as JSON.
//
// On failure it returns a nil slice and an error that wraps the underlying
// encoding/json error, so callers can inspect it with errors.Is / errors.As.
func marshalResponse(response map[string]interface{}) ([]byte, error) {
	data, err := json.Marshal(response)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal response: %w", err)
	}
	return data, nil
}

// findBestFuzzyMatch returns the tool call whose URL is most similar to reqURL
// together with its similarity score as computed by calculateURLSimilarity.
//
// Tool calls with an unparsable URL are skipped. Only scores strictly greater
// than the current best are accepted, so on ties the earliest tool call wins,
// and when nothing scores above zero the zero-value MockToolCall and a score
// of 0 are returned.
func findBestFuzzyMatch(reqURL *url.URL, toolCalls []MockToolCall) (MockToolCall, float64) {
	var (
		winner   MockToolCall
		topScore float64
	)

	for idx := range toolCalls {
		candidateURL, parseErr := url.Parse(toolCalls[idx].URL)
		if parseErr != nil {
			continue
		}

		if similarity := calculateURLSimilarity(reqURL, candidateURL); similarity > topScore {
			topScore = similarity
			winner = toolCalls[idx]
		}
	}

	return winner, topScore
}

// calculateURLSimilarity scores how alike two URLs are, as a weighted sum
// divided by the total weight:
//
//   - scheme equality: 0.10
//   - host equality:   0.20
//   - path equality:   0.20
//   - query string:    0.50
//
// The query part earns its full weight when both URLs have no query
// parameters. Otherwise it earns the fraction of keys (taken over the union of
// both key sets) that are present in both URLs with identical value lists,
// compared in order.
func calculateURLSimilarity(url1, url2 *url.URL) float64 {
	var earned, possible float64

	// Fixed-weight components, evaluated in scheme, host, path order.
	components := []struct {
		same   bool
		weight float64
	}{
		{url1.Scheme == url2.Scheme, 0.10},
		{url1.Host == url2.Host, 0.20},
		{url1.Path == url2.Path, 0.20},
	}
	for _, part := range components {
		if part.same {
			earned += part.weight
		}
		possible += part.weight
	}

	left, right := url1.Query(), url2.Query()
	if len(left) == 0 && len(right) == 0 {
		earned += 0.50
	} else {
		// At least one side has parameters, so the union below is non-empty.
		union := make(map[string]bool)
		for name := range left {
			union[name] = true
		}
		for name := range right {
			union[name] = true
		}

		agreeing := 0
		for name := range union {
			leftVals, inLeft := left[name]
			rightVals, inRight := right[name]
			if !inLeft || !inRight || len(leftVals) != len(rightVals) {
				continue
			}

			identical := true
			for idx := range leftVals {
				if leftVals[idx] != rightVals[idx] {
					identical = false
					break
				}
			}
			if identical {
				agreeing++
			}
		}

		paramScore := float64(agreeing) / float64(len(union))
		earned += paramScore * 0.50
	}
	possible += 0.50

	return earned / possible
}
2 changes: 1 addition & 1 deletion ai.go
Original file line number Diff line number Diff line change
Expand Up @@ -11973,4 +11973,4 @@ func buildManualInputList(history []ConversationMessage, newPrompt string) []map
})

return items
}
}
8 changes: 8 additions & 0 deletions cloudSync.go
Original file line number Diff line number Diff line change
Expand Up @@ -2109,6 +2109,14 @@ func RunAgentDecisionSingulActionHandler(execution WorkflowExecution, decision A
debugUrl := ""
log.Printf("[INFO][%s] Running agent decision action '%s' with app '%s'. This is ran with Singul.", execution.ExecutionId, decision.Action, decision.Tool)

// Check if running in test mode
if os.Getenv("AGENT_TEST_MODE") == "true" {
log.Printf("[DEBUG][%s] AGENT_TEST_MODE enabled - using mock tool execution", execution.ExecutionId)

// Call mock handler
return RunAgentDecisionMockHandler(execution, decision)
}

baseUrl := "https://shuffler.io"
if os.Getenv("BASE_URL") != "" {
baseUrl = os.Getenv("BASE_URL")
Expand Down
Loading
Loading