Skip to content

Commit 8feb7bf

Browse files
committed
feat(tests): enhance knowledge and flowfile tests with containment barriers
Added new test cases to validate containment barriers in the `ResolvePulledStagedTarget` and `ZipRelativePaths` functions, ensuring that paths escaping the designated directories are properly rejected. Updated the `knowledge_test.go` to include scenarios for handling nil embedder and error propagation during document creation, improving overall test coverage and robustness.
1 parent ce06a0d commit 8feb7bf

3 files changed

Lines changed: 157 additions & 47 deletions

File tree

backend/pkg/database/knowledge/knowledge_test.go

Lines changed: 100 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
type mockDB struct {
2828
database.Querier // nil — panics if an unexpected method is called
2929

30+
insertKnowledge func(ctx context.Context, arg database.InsertKnowledgeDocumentParams) (string, error)
3031
getKnowledge func(ctx context.Context, uuid string) (database.GetKnowledgeDocumentRow, error)
3132
getUserKnowledge func(ctx context.Context, arg database.GetUserKnowledgeDocumentParams) (database.GetUserKnowledgeDocumentRow, error)
3233
listAll func(ctx context.Context) ([]database.ListAllKnowledgeDocumentsRow, error)
@@ -39,6 +40,9 @@ type mockDB struct {
3940
searchUserKnowledge func(ctx context.Context, arg database.SearchUserKnowledgeDocumentsParams) ([]database.SearchUserKnowledgeDocumentsRow, error)
4041
}
4142

43+
func (m *mockDB) InsertKnowledgeDocument(ctx context.Context, arg database.InsertKnowledgeDocumentParams) (string, error) {
44+
return m.insertKnowledge(ctx, arg)
45+
}
4246
func (m *mockDB) GetKnowledgeDocument(ctx context.Context, uuid string) (database.GetKnowledgeDocumentRow, error) {
4347
return m.getKnowledge(ctx, uuid)
4448
}
@@ -1437,56 +1441,100 @@ func TestCreateDocument(t *testing.T) {
14371441
ctx := context.Background()
14381442
const userID = int64(11)
14391443

1440-
t.Run("store nil returns error immediately", func(t *testing.T) {
1441-
ks := &knowledgeStore{store: nil}
1444+
// insertOK returns a mockDB whose InsertKnowledgeDocument returns a fixed id.
1445+
insertOK := func(id string) *mockDB {
1446+
return &mockDB{
1447+
insertKnowledge: func(_ context.Context, _ database.InsertKnowledgeDocumentParams) (string, error) {
1448+
return id, nil
1449+
},
1450+
}
1451+
}
1452+
1453+
t.Run("embedder nil returns error", func(t *testing.T) {
1454+
ks := &knowledgeStore{db: insertOK("id"), embedder: nil}
1455+
_, err := ks.CreateDocument(ctx, userID, model.CreateKnowledgeDocumentInput{
1456+
DocType: model.KnowledgeDocTypeAnswer, Content: "x", Question: "q",
1457+
})
1458+
if err == nil {
1459+
t.Fatal("expected error when embedder is nil")
1460+
}
1461+
})
1462+
1463+
t.Run("embedder unavailable returns error", func(t *testing.T) {
1464+
ks := &knowledgeStore{db: insertOK("id"), embedder: &mockEmbedder{available: false}}
14421465
_, err := ks.CreateDocument(ctx, userID, model.CreateKnowledgeDocumentInput{
14431466
DocType: model.KnowledgeDocTypeAnswer, Content: "x", Question: "q",
14441467
})
14451468
if err == nil {
1446-
t.Fatal("expected error when store is nil")
1469+
t.Fatal("expected error when embedder unavailable")
14471470
}
14481471
})
14491472

1450-
t.Run("store AddDocuments error propagates", func(t *testing.T) {
1451-
vs := &mockVectorStore{
1452-
addDocumentsFn: func(_ context.Context, _ []schema.Document, _ ...vectorstores.Option) ([]string, error) {
1453-
return nil, errors.New("vector db error")
1473+
t.Run("EmbedDocuments error propagates", func(t *testing.T) {
1474+
ks := &knowledgeStore{
1475+
db: insertOK("id"),
1476+
embedder: &mockEmbedder{
1477+
available: true,
1478+
embedDocumentsFn: func(_ context.Context, _ []string) ([][]float32, error) {
1479+
return nil, errors.New("embed error")
1480+
},
14541481
},
14551482
}
1456-
ks := &knowledgeStore{store: vs, newKnp: newPublisherFactory(&mockPublisher{})}
14571483
_, err := ks.CreateDocument(ctx, userID, model.CreateKnowledgeDocumentInput{
14581484
DocType: model.KnowledgeDocTypeGuide, Content: "c", Question: "q",
14591485
})
14601486
if err == nil {
1461-
t.Fatal("expected error from AddDocuments")
1487+
t.Fatal("expected error from EmbedDocuments")
14621488
}
14631489
})
14641490

1465-
t.Run("AddDocuments returning no IDs is an error", func(t *testing.T) {
1466-
vs := &mockVectorStore{
1467-
addDocumentsFn: func(_ context.Context, _ []schema.Document, _ ...vectorstores.Option) ([]string, error) {
1468-
return []string{}, nil // empty slice
1491+
t.Run("embedder returning empty vectors is an error", func(t *testing.T) {
1492+
ks := &knowledgeStore{
1493+
db: insertOK("id"),
1494+
embedder: &mockEmbedder{
1495+
available: true,
1496+
embedDocumentsFn: func(_ context.Context, _ []string) ([][]float32, error) {
1497+
return [][]float32{}, nil // empty slice
1498+
},
14691499
},
14701500
}
1471-
ks := &knowledgeStore{store: vs, newKnp: newPublisherFactory(&mockPublisher{})}
14721501
_, err := ks.CreateDocument(ctx, userID, model.CreateKnowledgeDocumentInput{
14731502
DocType: model.KnowledgeDocTypeAnswer, Content: "c", Question: "q",
14741503
})
14751504
if err == nil {
1476-
t.Fatal("expected error for empty IDs")
1505+
t.Fatal("expected error for empty vectors")
1506+
}
1507+
})
1508+
1509+
t.Run("db insert error propagates", func(t *testing.T) {
1510+
db := &mockDB{
1511+
insertKnowledge: func(_ context.Context, _ database.InsertKnowledgeDocumentParams) (string, error) {
1512+
return "", errors.New("constraint error")
1513+
},
1514+
}
1515+
ks := &knowledgeStore{
1516+
db: db,
1517+
embedder: &mockEmbedder{available: true},
1518+
newKnp: newPublisherFactory(&mockPublisher{}),
1519+
}
1520+
_, err := ks.CreateDocument(ctx, userID, model.CreateKnowledgeDocumentInput{
1521+
DocType: model.KnowledgeDocTypeAnswer, Content: "c", Question: "q",
1522+
})
1523+
if err == nil {
1524+
t.Fatal("expected error from db insert")
14771525
}
14781526
})
14791527

14801528
t.Run("success: all fields set, manual=true, user_id present, event published", func(t *testing.T) {
14811529
pub := &mockPublisher{}
1482-
var capturedDocs []schema.Document
1483-
vs := &mockVectorStore{
1484-
addDocumentsFn: func(_ context.Context, docs []schema.Document, _ ...vectorstores.Option) ([]string, error) {
1485-
capturedDocs = docs
1486-
return []string{"new-uuid"}, nil
1530+
var gotParams database.InsertKnowledgeDocumentParams
1531+
db := &mockDB{
1532+
insertKnowledge: func(_ context.Context, arg database.InsertKnowledgeDocumentParams) (string, error) {
1533+
gotParams = arg
1534+
return "new-uuid", nil
14871535
},
14881536
}
1489-
ks := &knowledgeStore{store: vs, newKnp: newPublisherFactory(pub)}
1537+
ks := &knowledgeStore{db: db, embedder: &mockEmbedder{available: true}, newKnp: newPublisherFactory(pub)}
14901538

14911539
input := model.CreateKnowledgeDocumentInput{
14921540
DocType: model.KnowledgeDocTypeCode,
@@ -1510,25 +1558,35 @@ func TestCreateDocument(t *testing.T) {
15101558
if !doc.Manual {
15111559
t.Fatal("manual must be true for manually created docs")
15121560
}
1561+
if doc.UserID != userID {
1562+
t.Fatalf("doc.UserID: want %d, got %d", userID, doc.UserID)
1563+
}
1564+
if doc.CodeLang == nil || *doc.CodeLang != "go" {
1565+
t.Fatal("code_lang missing from returned doc")
1566+
}
15131567

1514-
// Metadata stored in pgvector
1515-
if len(capturedDocs) != 1 {
1516-
t.Fatal("expected exactly one document passed to AddDocuments")
1568+
// Persisted document text (trimmed) and embedding literal
1569+
if gotParams.Document.String != "func main() {}" {
1570+
t.Fatalf("persisted document not trimmed: %q", gotParams.Document.String)
15171571
}
1518-
meta := capturedDocs[0].Metadata
1519-
if meta["user_id"] != userID {
1520-
t.Fatalf("user_id in metadata: want %d, got %v", userID, meta["user_id"])
1572+
emb, ok := gotParams.Embedding.(string)
1573+
if !ok || emb == "" || emb[0] != '[' {
1574+
t.Fatalf("embedding must be a vector literal, got %v", gotParams.Embedding)
1575+
}
1576+
1577+
// Metadata stored in pgvector cmetadata
1578+
meta := parseMeta(string(gotParams.Cmetadata))
1579+
if meta.UserID != userID {
1580+
t.Fatalf("user_id in metadata: want %d, got %d", userID, meta.UserID)
15211581
}
1522-
if meta["manual"] != true {
1582+
if !meta.Manual {
15231583
t.Fatal("manual flag must be true in metadata")
15241584
}
1525-
if meta["code_lang"] != "go" {
1585+
if meta.CodeLang != "go" {
15261586
t.Fatal("code_lang missing from metadata")
15271587
}
1528-
1529-
// Returned model must carry the creator's UserID
1530-
if doc.UserID != userID {
1531-
t.Fatalf("doc.UserID: want %d, got %d", userID, doc.UserID)
1588+
if meta.Description != "a Go main" {
1589+
t.Fatalf("description missing from metadata: %q", meta.Description)
15321590
}
15331591

15341592
// Event
@@ -1541,32 +1599,27 @@ func TestCreateDocument(t *testing.T) {
15411599
})
15421600

15431601
t.Run("content is trimmed of whitespace", func(t *testing.T) {
1544-
var capturedContent string
1545-
vs := &mockVectorStore{
1546-
addDocumentsFn: func(_ context.Context, docs []schema.Document, _ ...vectorstores.Option) ([]string, error) {
1547-
capturedContent = docs[0].PageContent
1548-
return []string{"id"}, nil
1602+
var gotParams database.InsertKnowledgeDocumentParams
1603+
db := &mockDB{
1604+
insertKnowledge: func(_ context.Context, arg database.InsertKnowledgeDocumentParams) (string, error) {
1605+
gotParams = arg
1606+
return "id", nil
15491607
},
15501608
}
1551-
ks := &knowledgeStore{store: vs, newKnp: newPublisherFactory(&mockPublisher{})}
1609+
ks := &knowledgeStore{db: db, embedder: &mockEmbedder{available: true}, newKnp: newPublisherFactory(&mockPublisher{})}
15521610
_, err := ks.CreateDocument(ctx, userID, model.CreateKnowledgeDocumentInput{
15531611
DocType: model.KnowledgeDocTypeAnswer, Content: " trimmed ", Question: "q",
15541612
})
15551613
if err != nil {
15561614
t.Fatal(err)
15571615
}
1558-
if capturedContent != "trimmed" {
1559-
t.Fatalf("expected trimmed content, got %q", capturedContent)
1616+
if gotParams.Document.String != "trimmed" {
1617+
t.Fatalf("expected trimmed content, got %q", gotParams.Document.String)
15601618
}
15611619
})
15621620

15631621
t.Run("optional fields absent means nil in model", func(t *testing.T) {
1564-
vs := &mockVectorStore{
1565-
addDocumentsFn: func(_ context.Context, _ []schema.Document, _ ...vectorstores.Option) ([]string, error) {
1566-
return []string{"id"}, nil
1567-
},
1568-
}
1569-
ks := &knowledgeStore{store: vs, newKnp: newPublisherFactory(&mockPublisher{})}
1622+
ks := &knowledgeStore{db: insertOK("id"), embedder: &mockEmbedder{available: true}, newKnp: newPublisherFactory(&mockPublisher{})}
15701623
doc, err := ks.CreateDocument(ctx, userID, model.CreateKnowledgeDocumentInput{
15711624
DocType: model.KnowledgeDocTypeAnswer, Content: "c", Question: "q",
15721625
})

backend/pkg/flowfiles/files.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,12 @@ func ResolvePulledStagedTarget(stagingDir, cacheRelPath string) string {
354354
}
355355

356356
for _, candidate := range candidates {
357+
// Defense-in-depth containment barrier: ignore any candidate that
358+
// resolves outside the staging directory. cacheRelPath is sanitized by
359+
// the caller today, but this keeps the function safe under refactoring.
360+
if !IsWithinDir(candidate, stagingDir) {
361+
continue
362+
}
357363
if _, err := os.Lstat(candidate); err == nil {
358364
return candidate
359365
}
@@ -470,6 +476,13 @@ func ZipRelativePaths(w io.Writer, baseDir string, relPaths []string) error {
470476
for _, relPath := range relPaths {
471477
localPath := filepath.Join(baseDir, filepath.FromSlash(relPath))
472478

479+
// Defense-in-depth containment barrier: callers are expected to pass
480+
// pre-validated cache-relative paths, but never operate on a path that
481+
// resolves outside baseDir regardless of caller behaviour.
482+
if !IsWithinDir(localPath, baseDir) {
483+
continue
484+
}
485+
473486
info, err := os.Lstat(localPath)
474487
if err != nil {
475488
if errors.Is(err, os.ErrNotExist) {

backend/pkg/flowfiles/files_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,19 @@ func TestResolvePulledStagedTarget(t *testing.T) {
304304

305305
assert.Equal(t, target, ResolvePulledStagedTarget(stagingDir, "etc/nginx/nginx.conf"))
306306
})
307+
308+
t.Run("escaping cache path is rejected", func(t *testing.T) {
309+
root := t.TempDir()
310+
stagingDir := filepath.Join(root, "staging")
311+
require.NoError(t, os.MkdirAll(stagingDir, 0755))
312+
// A file outside the staging dir that an escaping path would resolve to.
313+
require.NoError(t, os.WriteFile(filepath.Join(root, "evil.conf"), []byte("evil"), 0644))
314+
315+
// The first candidate (Join(stagingDir, "../evil.conf")) escapes the
316+
// staging dir and must be ignored by the containment barrier; the
317+
// second candidate (basename "evil.conf") does not exist in staging.
318+
assert.Equal(t, "", ResolvePulledStagedTarget(stagingDir, "../evil.conf"))
319+
})
307320
}
308321

309322
func TestWriteUploadsTar(t *testing.T) {
@@ -683,6 +696,37 @@ func TestZipRelativePaths(t *testing.T) {
683696
require.NoError(t, err)
684697
assert.Empty(t, zr.File)
685698
})
699+
700+
t.Run("escaping relPath is skipped", func(t *testing.T) {
701+
root := t.TempDir()
702+
base := filepath.Join(root, "flow-1-data")
703+
require.NoError(t, os.MkdirAll(filepath.Join(base, "uploads"), 0755))
704+
require.NoError(t, os.WriteFile(filepath.Join(base, "uploads", "a.txt"), []byte("alpha"), 0644))
705+
// Secret file outside baseDir that an escaping relPath would resolve to.
706+
require.NoError(t, os.WriteFile(filepath.Join(root, "secret.txt"), []byte("secret"), 0644))
707+
708+
var buf bytes.Buffer
709+
err := ZipRelativePaths(&buf, base, []string{
710+
"uploads/a.txt",
711+
"../secret.txt", // escapes baseDir: must be skipped by the barrier
712+
})
713+
require.NoError(t, err)
714+
715+
zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
716+
require.NoError(t, err)
717+
contents := map[string]string{}
718+
for _, f := range zr.File {
719+
rc, err := f.Open()
720+
require.NoError(t, err)
721+
data, _ := io.ReadAll(rc)
722+
rc.Close()
723+
contents[f.Name] = string(data)
724+
}
725+
726+
assert.Equal(t, "alpha", contents["uploads/a.txt"])
727+
assert.NotContains(t, contents, "../secret.txt", "escaping paths must be excluded")
728+
assert.NotContains(t, contents, "secret.txt", "escaping paths must be excluded")
729+
})
686730
}
687731

688732
func TestZipDirectory(t *testing.T) {

0 commit comments

Comments
 (0)