streed · streed · Sep 22, 2025 · Sep 22, 2025 · Sep 22, 2025 · Sep 22, 2025
diff --git a/cmd/lil-rag/main.go b/cmd/lil-rag/main.go
@@ -826,32 +826,56 @@ func handleDelete(ctx context.Context, rag *lilrag.LilRag, args []string) error
 
 func handleReindex(ctx context.Context, rag *lilrag.LilRag, args []string) error {
 	if len(args) > 0 && (args[0] == "--help" || args[0] == "-h") {
-		fmt.Println("Usage: lil-rag reindex [--force]")
+		fmt.Println("Usage: lil-rag reindex [--force] [--chunking=STRATEGY]")
 		fmt.Println("")
-		fmt.Println("Reprocess all documents with the current recursive chunking configuration.")
+		fmt.Println("Reprocess all documents with the specified chunking configuration.")
 		fmt.Println("This will:")
-		fmt.Println("  • Re-chunk all documents using the latest algorithm")
+		fmt.Println("  • Re-chunk all documents using the specified algorithm")
 		fmt.Println("  • Regenerate embeddings for all chunks")
 		fmt.Println("  • Update chunk boundaries and overlap")
 		fmt.Println("  • Preserve original document content and metadata")
 		fmt.Println("")
 		fmt.Println("Options:")
-		fmt.Println("  --force    Skip confirmation prompt")
+		fmt.Println("  --force              Skip confirmation prompt")
+		fmt.Println("  --chunking=STRATEGY  Chunking strategy to use (default: recursive)")
+		fmt.Println("")
+		fmt.Println("Available chunking strategies:")
+		fmt.Println("  recursive   Adaptive chunking with semantic boundaries (default)")
+		fmt.Println("  fast        Small chunks for precise search (128 tokens, 19 overlap)")
+		fmt.Println("  contextual  Large chunks for context preservation (512 tokens, 76 overlap)")
+		fmt.Println("  legacy      Large chunks for backward compatibility (1800 tokens, 200 overlap)")
+		fmt.Println("  fallback    Simple word-based chunking")
 		fmt.Println("")
 		fmt.Println("Note: This operation can take several minutes depending on the number")
 		fmt.Println("of documents and their size. The system will remain accessible during")
 		fmt.Println("reindexing, but performance may be impacted.")
 		return nil
 	}
 
-	// Check if --force flag is provided
+	// Parse flags
 	force := false
+	chunkingStrategy := "recursive" // default strategy
 	for _, arg := range args {
 		if arg == forceFlag {
 			force = true
+		} else if strings.HasPrefix(arg, "--chunking=") {
+			chunkingStrategy = strings.TrimPrefix(arg, "--chunking=")
+		}
+	}
+
+	// Validate chunking strategy
+	validStrategies := []string{"recursive", "fast", "contextual", "legacy", "fallback"}
+	isValid := false
+	for _, valid := range validStrategies {
+		if chunkingStrategy == valid {
+			isValid = true
 			break
 		}
 	}
+	if !isValid {
+		return fmt.Errorf("invalid chunking strategy '%s'. Valid strategies: %s", 
+			chunkingStrategy, strings.Join(validStrategies, ", "))
+	}
 
 	// Get document count for confirmation
 	documents, err := rag.ListDocuments(ctx)
@@ -864,9 +888,9 @@ func handleReindex(ctx context.Context, rag *lilrag.LilRag, args []string) error
 		return nil
 	}
 
-	fmt.Printf("This will reindex %d documents with recursive chunking.\n", len(documents))
+	fmt.Printf("This will reindex %d documents with %s chunking.\n", len(documents), chunkingStrategy)
 	fmt.Println("The process will:")
-	fmt.Println("  • Re-chunk all documents using the latest algorithm")
+	fmt.Println("  • Re-chunk all documents using the specified algorithm")
 	fmt.Println("  • Regenerate embeddings for improved search performance")
 	fmt.Println("  • Update chunk boundaries for better semantic coherence")
 	fmt.Printf("\nEstimated time: %d-%d minutes (depending on document size and Ollama performance)\n",
@@ -887,11 +911,11 @@ func handleReindex(ctx context.Context, rag *lilrag.LilRag, args []string) error
 		}
 	}
 
-	fmt.Println("\n🔄 Starting reindex with recursive chunking...")
+	fmt.Printf("\n🔄 Starting reindex with %s chunking...\n", chunkingStrategy)
 	fmt.Println("This may take several minutes. Please do not interrupt the process.")
 
 	startTime := time.Now()
-	err = rag.ReindexAllDocuments(ctx)
+	err = rag.ReindexAllDocumentsWithStrategy(ctx, chunkingStrategy)
 	duration := time.Since(startTime)
 
 	if err != nil {
@@ -900,7 +924,7 @@ func handleReindex(ctx context.Context, rag *lilrag.LilRag, args []string) error
 	}
 
 	fmt.Printf("\n✅ Reindex completed successfully in %v\n", duration)
-	fmt.Printf("All %d documents have been reprocessed with recursive chunking.\n", len(documents))
+	fmt.Printf("All %d documents have been reprocessed with %s chunking.\n", len(documents), chunkingStrategy)
 	fmt.Println("Your RAG system now uses improved chunk boundaries for better search performance.")
 
 	return nil
@@ -1058,7 +1082,7 @@ func printUsage() {
 	fmt.Println("      --list-sessions         List all chat sessions")
 	fmt.Println("  documents                    List all indexed documents")
 	fmt.Println("  delete <id> [--force]        Delete a document by ID")
-	fmt.Println("  reindex [--force]            Reprocess all documents with recursive chunking")
+	fmt.Println("  reindex [--force] [--chunking=STRATEGY]  Reprocess all documents with specified chunking")
 	fmt.Println("  health                       Check system health status")
 	fmt.Println("  config <init|show|set>       Manage user profile configuration")
 	fmt.Println("  reset [--force]              Delete database and all indexed data")
@@ -1114,8 +1138,9 @@ func printUsage() {
 	fmt.Println("  lil-rag chat --list-sessions             # List all chat sessions")
 	fmt.Println("  lil-rag documents               # List all documents")
 	fmt.Println("  lil-rag delete doc1 --force     # Delete document")
-	fmt.Println("  lil-rag reindex                 # Reindex all documents with recursive chunking")
+	fmt.Println("  lil-rag reindex                 # Reindex all documents with recursive chunking (default)")
 	fmt.Println("  lil-rag reindex --force         # Reindex without confirmation")
+	fmt.Println("  lil-rag reindex --chunking=fast # Reindex with fast chunking strategy")
 	fmt.Println("  lil-rag health                  # Check system health")
 	fmt.Println("  lil-rag auth add alice password123  # Add user with username and password")
 	fmt.Println("  lil-rag auth list               # List all users")

diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
@@ -372,6 +372,77 @@ Controls how documents are split into searchable chunks.
   lil-rag config set chunking.overlap 200
   ```
 
+## Reindexing and Chunking Strategies
+
+The `reindex` command allows you to reprocess all documents with different chunking strategies without losing your original content.
+
+### Using Reindex with Chunking Strategies
+
+```bash
+# Default recursive chunking (uses current configuration)
+lil-rag reindex
+
+# Fast search optimization (small chunks)
+lil-rag reindex --chunking=fast
+
+# Context preservation (large chunks)
+lil-rag reindex --chunking=contextual
+
+# Legacy compatibility
+lil-rag reindex --chunking=legacy
+
+# Simple word-based chunking
+lil-rag reindex --chunking=fallback
+
+# Skip confirmation prompt
+lil-rag reindex --force --chunking=fast
+```
+
+### Available Chunking Strategies
+
+#### `recursive` (default)
+- **Description**: Adaptive chunking with semantic boundaries
+- **Behavior**: Uses current `chunking.max_tokens` and `chunking.overlap` settings
+- **Best for**: Balanced performance with semantic coherence
+- **Algorithm**: Hierarchical text splitting that respects document structure
+
+#### `fast`
+- **Tokens**: 128 max, 19 overlap
+- **Best for**: Precise search results, Q&A applications
+- **Trade-off**: Less context per chunk, more precise retrieval
+
+#### `contextual`
+- **Tokens**: 512 max, 76 overlap
+- **Best for**: Summarization, context-heavy applications
+- **Trade-off**: More context per chunk, potentially less precise retrieval
+
+#### `legacy`
+- **Tokens**: 1800 max, 200 overlap
+- **Best for**: Backward compatibility with older configurations
+- **Trade-off**: Large chunks may reduce search precision
+
+#### `fallback`
+- **Description**: Simple word-based chunking
+- **Behavior**: Uses current token settings but applies basic word splitting
+- **Best for**: Troubleshooting or when semantic chunking fails
+
+### Strategy Selection Guidelines
+
+```bash
+# For knowledge bases and FAQs
+lil-rag config set chunking.max-tokens 128
+lil-rag config set chunking.overlap 19
+lil-rag reindex --chunking=fast
+
+# For document summarization
+lil-rag config set chunking.max-tokens 512
+lil-rag config set chunking.overlap 76
+lil-rag reindex --chunking=contextual
+
+# For existing systems (pre-2025)
+lil-rag reindex --chunking=legacy
+```
+
 ## Command Line Overrides
 
 All configuration options can be overridden with command line flags.

diff --git a/pkg/lilrag/chunker.go b/pkg/lilrag/chunker.go
@@ -89,6 +89,11 @@ func (tc *TextChunker) ChunkText(text string) []Chunk {
 	return semanticChunks
 }
 
+// ChunkTextWithFallback applies the simple word-based fallback chunking by calling fallbackChunk(text, "text") directly; it backs the "fallback" strategy.
+func (tc *TextChunker) ChunkTextWithFallback(text string) []Chunk {
+	return tc.fallbackChunk(text, "text")
+}
+
 // detectContentType analyzes text to determine optimal chunking strategy
 func (tc *TextChunker) detectContentType(text string) string {
 	codeIndicators := []string{"function", "class", "def ", "```", "import ", "#include", "var ", "let ", "const "}

diff --git a/pkg/lilrag/chunker_strategy_test.go b/pkg/lilrag/chunker_strategy_test.go
@@ -0,0 +1,109 @@
+package lilrag
+
+import (
+	"testing"
+)
+
+// TestTextChunker_ChunkTextWithFallback checks the chunk count, the first chunk's text, and each chunk's TokenCount for ChunkTextWithFallback.
+func TestTextChunker_ChunkTextWithFallback(t *testing.T) {
+	tests := []struct {
+		name        string
+		maxTokens   int
+		overlap     int
+		text        string
+		wantChunks  int
+		wantPattern string // Exact expected text of the first chunk (compared with ==, not pattern-matched)
+	}{
+		{
+			name:        "simple_text",
+			maxTokens:   5,
+			overlap:     1,
+			text:        "This is a simple test document.",
+			wantChunks:  2,
+			wantPattern: "This is a simple test",
+		},
+		{
+			name:        "single_chunk",
+			maxTokens:   10,
+			overlap:     2,
+			text:        "Short text.",
+			wantChunks:  1,
+			wantPattern: "Short text.",
+		},
+		{
+			name:        "empty_text",
+			maxTokens:   5,
+			overlap:     1,
+			text:        "",
+			wantChunks:  0,
+			wantPattern: "",
+		},
+		{
+			name:        "long_text",
+			maxTokens:   3,
+			overlap:     0,
+			text:        "One two three four five six seven eight nine ten",
+			wantChunks:  4,
+			wantPattern: "One two three",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			chunker := NewTextChunker(tt.maxTokens, tt.overlap)
+			chunks := chunker.ChunkTextWithFallback(tt.text)
+
+			if len(chunks) != tt.wantChunks {
+				t.Errorf("ChunkTextWithFallback() got %d chunks, want %d", len(chunks), tt.wantChunks)
+			}
+
+			if tt.wantChunks > 0 && chunks[0].Text != tt.wantPattern {
+				t.Errorf("ChunkTextWithFallback() first chunk = %q, want %q", chunks[0].Text, tt.wantPattern)
+			}
+
+			// Verify each chunk's stored TokenCount equals EstimateTokenCount of its own text
+			for i, chunk := range chunks {
+				expectedTokens := chunker.EstimateTokenCount(chunk.Text)
+				if chunk.TokenCount != expectedTokens {
+					t.Errorf("Chunk %d token count = %d, want %d", i, chunk.TokenCount, expectedTokens)
+				}
+			}
+		})
+	}
+}
+
+// TestCreateChunkerForStrategy checks the MaxTokens and Overlap of the chunker that createChunkerForStrategy returns for each strategy name.
+func TestCreateChunkerForStrategy(t *testing.T) {
+	// Build a bare LilRag whose only populated field is a 256/38 chunker
+	defaultChunker := NewTextChunker(256, 38)
+	rag := &LilRag{chunker: defaultChunker}
+
+	tests := []struct {
+		strategy    string
+		wantTokens  int
+		wantOverlap int
+	}{
+		{"fast", 128, 19},
+		{"contextual", 512, 76},
+		{"legacy", 1800, 200},
+		{"fallback", 256, 38},  // Expected to match the rag's chunker settings
+		{"recursive", 256, 38}, // Expected to match the rag's chunker settings
+		{"invalid", 256, 38},   // Unknown names are expected to match the rag's chunker settings too
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.strategy, func(t *testing.T) {
+			chunker := rag.createChunkerForStrategy(tt.strategy)
+
+			if chunker.MaxTokens != tt.wantTokens {
+				t.Errorf("createChunkerForStrategy(%s) MaxTokens = %d, want %d",
+					tt.strategy, chunker.MaxTokens, tt.wantTokens)
+			}
+
+			if chunker.Overlap != tt.wantOverlap {
+				t.Errorf("createChunkerForStrategy(%s) Overlap = %d, want %d",
+					tt.strategy, chunker.Overlap, tt.wantOverlap)
+			}
+		})
+	}
+}