
Commit f501137

Add integration tests for streaming with tool calls and enhance UI for tool execution feedback
- Implemented `StreamingWithToolCallsTest` to verify the lifecycle of tool execution during streaming, including event emissions and result handling.
- Enhanced `ChatMessages` component to display tool execution progress with visual indicators for started, completed, and failed states.
- Updated `ChatStreamingLogic` to track tool execution events and reasoning during streaming.
- Added CSS for rotating icons to indicate ongoing tool execution.
- Extended TypeScript types to include tool execution tracking in chat messages.
- Updated SDK to handle new SSE event types for tool execution and reasoning.
- Created documentation for streaming with tool calls, detailing architecture, event types, and usage examples.
1 parent 5d09cbf commit f501137

20 files changed

Lines changed: 2176 additions & 49 deletions

README.md

Lines changed: 57 additions & 4 deletions
````diff
@@ -45,11 +45,15 @@ npm install @knn_labs/conduit-admin-client
 
 ## Key Features
 
-- **OpenAI-Compatible REST API**: Exposes a standard `/v1/chat/completions` endpoint for seamless integration with existing tools and SDKs
+- **OpenAI-Compatible REST API**:
+  - **100% OpenAI compatible** - drop-in replacement for OpenAI API clients
+  - **Extended with Conduit features** - optional enhanced events for reasoning, tool execution, and metrics
+  - **Works with standard clients** - OpenAI SDKs and tools work without any modifications
+  - 📚 For enhanced features, use Conduit SDKs to access real-time tool execution, reasoning events, and performance metrics
 - **Multi-Provider Support**: Interact with various LLM providers through a single interface
 - **Model Routing & Mapping**: Define custom model aliases (e.g., `my-gpt4`) and map them to specific provider models (e.g., `openai/gpt-4`)
 - **Virtual API Key Management**: Create and manage Conduit-specific API keys (`condt_...`) with built-in spend tracking
-- **Streaming Support**: Real-time token streaming for responsive applications
+- **Streaming Support**: Real-time token streaming with optional enhanced events (reasoning, tool execution progress, metrics)
 - **Web-Based User Interface**: Administrative dashboard for configuration and monitoring
 - **Enterprise Security Features**: IP filtering, rate limiting, failed login protection, and security headers
 - **Security Dashboard**: Real-time monitoring of security events and access attempts
@@ -381,15 +385,17 @@ curl http://localhost:5000/v1/chat/completions \
 
 ### Using with OpenAI SDKs
 
+Conduit is **100% compatible with standard OpenAI SDKs** - simply point them to your Conduit instance:
+
 ```python
-# Python example
+# Python example with OpenAI SDK (fully compatible)
 from openai import OpenAI
 
 client = OpenAI(
     api_key="condt_yourvirtualkey",
     # Use http://localhost:5000/v1 for local testing,
     # or your configured CONDUIT_API_BASE_URL for deployed instances
-    base_url="http://localhost:5000/v1"
+    base_url="http://localhost:5000/v1"
 )
 
 response = client.chat.completions.create(
@@ -398,6 +404,53 @@ response = client.chat.completions.create(
 )
 ```
 
+#### Enhanced Features with Conduit SDKs
+
+For access to Conduit-specific features like real-time tool execution progress, reasoning events, and performance metrics, use the official Conduit SDKs:
+
+```typescript
+// Node.js/TypeScript example with Conduit SDK
+import { ConduitCoreClient } from '@knn_labs/conduit-core-client';
+import {
+  isChatCompletionChunk,
+  isToolExecutingEvent,
+  isFinalMetrics
+} from '@knn_labs/conduit-core-client';
+
+const client = new ConduitCoreClient({
+  apiKey: 'condt_yourvirtualkey',
+  baseURL: 'http://localhost:5000'
+});
+
+const stream = await client.chat.create({
+  model: 'gpt-4',
+  messages: [{ role: 'user', content: 'What is the weather?' }],
+  stream: true,
+  function_configuration_ids: ['weather-functions']
+});
+
+for await (const event of stream) {
+  if (isChatCompletionChunk(event)) {
+    // Standard OpenAI content
+    const content = event.choices[0]?.delta?.content;
+  }
+  else if (isToolExecutingEvent(event)) {
+    // Conduit extension: real-time tool execution
+    console.log(`Executing ${event.function_name}...`);
+  }
+  else if (isFinalMetrics(event)) {
+    // Conduit extension: performance metrics
+    console.log(`Tokens: ${event.total_tokens}, Speed: ${event.tokens_per_second}`);
+  }
+}
+```
+
+**Key Differences:**
+- **OpenAI SDKs**: ✅ Full compatibility, ignores Conduit extensions
+- **Conduit SDKs**: ✅ Full compatibility + enhanced events (reasoning, tool execution, metrics)
+
+See [Streaming with Tools Guide](docs/api-guides/streaming-with-tools.md) for complete documentation.
+
 
 ## Documentation
````

SDKs/Node/Core/README.md

Lines changed: 169 additions & 0 deletions
````diff
@@ -93,6 +93,175 @@ for await (const chunk of stream) {
 }
 ```
 
+### Streaming with Function Calling
+
+Conduit extends the OpenAI streaming API with additional event types for richer real-time experiences. While maintaining full OpenAI compatibility, Conduit streams include:
+
+- 🧠 **Reasoning events** - Model thinking/reasoning content
+- 🔧 **Tool execution events** - Real-time function call progress
+- 📊 **Performance metrics** - Live and final metrics
+
+#### Enhanced Event Types
+
+```typescript
+import {
+  isChatCompletionChunk,
+  isFinalMetrics,
+  isReasoningEvent,
+  isToolExecutingEvent,
+  isStreamingMetrics
+} from '@conduit/core';
+
+const stream = await client.chat.completions.create({
+  model: 'gpt-4',
+  messages: [{ role: 'user', content: 'What is the weather in Paris?' }],
+  stream: true,
+  function_configuration_ids: ['weather-functions'], // Conduit managed functions
+});
+
+let totalContent = '';
+let totalReasoning = '';
+const toolCalls = [];
+
+for await (const event of stream) {
+  // Standard OpenAI chat chunks
+  if (isChatCompletionChunk(event)) {
+    const content = event.choices?.[0]?.delta?.content;
+    if (content) {
+      totalContent += content;
+      process.stdout.write(content);
+    }
+
+    // Handle streaming tool calls
+    const deltaToolCalls = event.choices?.[0]?.delta?.tool_calls;
+    if (deltaToolCalls) {
+      for (const toolCall of deltaToolCalls) {
+        const index = toolCall.index ?? 0;
+        if (!toolCalls[index]) {
+          toolCalls[index] = {
+            id: toolCall.id ?? '',
+            type: 'function',
+            function: {
+              name: toolCall.function?.name ?? '',
+              arguments: toolCall.function?.arguments ?? ''
+            }
+          };
+        } else {
+          // Append arguments incrementally
+          if (toolCall.function?.arguments) {
+            toolCalls[index].function.arguments += toolCall.function.arguments;
+          }
+        }
+      }
+    }
+
+    // Check finish_reason
+    const finishReason = event.choices?.[0]?.finish_reason;
+    if (finishReason === 'tool_calls') {
+      console.warn('\nExecuting tools...');
+      // DO NOT end the stream! Backend will execute tools and continue
+    } else if (finishReason === 'stop') {
+      console.warn('\nStream complete');
+    }
+  }
+
+  // Conduit extension: Reasoning events (model thinking)
+  else if (isReasoningEvent(event)) {
+    totalReasoning += event.content;
+    console.warn(`[Reasoning] ${event.content}`);
+  }
+
+  // Conduit extension: Tool execution progress
+  else if (isToolExecutingEvent(event)) {
+    if (event.status === 'started') {
+      console.warn(`\n🔧 Executing ${event.function_name}...`);
+    } else if (event.status === 'completed') {
+      console.warn(`✅ ${event.function_name} completed`);
+      console.warn(`   Result: ${JSON.stringify(event.result)}`);
+      console.warn(`   Cost: $${event.cost}`);
+    } else if (event.status === 'failed') {
+      console.warn(`❌ ${event.function_name} failed: ${event.error_message}`);
+    }
+  }
+
+  // Conduit extension: Live performance metrics
+  else if (isStreamingMetrics(event)) {
+    console.warn(`Speed: ${event.current_tokens_per_second} tokens/sec`);
+  }
+
+  // Conduit extension: Final metrics
+  else if (isFinalMetrics(event)) {
+    console.warn('\nFinal Metrics:');
+    console.warn(`  Total tokens: ${event.total_tokens}`);
+    console.warn(`  Latency: ${event.total_latency_ms}ms`);
+    console.warn(`  Speed: ${event.tokens_per_second} tokens/sec`);
+    console.warn(`  Provider: ${event.provider}`);
+  }
+}
+
+// Use reasoning as fallback if no content (some models output to reasoning)
+const finalContent = totalContent || totalReasoning;
+console.warn('\n\nFinal content:', finalContent);
+console.warn('Tool calls:', toolCalls);
+```
+
+#### Important: finish_reason Semantics
+
+When streaming with function calling, `finish_reason` has special semantics:
+
+- **`finish_reason: "tool_calls"`** - Tool execution in progress, stream **continues**
+- **`finish_reason: "stop"`** - Actual completion, stream ends
+- **`finish_reason: "length"`** - Max tokens reached, stream ends
+
+**Critical**: Do NOT end the stream when `finish_reason === "tool_calls"`. The backend executes tools and continues streaming the model's response with tool results.
+
+```typescript
+// ✅ Correct handling
+if (finishReason === 'tool_calls') {
+  // Tools executing, keep processing stream
+  continue;
+}
+
+if (finishReason === 'stop' || finishReason === 'length') {
+  // Actual completion
+  break;
+}
+
+// ❌ Wrong - ends too early!
+if (finishReason) {
+  break; // This breaks on "tool_calls" prematurely
+}
+```
+
+#### OpenAI Compatibility
+
+Standard OpenAI clients can consume Conduit streams by ignoring Conduit extensions:
+
+```typescript
+import OpenAI from 'openai';
+
+const openai = new OpenAI({
+  baseURL: 'https://your-conduit-instance.com/v1',
+  apiKey: 'your-virtual-key'
+});
+
+const stream = await openai.chat.completions.create({
+  model: 'gpt-4',
+  messages: [{ role: 'user', content: 'Hello!' }],
+  stream: true,
+  tools: [{ type: 'function', function: { name: 'get_weather', ... } }]
+});
+
+for await (const chunk of stream) {
+  // Works exactly like the OpenAI API
+  // Conduit extensions (reasoning, tool-executing, metrics) are ignored
+  const content = chunk.choices[0]?.delta?.content;
+  if (content) process.stdout.write(content);
+}
+```
+
+For more details, see the [Streaming with Tools API Guide](../../docs/api-guides/streaming-with-tools.md).
+
 ### React Query Hooks - Streaming
 
 The React Query integration now supports proper streaming with callbacks:
````

SDKs/Node/Core/src/chat/streaming/chat-streaming-manager.ts

Lines changed: 40 additions & 0 deletions
```diff
@@ -368,6 +368,46 @@ export class ChatStreamingManager
         break;
       }
 
+      case SSEEventType.Reasoning: {
+        const reasoningData = event.data as { content?: string };
+        const reasoning = reasoningData?.content;
+
+        if (reasoning !== undefined && reasoning !== null && reasoning !== '') {
+          this.state.totalReasoning += reasoning;
+          callbacks.onReasoning?.(reasoning, this.state.totalReasoning);
+          this.log('Reasoning content received:', reasoning.slice(0, 100));
+        }
+        break;
+      }
+
+      case SSEEventType.ToolExecuting: {
+        const toolData = event.data as {
+          tool_call_id?: string;
+          function_name?: string;
+          status: string;
+          result?: unknown;
+          cost?: number;
+          error_message?: string;
+          function_execution_id?: string;
+        };
+
+        this.log('Tool execution event:', toolData.function_name, toolData.status);
+        callbacks.onToolExecuting?.(toolData);
+        break;
+      }
+
+      case SSEEventType.ToolResult: {
+        const toolResultData = event.data as {
+          tool_call_id: string;
+          result: unknown;
+          error?: string;
+        };
+
+        this.log('Tool result received for:', toolResultData.tool_call_id);
+        callbacks.onToolResult?.(toolResultData);
+        break;
+      }
+
       case SSEEventType.Error: {
         this.log('Received SSE error event:', event);
         const errorData = event.data as {
```
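The commit message notes that `ChatMessages` renders started, completed, and failed indicators from these events. Below is a minimal sketch of how a consumer of `onToolExecuting` might fold the events into per-tool UI state; the event shape mirrors the `toolData` cast above, but the `ToolUiState` type, the map, and the key choice are illustrative assumptions, not code from this commit:

```typescript
// Illustrative only: event fields mirror the `toolData` cast in the diff above.
type ToolUiState = {
  functionName?: string;
  status: string; // 'started' | 'completed' | 'failed' are the states this commit surfaces
  result?: unknown;
  errorMessage?: string;
};

const toolUi = new Map<string, ToolUiState>();

function onToolExecuting(event: {
  tool_call_id?: string;
  function_name?: string;
  status: string;
  result?: unknown;
  error_message?: string;
}): void {
  // Prefer tool_call_id as the stable key; fall back to the function name.
  const key = event.tool_call_id ?? event.function_name ?? 'unknown';
  const prev = toolUi.get(key);
  toolUi.set(key, {
    functionName: event.function_name ?? prev?.functionName,
    status: event.status,
    result: event.result ?? prev?.result,
    errorMessage: event.error_message ?? prev?.errorMessage,
  });
}
```

Keying on `tool_call_id` lets a later `completed` or `failed` event overwrite the `started` entry for the same call, which is what drives the spinner-to-checkmark transition described in the commit message.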

SDKs/Node/Core/src/chat/streaming/types.ts

Lines changed: 26 additions & 0 deletions
```diff
@@ -185,16 +185,42 @@ export interface StreamMessageOptions extends SendMessageOptions {
  * Callbacks for UI integration
  */
 export interface StreamingCallbacks {
+  /** Called for each chat completion chunk received */
   onChunk?: (chunk: ChatCompletionChunk) => void;
+  /** Called when content delta is received (cumulative content provided) */
   onContent?: (content: string, totalContent: string) => void;
+  /** Called when reasoning/thinking content is received */
+  onReasoning?: (reasoning: string, totalReasoning: string) => void;
+  /** Called when tool execution status updates are received */
+  onToolExecuting?: (event: {
+    tool_call_id?: string;
+    function_name?: string;
+    status: string;
+    result?: unknown;
+    cost?: number;
+    error_message?: string;
+    function_execution_id?: string;
+  }) => void;
+  /** Called when individual tool results are received (optional, for detailed logging) */
+  onToolResult?: (event: {
+    tool_call_id: string;
+    result: unknown;
+    error?: string;
+  }) => void;
+  /** Called when performance metrics are received */
   onMetrics?: (metrics: StreamingPerformanceMetrics | MetricsEventData) => void;
+  /** Called when tokens per second updates are available */
   onTokensPerSecond?: (tokensPerSecond: number) => void;
+  /** Called when an error occurs during streaming */
   onError?: (error: StreamingError) => void;
+  /** Called when streaming completes successfully */
   onComplete?: (response: {
     content: string;
     metadata?: MessageMetadata;
   }) => void;
+  /** Called when streaming starts */
   onStart?: () => void;
+  /** Called when streaming is aborted */
   onAbort?: () => void;
 }
```
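A minimal sketch of an app-side callbacks object exercising the new hooks, assuming `StreamingCallbacks` is re-exported from the package root; the state variables and logging are hypothetical, not part of this commit:

```typescript
// Assumption: StreamingCallbacks is re-exported from the package root.
import type { StreamingCallbacks } from '@knn_labs/conduit-core-client';

// Hypothetical app-side state for the example.
let liveContent = '';
const toolStatus = new Map<string, string>();

const callbacks: StreamingCallbacks = {
  onStart: () => console.log('stream started'),
  // The second argument is cumulative, per the JSDoc above.
  onContent: (_delta, totalContent) => { liveContent = totalContent; },
  onReasoning: (_delta, totalReasoning) => {
    console.log(`[thinking] ${totalReasoning.length} chars so far`);
  },
  onToolExecuting: (event) => {
    // Statuses surfaced by this commit: 'started' | 'completed' | 'failed'.
    if (event.tool_call_id) toolStatus.set(event.tool_call_id, event.status);
  },
  onToolResult: (event) => console.debug('tool result', event.tool_call_id, event.result),
  onError: (error) => console.error('stream error', error),
  onComplete: ({ content }) => console.log('done:', content || liveContent),
};
```

Note that `onContent` and `onReasoning` already deliver cumulative totals, so consumers do not need to accumulate deltas themselves.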

SDKs/Node/Core/src/chat/utils/sse-parser.ts

Lines changed: 11 additions & 0 deletions
```diff
@@ -5,11 +5,22 @@
 
 /**
  * SSE event types from Core API
+ * Combines OpenAI-compatible standard events with Conduit-specific extensions
  */
 export enum SSEEventType {
+  /** Standard OpenAI content chunks containing delta updates */
   Content = 'content',
+  /** Conduit extension: Model reasoning/thinking content separate from main response */
+  Reasoning = 'reasoning',
+  /** Conduit extension: Tool/function execution status and progress updates */
+  ToolExecuting = 'tool-executing',
+  /** Conduit extension: Individual tool execution results (optional, for detailed logging) */
+  ToolResult = 'tool-result',
+  /** Conduit extension: Real-time performance metrics during streaming */
   Metrics = 'metrics',
+  /** Conduit extension: Final performance metrics at stream completion */
   MetricsFinal = 'metrics-final',
+  /** Conduit extension: Error events during streaming */
   Error = 'error'
 }
```
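On the wire, each of these event types arrives as a standard SSE frame pairing an `event:` name with a `data:` JSON payload. Below is a minimal illustrative parser under that assumption; the actual `sse-parser.ts` implementation in this commit is not shown in the diff:

```typescript
// Illustrative SSE framing only; the real parser in this commit is not shown here.
interface ParsedSSEEvent {
  type: string;  // expected to match an SSEEventType value, e.g. 'tool-executing'
  data: unknown; // JSON payload from the data: field(s)
}

function parseSSEFrame(frame: string): ParsedSSEEvent | null {
  let type = 'message'; // SSE default event name when no event: field is sent
  const dataLines: string[] = [];
  for (const line of frame.split('\n')) {
    if (line.startsWith('event:')) {
      type = line.slice('event:'.length).trim();
    } else if (line.startsWith('data:')) {
      dataLines.push(line.slice('data:'.length).trim());
    }
  }
  const data = dataLines.join('\n');
  if (!data || data === '[DONE]') return null; // OpenAI-style terminator, if used
  return { type, data: JSON.parse(data) };
}

// e.g. parseSSEFrame('event: tool-executing\ndata: {"status":"started"}')
```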
