diff --git a/core/providers/ollama/chat.go b/core/providers/ollama/chat.go new file mode 100644 index 000000000..3fdb9db89 --- /dev/null +++ b/core/providers/ollama/chat.go @@ -0,0 +1,223 @@ +// Package ollama implements the Ollama provider using native Ollama APIs. +// This file contains converters for chat completion requests and responses. +package ollama + +import ( + "github.com/maximhq/bifrost/core/schemas" +) + +// ToOllamaChatRequest converts a Bifrost chat request to Ollama native format. +func ToOllamaChatRequest(bifrostReq *schemas.BifrostChatRequest) *OllamaChatRequest { + if bifrostReq == nil || bifrostReq.Input == nil { + return nil + } + + ollamaReq := &OllamaChatRequest{ + Model: bifrostReq.Model, + Messages: convertMessagesToOllama(bifrostReq.Input), + } + + // Convert parameters + if bifrostReq.Params != nil { + options := &OllamaOptions{} + hasOptions := false + + // Map standard parameters + if bifrostReq.Params.MaxCompletionTokens != nil { + options.NumPredict = bifrostReq.Params.MaxCompletionTokens + hasOptions = true + } + if bifrostReq.Params.Temperature != nil { + options.Temperature = bifrostReq.Params.Temperature + hasOptions = true + } + if bifrostReq.Params.TopP != nil { + options.TopP = bifrostReq.Params.TopP + hasOptions = true + } + if bifrostReq.Params.PresencePenalty != nil { + options.PresencePenalty = bifrostReq.Params.PresencePenalty + hasOptions = true + } + if bifrostReq.Params.FrequencyPenalty != nil { + options.FrequencyPenalty = bifrostReq.Params.FrequencyPenalty + hasOptions = true + } + if bifrostReq.Params.Stop != nil { + options.Stop = bifrostReq.Params.Stop + hasOptions = true + } + if bifrostReq.Params.Seed != nil { + options.Seed = bifrostReq.Params.Seed + hasOptions = true + } + + // Handle extra parameters for Ollama-specific fields + if bifrostReq.Params.ExtraParams != nil { + // Top-k sampling + if topK, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["top_k"]); ok { + options.TopK = topK + hasOptions = true + } + + // Context window size + if numCtx, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["num_ctx"]); ok { + options.NumCtx = numCtx + hasOptions = true + } + + // Repeat penalty + if repeatPenalty, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["repeat_penalty"]); ok { + options.RepeatPenalty = repeatPenalty + hasOptions = true + } + + // Repeat last N + if repeatLastN, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["repeat_last_n"]); ok { + options.RepeatLastN = repeatLastN + hasOptions = true + } + + // Mirostat sampling + if mirostat, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["mirostat"]); ok { + options.Mirostat = mirostat + hasOptions = true + } + if mirostatEta, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["mirostat_eta"]); ok { + options.MirostatEta = mirostatEta + hasOptions = true + } + if mirostatTau, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["mirostat_tau"]); ok { + options.MirostatTau = mirostatTau + hasOptions = true + } + + // TFS-Z sampling + if tfsZ, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["tfs_z"]); ok { + options.TfsZ = tfsZ + hasOptions = true + } + + // Typical-P sampling + if typicalP, ok := schemas.SafeExtractFloat64Pointer(bifrostReq.Params.ExtraParams["typical_p"]); ok { + options.TypicalP = typicalP + hasOptions = true + } + + // Performance options + if numBatch, ok := 
schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["num_batch"]); ok { + options.NumBatch = numBatch + hasOptions = true + } + if numGPU, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["num_gpu"]); ok { + options.NumGPU = numGPU + hasOptions = true + } + if numThread, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["num_thread"]); ok { + options.NumThread = numThread + hasOptions = true + } + + // Keep-alive duration + if keepAlive, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["keep_alive"]); ok { + ollamaReq.KeepAlive = keepAlive + } + + // Enable thinking mode (for thinking-specific models) + if think, ok := schemas.SafeExtractBoolPointer(bifrostReq.Params.ExtraParams["think"]); ok { + ollamaReq.Think = think + } + } + + if hasOptions { + ollamaReq.Options = options + } + + // Handle response format (JSON mode) + if bifrostReq.Params.ResponseFormat != nil { + if rf, ok := (*bifrostReq.Params.ResponseFormat).(map[string]interface{}); ok { + if t, exists := rf["type"]; exists && t == "json_object" { + ollamaReq.Format = "json" + } else if schema, exists := rf["json_schema"]; exists { + // Pass JSON schema directly for structured output + ollamaReq.Format = schema + } + } + } + + // Convert tools + if bifrostReq.Params.Tools != nil { + ollamaReq.Tools = convertToolsToOllama(bifrostReq.Params.Tools) + } + } + + return ollamaReq +} + +// ToBifrostChatRequest converts an Ollama chat request to Bifrost format. +// This is used for passthrough/reverse conversion scenarios. +func (r *OllamaChatRequest) ToBifrostChatRequest() *schemas.BifrostChatRequest { + if r == nil { + return nil + } + + provider, model := schemas.ParseModelString(r.Model, schemas.Ollama) + + bifrostReq := &schemas.BifrostChatRequest{ + Provider: provider, + Model: model, + Input: convertMessagesFromOllama(r.Messages), + } + + // Convert options to parameters + if r.Options != nil { + params := &schemas.ChatParameters{ + ExtraParams: make(map[string]interface{}), + } + + if r.Options.NumPredict != nil { + params.MaxCompletionTokens = r.Options.NumPredict + } + if r.Options.Temperature != nil { + params.Temperature = r.Options.Temperature + } + if r.Options.TopP != nil { + params.TopP = r.Options.TopP + } + if r.Options.Stop != nil { + params.Stop = r.Options.Stop + } + if r.Options.PresencePenalty != nil { + params.PresencePenalty = r.Options.PresencePenalty + } + if r.Options.FrequencyPenalty != nil { + params.FrequencyPenalty = r.Options.FrequencyPenalty + } + if r.Options.Seed != nil { + params.Seed = r.Options.Seed + } + + // Map Ollama-specific parameters to ExtraParams + if r.Options.TopK != nil { + params.ExtraParams["top_k"] = *r.Options.TopK + } + if r.Options.NumCtx != nil { + params.ExtraParams["num_ctx"] = *r.Options.NumCtx + } + if r.Options.RepeatPenalty != nil { + params.ExtraParams["repeat_penalty"] = *r.Options.RepeatPenalty + } + + bifrostReq.Params = params + } + + // Convert tools + if r.Tools != nil { + if bifrostReq.Params == nil { + bifrostReq.Params = &schemas.ChatParameters{} + } + bifrostReq.Params.Tools = convertToolsFromOllama(r.Tools) + } + + return bifrostReq +} diff --git a/core/providers/ollama/embedding.go b/core/providers/ollama/embedding.go new file mode 100644 index 000000000..42784013d --- /dev/null +++ b/core/providers/ollama/embedding.go @@ -0,0 +1,118 @@ +// Package ollama implements the Ollama provider using native Ollama APIs. +// This file contains converters for embedding requests and responses. 
+package ollama + +import ( + "github.com/maximhq/bifrost/core/schemas" +) + +func ToOllamaEmbeddingRequest(bifrostReq *schemas.BifrostEmbeddingRequest) *OllamaEmbeddingRequest { + if bifrostReq == nil { + return nil + } + + ollamaReq := &OllamaEmbeddingRequest{ + Model: bifrostReq.Model, + } + + // Handle input - Bifrost uses EmbeddingInput type + if bifrostReq.Input != nil { + if bifrostReq.Input.Text != nil { + ollamaReq.Input = *bifrostReq.Input.Text + } else if bifrostReq.Input.Texts != nil { + ollamaReq.Input = bifrostReq.Input.Texts + } + } + + // Handle extra parameters from Params + if bifrostReq.Params != nil && bifrostReq.Params.ExtraParams != nil { + // Truncate option + if truncate, ok := schemas.SafeExtractBoolPointer(bifrostReq.Params.ExtraParams["truncate"]); ok { + ollamaReq.Truncate = truncate + } + + // Keep-alive duration + if keepAlive, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["keep_alive"]); ok { + ollamaReq.KeepAlive = keepAlive + } + + // Model options + options := &OllamaOptions{} + hasOptions := false + + if numCtx, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["num_ctx"]); ok { + options.NumCtx = numCtx + hasOptions = true + } + + if hasOptions { + ollamaReq.Options = options + } + } + + return ollamaReq +} + +// ToBifrostEmbeddingRequest converts an Ollama embedding request to Bifrost format. +// This is used for passthrough/reverse conversion scenarios. +func (r *OllamaEmbeddingRequest) ToBifrostEmbeddingRequest() *schemas.BifrostEmbeddingRequest { + if r == nil { + return nil + } + + provider, model := schemas.ParseModelString(r.Model, schemas.Ollama) + + bifrostReq := &schemas.BifrostEmbeddingRequest{ + Provider: provider, + Model: model, + } + + // Convert input to EmbeddingInput + if r.Input != nil { + input := &schemas.EmbeddingInput{} + converted := false + switch v := r.Input.(type) { + case string: + input.Text = &v + converted = true + case []string: + input.Texts = v + converted = true + case []interface{}: + ss := make([]string, 0, len(v)) + for _, it := range v { + s, ok := it.(string) + if !ok { + converted = false + break + } + ss = append(ss, s) + } + if len(ss) > 0 { + input.Texts = ss + converted = true + } + } + if converted { + bifrostReq.Input = input + } + } + + // Map Ollama-specific options back to extra params + if r.Truncate != nil || r.KeepAlive != nil || (r.Options != nil && r.Options.NumCtx != nil) { + bifrostReq.Params = &schemas.EmbeddingParameters{ + ExtraParams: make(map[string]interface{}), + } + if r.Truncate != nil { + bifrostReq.Params.ExtraParams["truncate"] = *r.Truncate + } + if r.KeepAlive != nil { + bifrostReq.Params.ExtraParams["keep_alive"] = *r.KeepAlive + } + if r.Options != nil && r.Options.NumCtx != nil { + bifrostReq.Params.ExtraParams["num_ctx"] = *r.Options.NumCtx + } + } + + return bifrostReq +} diff --git a/core/providers/ollama/models.go b/core/providers/ollama/models.go new file mode 100644 index 000000000..6d429719c --- /dev/null +++ b/core/providers/ollama/models.go @@ -0,0 +1,67 @@ +// Package ollama implements the Ollama provider using native Ollama APIs. +// This file contains converters for list models requests and responses. +package ollama + +import ( + "github.com/maximhq/bifrost/core/schemas" +) + +// ToOllamaModel converts a Bifrost model to Ollama format. +// Note: Ollama's /api/tags endpoint is GET-only and doesn't need a request body. +// This function is included for completeness and potential future use. 
+func ToOllamaModel(bifrostModel *schemas.Model) *OllamaModel { + if bifrostModel == nil { + return nil + } + + return &OllamaModel{ + Name: bifrostModel.ID, + Model: bifrostModel.ID, + } +} + +// ToBifrostModel converts an Ollama model to Bifrost format. +func (m *OllamaModel) ToBifrostModel() *schemas.Model { + if m == nil { + return nil + } + + created := m.ModifiedAt.Unix() + ownedBy := "ollama" + + return &schemas.Model{ + ID: m.Name, + Created: &created, + OwnedBy: &ownedBy, + } +} + +// GetModelInfo returns formatted model information for display. +func (m *OllamaModel) GetModelInfo() map[string]interface{} { + if m == nil { + return nil + } + + info := map[string]interface{}{ + "name": m.Name, + "model": m.Model, + "modified_at": m.ModifiedAt, + "size": m.Size, + "digest": m.Digest, + } + + if m.Details.Family != "" { + info["family"] = m.Details.Family + } + if m.Details.ParameterSize != "" { + info["parameter_size"] = m.Details.ParameterSize + } + if m.Details.QuantizationLevel != "" { + info["quantization_level"] = m.Details.QuantizationLevel + } + if m.Details.Format != "" { + info["format"] = m.Details.Format + } + + return info +} diff --git a/core/providers/ollama/ollama.go b/core/providers/ollama/ollama.go index 62012151d..ae79fa668 100644 --- a/core/providers/ollama/ollama.go +++ b/core/providers/ollama/ollama.go @@ -1,20 +1,36 @@ -// Package providers implements various LLM providers and their utility functions. -// This file contains the Ollama provider implementation. +// Package ollama implements the Ollama provider using native Ollama APIs. +// This file contains the main provider implementation for Ollama's native API. +// +// Ollama API Documentation: https://github.com/ollama/ollama/blob/main/docs/api.md +// +// Supported endpoints: +// - /api/chat - Chat completion +// - /api/embed - Embeddings +// - /api/tags - List models +// +// Key differences from OpenAI-compatible API: +// - Native endpoints instead of /v1/* paths +// - Newline-delimited JSON streaming instead of SSE +// - Different request/response structure +// - Options object for model parameters package ollama import ( + "bufio" "context" "fmt" + "net/http" "strings" + "sync" "time" - "github.com/maximhq/bifrost/core/providers/openai" + "github.com/bytedance/sonic" providerUtils "github.com/maximhq/bifrost/core/providers/utils" schemas "github.com/maximhq/bifrost/core/schemas" "github.com/valyala/fasthttp" ) -// OllamaProvider implements the Provider interface for Ollama's API. +// OllamaProvider implements the Provider interface for Ollama's native API. type OllamaProvider struct { logger schemas.Logger // Logger for provider operations client *fasthttp.Client // HTTP client for API requests @@ -23,6 +39,48 @@ type OllamaProvider struct { sendBackRawResponse bool // Whether to include raw response in BifrostResponse } +// Response pools for efficient memory usage +var ( + ollamaChatResponsePool = sync.Pool{ + New: func() interface{} { + return &OllamaChatResponse{} + }, + } + ollamaEmbeddingResponsePool = sync.Pool{ + New: func() interface{} { + return &OllamaEmbeddingResponse{} + }, + } +) + +// acquireOllamaChatResponse gets an Ollama chat response from the pool. +func acquireOllamaChatResponse() *OllamaChatResponse { + resp := ollamaChatResponsePool.Get().(*OllamaChatResponse) + *resp = OllamaChatResponse{} // Reset the struct + return resp +} + +// releaseOllamaChatResponse returns an Ollama chat response to the pool. 
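+// It is intended to be paired with acquireOllamaChatResponse, e.g. (as done in
+// ChatCompletion below):
+//
+//	resp := acquireOllamaChatResponse()
+//	defer releaseOllamaChatResponse(resp)
+//
+// The struct must not be used again after it has been returned to the pool.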
+func releaseOllamaChatResponse(resp *OllamaChatResponse) { + if resp != nil { + ollamaChatResponsePool.Put(resp) + } +} + +// acquireOllamaEmbeddingResponse gets an Ollama embedding response from the pool. +func acquireOllamaEmbeddingResponse() *OllamaEmbeddingResponse { + resp := ollamaEmbeddingResponsePool.Get().(*OllamaEmbeddingResponse) + *resp = OllamaEmbeddingResponse{} // Reset the struct + return resp +} + +// releaseOllamaEmbeddingResponse returns an Ollama embedding response to the pool. +func releaseOllamaEmbeddingResponse(resp *OllamaEmbeddingResponse) { + if resp != nil { + ollamaEmbeddingResponsePool.Put(resp) + } +} + // NewOllamaProvider creates a new Ollama provider instance. // It initializes the HTTP client with the provided configuration and sets up response pools. // The client is configured with timeouts, concurrency limits, and optional proxy settings. @@ -37,19 +95,20 @@ func NewOllamaProvider(config *schemas.ProviderConfig, logger schemas.Logger) (* MaxConnWaitTimeout: 10 * time.Second, } - // // Pre-warm response pools - // for range config.ConcurrencyAndBufferSize.Concurrency { - // ollamaResponsePool.Put(&schemas.BifrostResponse{}) - // } + // Pre-warm response pools + for i := 0; i < config.ConcurrencyAndBufferSize.Concurrency; i++ { + ollamaChatResponsePool.Put(&OllamaChatResponse{}) + ollamaEmbeddingResponsePool.Put(&OllamaEmbeddingResponse{}) + } // Configure proxy if provided - client = providerUtils.ConfigureProxy(client, config.ProxyConfig, logger) + providerUtils.ConfigureProxy(client, config.ProxyConfig, logger) config.NetworkConfig.BaseURL = strings.TrimRight(config.NetworkConfig.BaseURL, "/") - // BaseURL is required for Ollama + // Set default BaseURL for local Ollama if not provided if config.NetworkConfig.BaseURL == "" { - return nil, fmt.Errorf("base_url is required for ollama provider") + config.NetworkConfig.BaseURL = "http://localhost:11434" } return &OllamaProvider{ @@ -66,102 +125,409 @@ func (provider *OllamaProvider) GetProviderKey() schemas.ModelProvider { return schemas.Ollama } -// ListModels performs a list models request to Ollama's API. +// completeRequest sends a request to Ollama's native API and handles the response. +// It constructs the API URL, sets up authentication, and processes the response. +// Returns the response body or an error if the request fails. +func (provider *OllamaProvider) completeRequest(ctx context.Context, jsonData []byte, url string, key string) ([]byte, time.Duration, *schemas.BifrostError) { + req := fasthttp.AcquireRequest() + resp := fasthttp.AcquireResponse() + defer fasthttp.ReleaseRequest(req) + defer fasthttp.ReleaseResponse(resp) + + // Set any extra headers from network config + providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil) + + req.SetRequestURI(url) + req.Header.SetMethod(http.MethodPost) + req.Header.SetContentType("application/json") + + // Uses Authorization: Bearer for Ollama Cloud / authenticated instances. 
+ if key != "" { + req.Header.Set("Authorization", "Bearer "+key) + } + + req.SetBody(jsonData) + + // Send the request + latency, bifrostErr := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp) + if bifrostErr != nil { + return nil, latency, bifrostErr + } + + // Handle error response + if resp.StatusCode() != fasthttp.StatusOK { + provider.logger.Debug(fmt.Sprintf("error from %s provider: %s", provider.GetProviderKey(), string(resp.Body()))) + return nil, latency, parseOllamaError(resp, provider.GetProviderKey()) + } + + body, err := providerUtils.CheckAndDecodeBody(resp) + if err != nil { + return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, err, provider.GetProviderKey()) + } + + // Copy body before releasing response + bodyCopy := append([]byte(nil), body...) + + return bodyCopy, latency, nil +} + +// parseOllamaError parses an error response from Ollama's API. +func parseOllamaError(resp *fasthttp.Response, providerType schemas.ModelProvider) *schemas.BifrostError { + statusCode := resp.StatusCode() + body := resp.Body() + + var errorResp OllamaError + if err := sonic.Unmarshal(body, &errorResp); err == nil && errorResp.Error != "" { + return providerUtils.NewProviderAPIError(errorResp.Error, nil, statusCode, providerType, nil, nil) + } + + return providerUtils.NewProviderAPIError(string(body), nil, statusCode, providerType, nil, nil) +} + +// ListModels performs a list models request to Ollama's native API. +// Uses the /api/tags endpoint to fetch available models. func (provider *OllamaProvider) ListModels(ctx context.Context, keys []schemas.Key, request *schemas.BifrostListModelsRequest) (*schemas.BifrostListModelsResponse, *schemas.BifrostError) { - if provider.networkConfig.BaseURL == "" { - return nil, providerUtils.NewConfigurationError("base_url is not set", provider.GetProviderKey()) + // Use first key if available, otherwise empty (for local Ollama) + var key schemas.Key + if len(keys) > 0 { + key = keys[0] } - return openai.HandleOpenAIListModelsRequest( - ctx, - provider.client, - request, - provider.networkConfig.BaseURL+providerUtils.GetPathFromContext(ctx, "/v1/models"), - keys, - provider.networkConfig.ExtraHeaders, - provider.GetProviderKey(), - providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), - providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), - provider.logger, - ) + + return provider.listModelsByKey(ctx, key, request) +} + +// listModelsByKey performs a list models request for a single key. 
+func (provider *OllamaProvider) listModelsByKey(ctx context.Context, key schemas.Key, request *schemas.BifrostListModelsRequest) (*schemas.BifrostListModelsResponse, *schemas.BifrostError) { + req := fasthttp.AcquireRequest() + resp := fasthttp.AcquireResponse() + defer fasthttp.ReleaseRequest(req) + defer fasthttp.ReleaseResponse(resp) + + // Set any extra headers from network config + providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil) + + // Build URL - Ollama uses GET /api/tags + // Use GetPathFromContext to support path overrides + req.SetRequestURI(provider.networkConfig.BaseURL + providerUtils.GetPathFromContext(ctx, "/api/tags")) + req.Header.SetMethod(http.MethodGet) + + // Set API key if provided + if key.Value != "" { + req.Header.Set("Authorization", "Bearer "+key.Value) + } + + // Make request + latency, bifrostErr := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Handle error response + if resp.StatusCode() != fasthttp.StatusOK { + return nil, parseOllamaError(resp, provider.GetProviderKey()) + } + + // Decode response body (handles gzip, etc.) + body, err := providerUtils.CheckAndDecodeBody(resp) + if err != nil { + return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, err, provider.GetProviderKey()) + } + + // Parse response + var ollamaResponse OllamaListModelsResponse + _, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(body, &ollamaResponse, nil, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse)) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Convert to Bifrost format + response := ollamaResponse.ToBifrostListModelsResponse(provider.GetProviderKey(), key.Models) + response.ExtraFields.Latency = latency.Milliseconds() + + // Set raw response if enabled + if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) { + response.ExtraFields.RawResponse = rawResponse + } + + return response, nil } -// TextCompletion performs a text completion request to the Ollama API. +// TextCompletion is not directly supported by Ollama's native API. +// Use ChatCompletion instead for text generation. func (provider *OllamaProvider) TextCompletion(ctx context.Context, key schemas.Key, request *schemas.BifrostTextCompletionRequest) (*schemas.BifrostTextCompletionResponse, *schemas.BifrostError) { - return openai.HandleOpenAITextCompletionRequest( - ctx, - provider.client, - provider.networkConfig.BaseURL+providerUtils.GetPathFromContext(ctx, "/v1/completions"), - request, - key, - provider.networkConfig.ExtraHeaders, - provider.GetProviderKey(), - providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), - providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), - provider.logger, - ) + return nil, providerUtils.NewUnsupportedOperationError(schemas.TextCompletionRequest, provider.GetProviderKey()) } -// TextCompletionStream performs a streaming text completion request to Ollama's API. -// It formats the request, sends it to Ollama, and processes the response. -// Returns a channel of BifrostStream objects or an error if the request fails. +// TextCompletionStream is not directly supported by Ollama's native API. +// Use ChatCompletionStream instead for text generation. 
func (provider *OllamaProvider) TextCompletionStream(ctx context.Context, postHookRunner schemas.PostHookRunner, key schemas.Key, request *schemas.BifrostTextCompletionRequest) (chan *schemas.BifrostStream, *schemas.BifrostError) { - return openai.HandleOpenAITextCompletionStreaming( - ctx, - provider.client, - provider.networkConfig.BaseURL+"/v1/completions", - request, - nil, - provider.networkConfig.ExtraHeaders, - providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), - providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), - provider.GetProviderKey(), - postHookRunner, - nil, - provider.logger, - ) + return nil, providerUtils.NewUnsupportedOperationError(schemas.TextCompletionStreamRequest, provider.GetProviderKey()) } -// ChatCompletion performs a chat completion request to the Ollama API. +// ChatCompletion performs a chat completion request to Ollama's native API. +// Uses the /api/chat endpoint with stream=false. func (provider *OllamaProvider) ChatCompletion(ctx context.Context, key schemas.Key, request *schemas.BifrostChatRequest) (*schemas.BifrostChatResponse, *schemas.BifrostError) { - return openai.HandleOpenAIChatCompletionRequest( + // Convert to Ollama format + jsonData, bifrostErr := providerUtils.CheckContextAndGetRequestBody( ctx, - provider.client, - provider.networkConfig.BaseURL+providerUtils.GetPathFromContext(ctx, "/v1/chat/completions"), request, - key, - provider.networkConfig.ExtraHeaders, - providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), - providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), - provider.GetProviderKey(), - provider.logger, + func() (any, error) { + ollamaReq := ToOllamaChatRequest(request) + if ollamaReq != nil { + ollamaReq.Stream = schemas.Ptr(false) // Non-streaming request + } + return ollamaReq, nil + }, + provider.GetProviderKey()) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Make request + responseBody, latency, bifrostErr := provider.completeRequest( + ctx, + jsonData, + provider.networkConfig.BaseURL+providerUtils.GetPathFromContext(ctx, "/api/chat"), + key.Value, ) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Parse response + response := acquireOllamaChatResponse() + defer releaseOllamaChatResponse(response) + + _, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(responseBody, response, jsonData, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse)) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Convert to Bifrost format + bifrostResponse := response.ToBifrostChatResponse(request.Model) + + // Set ExtraFields + bifrostResponse.ExtraFields.Provider = provider.GetProviderKey() + bifrostResponse.ExtraFields.ModelRequested = request.Model + bifrostResponse.ExtraFields.RequestType = schemas.ChatCompletionRequest + bifrostResponse.ExtraFields.Latency = latency.Milliseconds() + + // Set raw response if enabled + if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) { + bifrostResponse.ExtraFields.RawResponse = rawResponse + } + + return bifrostResponse, nil } -// ChatCompletionStream performs a streaming chat completion request to the Ollama API. -// It supports real-time streaming of responses using Server-Sent Events (SSE). -// Uses Ollama's OpenAI-compatible streaming format. 
-// Returns a channel containing BifrostResponse objects representing the stream or an error if the request fails. +// ChatCompletionStream performs a streaming chat completion request to Ollama's native API. +// Uses newline-delimited JSON streaming format (not SSE). func (provider *OllamaProvider) ChatCompletionStream(ctx context.Context, postHookRunner schemas.PostHookRunner, key schemas.Key, request *schemas.BifrostChatRequest) (chan *schemas.BifrostStream, *schemas.BifrostError) { - // Use shared OpenAI-compatible streaming logic - return openai.HandleOpenAIChatCompletionStreaming( + // Check if the request is a redirect from ResponsesStream to ChatCompletionStream + isResponsesToChatCompletionsFallback := false + var responsesStreamState *schemas.ChatToResponsesStreamState + if ctx.Value(schemas.BifrostContextKeyIsResponsesToChatCompletionFallback) != nil { + isResponsesToChatCompletionsFallbackValue, ok := ctx.Value(schemas.BifrostContextKeyIsResponsesToChatCompletionFallback).(bool) + if ok && isResponsesToChatCompletionsFallbackValue { + isResponsesToChatCompletionsFallback = true + responsesStreamState = schemas.AcquireChatToResponsesStreamState() + defer schemas.ReleaseChatToResponsesStreamState(responsesStreamState) + } + } + + // Convert to Ollama format with streaming enabled + jsonData, bifrostErr := providerUtils.CheckContextAndGetRequestBody( ctx, - provider.client, - provider.networkConfig.BaseURL+"/v1/chat/completions", request, - nil, - provider.networkConfig.ExtraHeaders, - providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), - providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), - schemas.Ollama, - postHookRunner, - nil, - nil, - nil, - provider.logger, - ) + func() (any, error) { + ollamaReq := ToOllamaChatRequest(request) + if ollamaReq != nil { + ollamaReq.Stream = schemas.Ptr(true) // Enable streaming + } + return ollamaReq, nil + }, + provider.GetProviderKey()) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Create request + req := fasthttp.AcquireRequest() + resp := fasthttp.AcquireResponse() + resp.StreamBody = true // Enable streaming + defer fasthttp.ReleaseRequest(req) + + // Set headers + providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil) + req.SetRequestURI(provider.networkConfig.BaseURL + "/api/chat") + req.Header.SetMethod(http.MethodPost) + req.Header.SetContentType("application/json") + + if key.Value != "" { + req.Header.Set("Authorization", "Bearer "+key.Value) + } + + req.SetBody(jsonData) + + // Make the request with context support + // NOTE: fasthttp does not natively support context cancellation for streaming requests. + // MakeRequestWithContext only cancels waiting for the initial request, not the ongoing stream. + // The scanner loop below includes context cancellation checks to exit early when ctx is cancelled. 
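+ // Each line of the response body is expected to be a complete JSON object in
+ // Ollama's newline-delimited streaming format, roughly (illustrative, abridged):
+ //
+ //	{"model":"llama3.2","created_at":"...","message":{"role":"assistant","content":"Hel"},"done":false}
+ //	{"model":"llama3.2","created_at":"...","message":{"role":"assistant","content":"lo"},"done":false}
+ //	{"model":"llama3.2","created_at":"...","message":{"role":"assistant","content":""},"done":true,"done_reason":"stop","prompt_eval_count":26,"eval_count":42}
+ //
+ // The scanner below consumes one such object per line and converts it via
+ // OllamaStreamResponse.ToBifrostStreamResponse.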
+ _, bifrostErr = providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp) + if bifrostErr != nil { + defer providerUtils.ReleaseStreamingResponse(resp) + return nil, bifrostErr + } + + // Check for HTTP errors + if resp.StatusCode() != fasthttp.StatusOK { + defer providerUtils.ReleaseStreamingResponse(resp) + return nil, parseOllamaError(resp, provider.GetProviderKey()) + } + + // Create response channel + responseChan := make(chan *schemas.BifrostStream, schemas.DefaultStreamBufferSize) + + // Start streaming in a goroutine + go func() { + defer close(responseChan) + defer providerUtils.ReleaseStreamingResponse(resp) + + if resp.BodyStream() == nil { + bifrostErr := providerUtils.NewBifrostOperationError( + "Provider returned an empty response", + fmt.Errorf("provider returned an empty response"), + provider.GetProviderKey(), + ) + ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) + providerUtils.ProcessAndSendBifrostError(ctx, postHookRunner, bifrostErr, responseChan, provider.logger) + return + } + + scanner := bufio.NewScanner(resp.BodyStream()) + // Increase buffer size for large responses + buf := make([]byte, 0, 1024*1024) + scanner.Buffer(buf, 10*1024*1024) + + chunkIndex := 0 + startTime := time.Now() + lastChunkTime := startTime + + for { + // Check for context cancellation before attempting to scan + select { + case <-ctx.Done(): + // Context was cancelled - exit the goroutine + bifrostErr := &schemas.BifrostError{ + IsBifrostError: true, + Error: &schemas.ErrorField{ + Type: schemas.Ptr(schemas.RequestCancelled), + Message: fmt.Sprintf("Stream cancelled or timed out by context: %v", ctx.Err()), + Error: ctx.Err(), + }, + } + ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) + providerUtils.ProcessAndSendBifrostError(ctx, postHookRunner, bifrostErr, responseChan, provider.logger) + return + default: + // Continue to scanner.Scan() + } + + // Attempt to scan next line + if !scanner.Scan() { + // Scanner reached end of stream or encountered an error + break + } + + line := scanner.Text() + + // Skip empty lines + if line == "" { + continue + } + + // Parse the JSON chunk (Ollama uses newline-delimited JSON) + var streamChunk OllamaStreamResponse + if err := sonic.Unmarshal([]byte(line), &streamChunk); err != nil { + provider.logger.Warn(fmt.Sprintf("Failed to parse Ollama stream chunk: %v", err)) + continue + } + + // Convert to Bifrost format + bifrostResponse, isDone := streamChunk.ToBifrostStreamResponse(chunkIndex) + if bifrostResponse != nil { + bifrostResponse.ExtraFields.Provider = provider.GetProviderKey() + bifrostResponse.ExtraFields.ModelRequested = request.Model + bifrostResponse.ExtraFields.ChunkIndex = chunkIndex + chunkLatencyMs := time.Since(lastChunkTime).Milliseconds() + bifrostResponse.ExtraFields.Latency = chunkLatencyMs + + if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) { + bifrostResponse.ExtraFields.RawResponse = line + } + + lastChunkTime = time.Now() + chunkIndex++ + + // Handle Responses API fallback conversion + if isResponsesToChatCompletionsFallback { + // Convert chat completion stream to responses stream + spreadResponses := bifrostResponse.ToBifrostResponsesStreamResponse(responsesStreamState) + for _, responsesResponse := range spreadResponses { + if responsesResponse == nil { + continue + } + + // Update ExtraFields for Responses API + responsesResponse.ExtraFields.RequestType = schemas.ResponsesStreamRequest + 
responsesResponse.ExtraFields.Provider = provider.GetProviderKey() + responsesResponse.ExtraFields.ModelRequested = request.Model + responsesResponse.ExtraFields.ChunkIndex = responsesResponse.SequenceNumber + + if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) { + responsesResponse.ExtraFields.RawResponse = line + } + + // Send response chunk + if isDone && responsesResponse.Type == schemas.ResponsesStreamResponseTypeCompleted { + responsesResponse.ExtraFields.Latency = time.Since(startTime).Milliseconds() + ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) + providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, responsesResponse, nil, nil), responseChan) + return + } + + responsesResponse.ExtraFields.Latency = chunkLatencyMs + providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, responsesResponse, nil, nil), responseChan) + } + } else { + // Regular chat completion stream + if isDone { + bifrostResponse.ExtraFields.Latency = time.Since(startTime).Milliseconds() + ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) + providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, bifrostResponse, nil, nil, nil), responseChan) + return + } + + providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, bifrostResponse, nil, nil, nil), responseChan) + } + } + } + + if err := scanner.Err(); err != nil { + provider.logger.Warn(fmt.Sprintf("Error reading Ollama stream: %v", err)) + requestType := schemas.ChatCompletionStreamRequest + if isResponsesToChatCompletionsFallback { + requestType = schemas.ResponsesStreamRequest + } + providerUtils.ProcessAndSendError(ctx, postHookRunner, err, responseChan, requestType, provider.GetProviderKey(), request.Model, provider.logger) + } + }() + + return responseChan, nil } -// Responses performs a responses request to the Ollama API. +// Responses performs a responses request to Ollama's API. +// Falls back to ChatCompletion with conversion. func (provider *OllamaProvider) Responses(ctx context.Context, key schemas.Key, request *schemas.BifrostResponsesRequest) (*schemas.BifrostResponsesResponse, *schemas.BifrostError) { chatResponse, err := provider.ChatCompletion(ctx, key, request.ToChatRequest()) if err != nil { @@ -176,7 +542,8 @@ func (provider *OllamaProvider) Responses(ctx context.Context, key schemas.Key, return response, nil } -// ResponsesStream performs a streaming responses request to the Ollama API. +// ResponsesStream performs a streaming responses request to Ollama's API. +// Falls back to ChatCompletionStream with conversion. func (provider *OllamaProvider) ResponsesStream(ctx context.Context, postHookRunner schemas.PostHookRunner, key schemas.Key, request *schemas.BifrostResponsesRequest) (chan *schemas.BifrostStream, *schemas.BifrostError) { ctx = context.WithValue(ctx, schemas.BifrostContextKeyIsResponsesToChatCompletionFallback, true) return provider.ChatCompletionStream( @@ -187,20 +554,54 @@ func (provider *OllamaProvider) ResponsesStream(ctx context.Context, postHookRun ) } -// Embedding performs an embedding request to the Ollama API. +// Embedding performs an embedding request to Ollama's native API. +// Uses the /api/embed endpoint. 
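+// A typical exchange looks roughly like the following (illustrative, abridged from
+// Ollama's API docs; field values are examples only):
+//
+//	POST /api/embed
+//	{"model": "nomic-embed-text", "input": ["why is the sky blue?", "why is grass green?"]}
+//
+//	{"model": "nomic-embed-text", "embeddings": [[0.0123, ...], [0.0456, ...]], "prompt_eval_count": 14}
+//
+// The response is decoded into OllamaEmbeddingResponse and converted to Bifrost's
+// embedding format below.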
func (provider *OllamaProvider) Embedding(ctx context.Context, key schemas.Key, request *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) { - return openai.HandleOpenAIEmbeddingRequest( + // Convert to Ollama format + jsonData, bifrostErr := providerUtils.CheckContextAndGetRequestBody( ctx, - provider.client, - provider.networkConfig.BaseURL+providerUtils.GetPathFromContext(ctx, "/v1/embeddings"), request, - key, - provider.networkConfig.ExtraHeaders, - provider.GetProviderKey(), - providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), - providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), - provider.logger, + func() (any, error) { return ToOllamaEmbeddingRequest(request), nil }, + provider.GetProviderKey()) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Make request + responseBody, latency, bifrostErr := provider.completeRequest( + ctx, + jsonData, + provider.networkConfig.BaseURL+providerUtils.GetPathFromContext(ctx, "/api/embed"), + key.Value, ) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Parse response + response := acquireOllamaEmbeddingResponse() + defer releaseOllamaEmbeddingResponse(response) + + _, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(responseBody, response, jsonData, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse)) + if bifrostErr != nil { + return nil, bifrostErr + } + + // Convert to Bifrost format + bifrostResponse := response.ToBifrostEmbeddingResponse(request.Model) + + // Set ExtraFields + bifrostResponse.ExtraFields.Provider = provider.GetProviderKey() + bifrostResponse.ExtraFields.ModelRequested = request.Model + bifrostResponse.ExtraFields.RequestType = schemas.EmbeddingRequest + bifrostResponse.ExtraFields.Latency = latency.Milliseconds() + + // Set raw response if enabled + if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) { + bifrostResponse.ExtraFields.RawResponse = rawResponse + } + + return bifrostResponse, nil } // Speech is not supported by the Ollama provider. diff --git a/core/providers/ollama/ollama_test.go b/core/providers/ollama/ollama_test.go index 090676261..1cdf3aa0d 100644 --- a/core/providers/ollama/ollama_test.go +++ b/core/providers/ollama/ollama_test.go @@ -6,10 +6,21 @@ import ( "testing" "github.com/maximhq/bifrost/core/internal/testutil" - "github.com/maximhq/bifrost/core/schemas" ) +// TestOllama runs comprehensive tests against a local or remote Ollama instance. +// +// Environment variables: +// - OLLAMA_BASE_URL: Required. The base URL of the Ollama instance (e.g., "http://localhost:11434") +// - OLLAMA_API_KEY: Optional. API key for authenticated Ollama Cloud instances +// - OLLAMA_MODEL: Optional. Model to test with (default: "llama3.2:latest") +// - OLLAMA_EMBEDDING_MODEL: Optional. 
Embedding model to test with (default: "nomic-embed-text:latest") +// +// The tests use Ollama's native API endpoints: +// - /api/chat for chat completion +// - /api/embed for embeddings +// - /api/tags for listing models func TestOllama(t *testing.T) { t.Parallel() if strings.TrimSpace(os.Getenv("OLLAMA_BASE_URL")) == "" { @@ -22,13 +33,24 @@ func TestOllama(t *testing.T) { } defer cancel() + // Get model names from environment or use defaults + chatModel := os.Getenv("OLLAMA_MODEL") + if chatModel == "" { + chatModel = "llama3.2:latest" + } + + embeddingModel := os.Getenv("OLLAMA_EMBEDDING_MODEL") + if embeddingModel == "" { + embeddingModel = "nomic-embed-text:latest" + } + testConfig := testutil.ComprehensiveTestConfig{ Provider: schemas.Ollama, - ChatModel: "llama3.1:latest", - TextModel: "", // Ollama doesn't support text completion in newer models - EmbeddingModel: "", // Ollama doesn't support embedding + ChatModel: chatModel, + TextModel: "", // Text completion uses chat endpoint in native API + EmbeddingModel: embeddingModel, Scenarios: testutil.TestScenarios{ - TextCompletion: false, // Not supported + TextCompletion: false, // Not supported - use chat instead SimpleChat: true, CompletionStream: true, MultiTurnConversation: true, @@ -37,16 +59,74 @@ func TestOllama(t *testing.T) { MultipleToolCalls: true, End2EndToolCalling: true, AutomaticFunctionCall: true, + ImageURL: false, // Ollama expects base64 images + ImageBase64: true, // Multimodal models support base64 images + MultipleImages: false, + CompleteEnd2End: true, + Embedding: true, // Native API supports embeddings + ListModels: true, + }, + } + + t.Run("OllamaTests", func(t *testing.T) { + testutil.RunAllComprehensiveTests(t, client, ctx, testConfig) + }) + client.Shutdown() +} + +// TestOllamaCloud tests Ollama Cloud with API key authentication. +// This test is separate to allow testing against Ollama Cloud specifically. +// +// Environment variables: +// - OLLAMA_CLOUD_URL: Required. The Ollama Cloud URL +// - OLLAMA_API_KEY: Required. API key for Ollama Cloud +// - OLLAMA_CLOUD_MODEL: Optional. 
Model to test with +func TestOllamaCloud(t *testing.T) { + t.Parallel() + cloudURL := os.Getenv("OLLAMA_CLOUD_URL") + apiKey := os.Getenv("OLLAMA_API_KEY") + + if cloudURL == "" || apiKey == "" { + t.Skip("Skipping Ollama Cloud tests because OLLAMA_CLOUD_URL or OLLAMA_API_KEY is not set") + } + + client, ctx, cancel, err := testutil.SetupTest() + if err != nil { + t.Fatalf("Error initializing test setup: %v", err) + } + defer cancel() + + // Get model name from environment or use default + chatModel := os.Getenv("OLLAMA_CLOUD_MODEL") + if chatModel == "" { + chatModel = "llama3.2:latest" + } + + testConfig := testutil.ComprehensiveTestConfig{ + Provider: schemas.Ollama, + ChatModel: chatModel, + TextModel: "", + EmbeddingModel: "", + Scenarios: testutil.TestScenarios{ + TextCompletion: false, + SimpleChat: true, + CompletionStream: true, + MultiTurnConversation: true, + ToolCalls: true, + ToolCallsStreaming: true, + MultipleToolCalls: false, // May not be supported in cloud + End2EndToolCalling: true, + AutomaticFunctionCall: false, ImageURL: false, ImageBase64: false, MultipleImages: false, CompleteEnd2End: true, - Embedding: false, + Embedding: false, // May not be available ListModels: true, }, } - t.Run("OllamaTests", func(t *testing.T) { + t.Run("OllamaCloudTests", func(t *testing.T) { testutil.RunAllComprehensiveTests(t, client, ctx, testConfig) }) client.Shutdown() diff --git a/core/providers/ollama/types.go b/core/providers/ollama/types.go new file mode 100644 index 000000000..965572119 --- /dev/null +++ b/core/providers/ollama/types.go @@ -0,0 +1,520 @@ +// Package ollama implements the Ollama provider using native Ollama APIs. +// This file contains the type definitions for Ollama's native API. +package ollama + +import ( + "encoding/json" + "time" + + "github.com/maximhq/bifrost/core/schemas" +) + +// ==================== REQUEST TYPES ==================== + +// OllamaChatRequest represents an Ollama chat completion request using native API. +// See: https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion +type OllamaChatRequest struct { + Model string `json:"model"` // Required: Name of the model to use + Messages []OllamaMessage `json:"messages"` // Required: Messages for the chat + Tools []OllamaTool `json:"tools,omitempty"` // Optional: List of tools the model may use + Think *bool `json:"think,omitempty"` // Optional: Enable thinking (default: false) + Format interface{} `json:"format,omitempty"` // Optional: Format of the response (e.g., "json" or JSON schema) + Options *OllamaOptions `json:"options,omitempty"` // Optional: Model parameters + Stream *bool `json:"stream,omitempty"` // Optional: Enable streaming (default: true) + KeepAlive *string `json:"keep_alive,omitempty"` // Optional: How long to keep model loaded (e.g., "5m", "0" to unload) +} + +// OllamaMessage represents a message in Ollama format. +type OllamaMessage struct { + Role string `json:"role"` // "system", "user", "assistant", or "tool" + Content string `json:"content"` // Message content + Thinking *string `json:"thinking,omitempty"` // Optional: Thinking content + Images []string `json:"images,omitempty"` // Optional: Base64 encoded images for multimodal models + ToolCalls []OllamaToolCall `json:"tool_calls,omitempty"` // Optional: Tool calls made by the assistant + ToolName *string `json:"tool_name,omitempty"` // Optional: Tool name +} + +// OllamaToolCall represents a tool call in Ollama format. 
+type OllamaToolCall struct { + Function OllamaToolCallFunction `json:"function"` +} + +// OllamaToolCallFunction represents the function details of a tool call. +type OllamaToolCallFunction struct { + Name string `json:"name"` + Arguments map[string]interface{} `json:"arguments"` +} + +// OllamaTool represents a tool definition in Ollama format. +type OllamaTool struct { + Type string `json:"type"` // "function" + Function OllamaToolFunction `json:"function"` +} + +// OllamaToolFunction represents a function definition for tools. +type OllamaToolFunction struct { + Name string `json:"name"` + Description string `json:"description"` + Parameters *schemas.ToolFunctionParameters `json:"parameters,omitempty"` +} + +// OllamaOptions represents model parameters for Ollama requests. +// See: https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values +type OllamaOptions struct { + // Generation parameters + NumPredict *int `json:"num_predict,omitempty"` // Maximum number of tokens to generate (similar to max_tokens) + Temperature *float64 `json:"temperature,omitempty"` // Sampling temperature (0.0-2.0) + TopP *float64 `json:"top_p,omitempty"` // Top-p sampling + TopK *int `json:"top_k,omitempty"` // Top-k sampling + Seed *int `json:"seed,omitempty"` // Random seed for reproducibility + Stop []string `json:"stop,omitempty"` // Stop sequences + + // Penalty parameters + RepeatPenalty *float64 `json:"repeat_penalty,omitempty"` // Repetition penalty + PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Presence penalty + FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` // Frequency penalty + RepeatLastN *int `json:"repeat_last_n,omitempty"` // Last N tokens for repeat penalty + + // Context and performance + NumCtx *int `json:"num_ctx,omitempty"` // Context window size + NumBatch *int `json:"num_batch,omitempty"` // Batch size for prompt processing + NumGPU *int `json:"num_gpu,omitempty"` // Number of layers to offload to GPU + NumThread *int `json:"num_thread,omitempty"` // Number of threads + + // Advanced parameters + Mirostat *int `json:"mirostat,omitempty"` // Mirostat sampling (0, 1, or 2) + MirostatEta *float64 `json:"mirostat_eta,omitempty"` // Mirostat learning rate + MirostatTau *float64 `json:"mirostat_tau,omitempty"` // Mirostat target entropy + TfsZ *float64 `json:"tfs_z,omitempty"` // Tail-free sampling + TypicalP *float64 `json:"typical_p,omitempty"` // Typical p sampling + + // Low-level parameters + UseMlock *bool `json:"use_mlock,omitempty"` // Lock model in memory + UseMmap *bool `json:"use_mmap,omitempty"` // Use memory mapping + Numa *bool `json:"numa,omitempty"` // Enable NUMA support +} + +// ==================== RESPONSE TYPES ==================== + +// OllamaChatResponse represents an Ollama chat completion response. 
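+// A non-streaming /api/chat response looks roughly like this (illustrative,
+// abridged; values are examples only):
+//
+//	{
+//	  "model": "llama3.2",
+//	  "created_at": "2024-10-01T12:00:00.123456Z",
+//	  "message": {"role": "assistant", "content": "Hello! How can I help?"},
+//	  "done": true,
+//	  "done_reason": "stop",
+//	  "total_duration": 5043500667,
+//	  "prompt_eval_count": 26,
+//	  "eval_count": 298
+//	}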
+type OllamaChatResponse struct { + Model string `json:"model"` // Model used for generation + CreatedAt string `json:"created_at"` // Timestamp when response was created + Message *OllamaMessage `json:"message,omitempty"` // Generated message + Done bool `json:"done"` // Whether generation is complete + DoneReason *string `json:"done_reason,omitempty"` // Reason for completion ("stop", "length", "load", "unload") + TotalDuration *int64 `json:"total_duration,omitempty"` // Total time in nanoseconds + LoadDuration *int64 `json:"load_duration,omitempty"` // Time to load model in nanoseconds + PromptEvalCount *int `json:"prompt_eval_count,omitempty"` // Number of tokens in prompt + PromptEvalDuration *int64 `json:"prompt_eval_duration,omitempty"` // Time to evaluate prompt in nanoseconds + EvalCount *int `json:"eval_count,omitempty"` // Number of tokens generated + EvalDuration *int64 `json:"eval_duration,omitempty"` // Time to generate response in nanoseconds +} + +// ==================== EMBEDDING TYPES ==================== + +// OllamaEmbeddingRequest represents an Ollama embedding request. +// See: https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings +type OllamaEmbeddingRequest struct { + Model string `json:"model"` // Required: Name of the embedding model + Input interface{} `json:"input"` // Required: Text to embed (string or []string) + Truncate *bool `json:"truncate,omitempty"` // Optional: Truncate input to fit context length + Options *OllamaOptions `json:"options,omitempty"` // Optional: Model parameters + KeepAlive *string `json:"keep_alive,omitempty"` // Optional: How long to keep model loaded +} + +// OllamaEmbeddingResponse represents an Ollama embedding response. +type OllamaEmbeddingResponse struct { + Model string `json:"model"` // Model used for embedding + Embeddings [][]float64 `json:"embeddings"` // Generated embeddings + TotalDuration *int64 `json:"total_duration,omitempty"` // Total time in nanoseconds + LoadDuration *int64 `json:"load_duration,omitempty"` // Time to load model in nanoseconds + PromptEvalCount *int `json:"prompt_eval_count,omitempty"` // Number of tokens processed +} + +// ==================== LIST MODELS TYPES ==================== + +// OllamaListModelsResponse represents the response from /api/tags endpoint. +type OllamaListModelsResponse struct { + Models []OllamaModel `json:"models"` +} + +// OllamaModel represents a model in Ollama's list. +type OllamaModel struct { + Name string `json:"name"` // Model name (e.g., "llama3.2:latest") + Model string `json:"model"` // Model identifier + ModifiedAt time.Time `json:"modified_at"` // Last modified timestamp + Size int64 `json:"size"` // Model size in bytes + Digest string `json:"digest"` // Model digest + Details OllamaModelDetails `json:"details"` // Model details +} + +// OllamaModelDetails contains detailed information about a model. +type OllamaModelDetails struct { + ParentModel string `json:"parent_model,omitempty"` // Parent model name + Format string `json:"format"` // Model format (e.g., "gguf") + Family string `json:"family"` // Model family (e.g., "llama") + Families []string `json:"families,omitempty"` // Additional families + ParameterSize string `json:"parameter_size"` // Parameter count (e.g., "8B") + QuantizationLevel string `json:"quantization_level"` // Quantization (e.g., "Q4_0") +} + +// ==================== ERROR TYPES ==================== + +// OllamaError represents an error response from Ollama's API. 
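+// The body is a single JSON object, e.g. (illustrative):
+//
+//	{"error": "model \"llama3.2\" not found, try pulling it first"}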
+type OllamaError struct { + Error string `json:"error"` +} + +// ==================== STREAMING TYPES ==================== + +// OllamaStreamResponse represents a single streaming chunk from Ollama. +// It's the same structure as OllamaChatResponse but used during streaming. +type OllamaStreamResponse struct { + Model string `json:"model"` + CreatedAt string `json:"created_at"` + Message *OllamaMessage `json:"message,omitempty"` + Done bool `json:"done"` + DoneReason *string `json:"done_reason,omitempty"` + TotalDuration *int64 `json:"total_duration,omitempty"` + LoadDuration *int64 `json:"load_duration,omitempty"` + PromptEvalCount *int `json:"prompt_eval_count,omitempty"` + PromptEvalDuration *int64 `json:"prompt_eval_duration,omitempty"` + EvalCount *int `json:"eval_count,omitempty"` + EvalDuration *int64 `json:"eval_duration,omitempty"` +} + +// ==================== HELPER METHODS ==================== + +// ToBifrostChatResponse converts an Ollama chat response to Bifrost format. +func (r *OllamaChatResponse) ToBifrostChatResponse(model string) *schemas.BifrostChatResponse { + if r == nil { + return nil + } + + // Parse timestamp + created := int(time.Now().Unix()) + if r.CreatedAt != "" { + if t, err := time.Parse(time.RFC3339Nano, r.CreatedAt); err == nil { + created = int(t.Unix()) + } + } + + response := &schemas.BifrostChatResponse{ + Model: model, + Created: created, + Object: "chat.completion", + ExtraFields: schemas.BifrostResponseExtraFields{ + RequestType: schemas.ChatCompletionRequest, + Provider: schemas.Ollama, + }, + } + + // Build the choice + if r.Message != nil { + var toolCalls []schemas.ChatAssistantMessageToolCall + if len(r.Message.ToolCalls) > 0 { + for i, tc := range r.Message.ToolCalls { + args, _ := json.Marshal(tc.Function.Arguments) + toolCalls = append(toolCalls, schemas.ChatAssistantMessageToolCall{ + Index: uint16(i), + Type: schemas.Ptr("function"), + ID: schemas.Ptr(tc.Function.Name), // Ollama doesn't provide IDs, use name + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: &tc.Function.Name, + Arguments: string(args), + }, + }) + } + } + + var assistantMessage *schemas.ChatAssistantMessage + if len(toolCalls) > 0 { + assistantMessage = &schemas.ChatAssistantMessage{ + ToolCalls: toolCalls, + } + } + + // Handle thinking content for non-streaming responses + // Store thinking in tool call ExtraContent (similar to how we preserve it in message conversion) + if r.Message.Thinking != nil && *r.Message.Thinking != "" { + if assistantMessage == nil { + assistantMessage = &schemas.ChatAssistantMessage{} + } + // If we have tool calls, store thinking in the first one's ExtraContent + // Otherwise, create a placeholder tool call to preserve thinking + if len(assistantMessage.ToolCalls) > 0 { + if assistantMessage.ToolCalls[0].ExtraContent == nil { + assistantMessage.ToolCalls[0].ExtraContent = make(map[string]interface{}) + } + assistantMessage.ToolCalls[0].ExtraContent["ollama"] = map[string]interface{}{ + "thinking": *r.Message.Thinking, + } + } else { + // Create placeholder tool call to preserve thinking + assistantMessage.ToolCalls = []schemas.ChatAssistantMessageToolCall{ + { + Index: 0, + Type: schemas.Ptr("function"), + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: schemas.Ptr("_thinking_placeholder"), + Arguments: "{}", + }, + ExtraContent: map[string]interface{}{ + "ollama": map[string]interface{}{ + "thinking": *r.Message.Thinking, + }, + }, + }, + } + } + } + + choice := schemas.BifrostResponseChoice{ + Index: 0, + 
ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ + Message: &schemas.ChatMessage{ + Role: schemas.ChatMessageRole(r.Message.Role), + Content: &schemas.ChatMessageContent{ + ContentStr: &r.Message.Content, + }, + ChatAssistantMessage: assistantMessage, + }, + }, + FinishReason: r.mapFinishReason(), + } + response.Choices = []schemas.BifrostResponseChoice{choice} + } + + // Map usage + response.Usage = r.toUsage() + + return response +} + +// mapFinishReason maps Ollama's done_reason to Bifrost format. +func (r *OllamaChatResponse) mapFinishReason() *string { + if r.DoneReason == nil { + if r.Done { + return schemas.Ptr("stop") + } + return nil + } + + switch *r.DoneReason { + case "stop": + return schemas.Ptr("stop") + case "length": + return schemas.Ptr("length") + case "load", "unload": + return schemas.Ptr("stop") + default: + return r.DoneReason + } +} + +// toUsage converts Ollama usage info to Bifrost format. +func (r *OllamaChatResponse) toUsage() *schemas.BifrostLLMUsage { + usage := &schemas.BifrostLLMUsage{} + + if r.PromptEvalCount != nil { + usage.PromptTokens = *r.PromptEvalCount + } + if r.EvalCount != nil { + usage.CompletionTokens = *r.EvalCount + } + usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens + + return usage +} + +// ToBifrostStreamResponse converts an Ollama streaming chunk to Bifrost format. +func (r *OllamaStreamResponse) ToBifrostStreamResponse(chunkIndex int) (*schemas.BifrostChatResponse, bool) { + if r == nil { + return nil, false + } + + response := &schemas.BifrostChatResponse{ + Model: r.Model, + Object: "chat.completion.chunk", + ExtraFields: schemas.BifrostResponseExtraFields{ + RequestType: schemas.ChatCompletionStreamRequest, + Provider: schemas.Ollama, + ChunkIndex: chunkIndex, + }, + } + + // Parse timestamp + if r.CreatedAt != "" { + if t, err := time.Parse(time.RFC3339Nano, r.CreatedAt); err == nil { + response.Created = int(t.Unix()) + } + } + + // Build delta content + if r.Message != nil { + var toolCalls []schemas.ChatAssistantMessageToolCall + if len(r.Message.ToolCalls) > 0 { + for i, tc := range r.Message.ToolCalls { + args, _ := json.Marshal(tc.Function.Arguments) + toolCalls = append(toolCalls, schemas.ChatAssistantMessageToolCall{ + Index: uint16(i), + Type: schemas.Ptr("function"), + ID: schemas.Ptr(tc.Function.Name), + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: &tc.Function.Name, + Arguments: string(args), + }, + }) + } + } + + delta := &schemas.ChatStreamResponseChoiceDelta{} + + if r.Message.Role != "" { + role := string(r.Message.Role) + delta.Role = &role + } + + if r.Message.Content != "" { + delta.Content = &r.Message.Content + } + + // Handle thinking content (for thinking-specific models) + // Ollama may send thinking incrementally in streaming chunks, similar to content + if r.Message.Thinking != nil && *r.Message.Thinking != "" { + delta.Reasoning = r.Message.Thinking + } + + if len(toolCalls) > 0 { + delta.ToolCalls = toolCalls + } + + // Always create a choice if we have any delta content (content, thinking, tool calls, or role) + hasDelta := delta.Role != nil || delta.Content != nil || delta.Reasoning != nil || len(delta.ToolCalls) > 0 + if hasDelta { + choice := schemas.BifrostResponseChoice{ + Index: 0, + ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ + Delta: delta, + }, + } + + // Set finish reason on final chunk + if r.Done { + if r.DoneReason != nil { + switch *r.DoneReason { + case "stop": + choice.FinishReason = schemas.Ptr("stop") + case "length": + 
choice.FinishReason = schemas.Ptr("length") + default: + choice.FinishReason = schemas.Ptr("stop") + } + } else { + choice.FinishReason = schemas.Ptr("stop") + } + } + + response.Choices = []schemas.BifrostResponseChoice{choice} + } + } + + // Add usage on final chunk + if r.Done { + usage := &schemas.BifrostLLMUsage{} + if r.PromptEvalCount != nil { + usage.PromptTokens = *r.PromptEvalCount + } + if r.EvalCount != nil { + usage.CompletionTokens = *r.EvalCount + } + usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens + response.Usage = usage + } + + return response, r.Done +} + +// ToBifrostEmbeddingResponse converts an Ollama embedding response to Bifrost format. +func (r *OllamaEmbeddingResponse) ToBifrostEmbeddingResponse(model string) *schemas.BifrostEmbeddingResponse { + if r == nil { + return nil + } + + response := &schemas.BifrostEmbeddingResponse{ + Model: model, + Object: "list", + ExtraFields: schemas.BifrostResponseExtraFields{ + RequestType: schemas.EmbeddingRequest, + Provider: schemas.Ollama, + }, + } + + // Convert embeddings to Bifrost format + for i, embedding := range r.Embeddings { + // Convert []float64 to []float32 + embeddingFloat32 := make([]float32, len(embedding)) + for j, v := range embedding { + embeddingFloat32[j] = float32(v) + } + + response.Data = append(response.Data, schemas.EmbeddingData{ + Object: "embedding", + Embedding: schemas.EmbeddingStruct{ + EmbeddingArray: embeddingFloat32, + }, + Index: i, + }) + } + + // Convert usage + if r.PromptEvalCount != nil { + response.Usage = &schemas.BifrostLLMUsage{ + PromptTokens: *r.PromptEvalCount, + TotalTokens: *r.PromptEvalCount, + } + } + + return response +} + +// ToBifrostListModelsResponse converts an Ollama list models response to Bifrost format. +func (r *OllamaListModelsResponse) ToBifrostListModelsResponse(providerName schemas.ModelProvider, configuredModels []string) *schemas.BifrostListModelsResponse { + if r == nil { + return nil + } + + response := &schemas.BifrostListModelsResponse{ + ExtraFields: schemas.BifrostResponseExtraFields{ + RequestType: schemas.ListModelsRequest, + Provider: providerName, + }, + } + + // Create a set of configured models for quick lookup + configuredSet := make(map[string]bool) + for _, m := range configuredModels { + configuredSet[m] = true + } + + for _, model := range r.Models { + // Filter models if configuredModels is non-empty + if len(configuredModels) > 0 && !configuredSet[model.Name] { + continue + } + + created := model.ModifiedAt.Unix() + ownedBy := "ollama" + + bifrostModel := schemas.Model{ + ID: model.Name, + Created: &created, + OwnedBy: &ownedBy, + } + + response.Data = append(response.Data, bifrostModel) + } + + return response +} diff --git a/core/providers/ollama/utils.go b/core/providers/ollama/utils.go new file mode 100644 index 000000000..f06bad205 --- /dev/null +++ b/core/providers/ollama/utils.go @@ -0,0 +1,469 @@ +// Package ollama implements the Ollama provider using native Ollama APIs. +// This file contains utility functions for converting between Bifrost and Ollama formats. +package ollama + +import ( + "encoding/base64" + "encoding/json" + "log" + "strings" + + "github.com/maximhq/bifrost/core/schemas" +) + +// convertMessagesToOllama converts Bifrost messages to Ollama format. 
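// For illustration, a sketch of the intended mapping (field names as used by OllamaMessage below;
// the concrete values are examples, not taken from the patch): an assistant turn that calls
// getWeather and the tool reply that answers it convert roughly as
//
//	assistant: OllamaMessage{Role: "assistant", ToolCalls: [getWeather(location="Tokyo")]} // Content "" and no Images
//	tool:      OllamaMessage{Role: "tool", ToolName: "getWeather", Content: `{"temperature": 72}`}
//
// with the reply tied to the call by function name, as spelled out below.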
+// Ollama has specific semantics for tool calls:
+// - Tool calls only appear on assistant messages
+// - Assistant messages with tool_calls are function invocation requests and must have NO content or images
+// - Tool responses must be separate messages with role="tool" and tool_name set
+// - Ollama correlates tool calls and responses by function name directly, not by opaque IDs
+
+// NOTE: Ollama does not provide tool call IDs. When multiple calls to the same function occur
+// in a single turn, tool responses are correlated by function name only. This is a lossy conversion
+// but accurately reflects Ollama's native semantics. Bifrost allows toolCallId to be optional,
+// so IDs are intentionally omitted. Do not generate synthetic tool call IDs.
+func convertMessagesToOllama(messages []schemas.ChatMessage) []OllamaMessage {
+	var ollamaMessages []OllamaMessage
+
+	// Track tool call IDs seen on assistant messages so that tool responses carrying only a
+	// tool_call_id can be resolved to the function name Ollama expects in tool_name.
+	toolCallIDToName := make(map[string]string)
+
+	for _, msg := range messages {
+		ollamaMsg := OllamaMessage{
+			Role: mapRoleToOllama(msg.Role),
+		}
+
+		if ollamaMsg.Role == "" {
+			continue // Skip unsupported roles
+		}
+
+		// Check if this is an assistant message with tool calls
+		hasToolCalls := msg.Role == schemas.ChatMessageRoleAssistant && msg.ChatAssistantMessage != nil && msg.ChatAssistantMessage.ToolCalls != nil
+
+		// Convert content - but NOT for assistant messages with tool_calls
+		// In Ollama, assistant messages with tool_calls are function invocation requests
+		// and must contain no content or images, exactly as shown in native /api/chat behavior
+		if !hasToolCalls {
+			ollamaMsg.Content, ollamaMsg.Images = convertContentToOllama(msg.Content)
+		} else {
+			// Assistant message with tool_calls: no content or images
+			ollamaMsg.Content = ""
+			ollamaMsg.Images = nil
+		}
+
+		// Handle tool calls - ONLY on assistant messages per Ollama semantics
+		if hasToolCalls {
+			// Filter out thinking placeholder tool calls before converting
+			var realToolCalls []schemas.ChatAssistantMessageToolCall
+			var thinkingContent *string
+			for _, tc := range msg.ChatAssistantMessage.ToolCalls {
+				// Check if this is a thinking placeholder
+				if tc.Function.Name != nil && *tc.Function.Name == "_thinking_placeholder" {
+					// Extract thinking from ExtraContent
+					if tc.ExtraContent != nil {
+						if ollamaData, ok := tc.ExtraContent["ollama"].(map[string]interface{}); ok {
+							if thinking, ok := ollamaData["thinking"].(string); ok && thinking != "" {
+								thinkingContent = &thinking
+							}
+						}
+					}
+					continue // Skip the placeholder tool call
+				}
+				// Extract thinking from tool call's ExtraContent if present
+				if tc.ExtraContent != nil {
+					if ollamaData, ok := tc.ExtraContent["ollama"].(map[string]interface{}); ok {
+						if thinking, ok := ollamaData["thinking"].(string); ok && thinking != "" {
+							thinkingContent = &thinking
+						}
+					}
+				}
+				// Record the call ID (when present) so later tool responses can be resolved to this function name
+				if tc.ID != nil && tc.Function.Name != nil {
+					toolCallIDToName[*tc.ID] = *tc.Function.Name
+				}
+				realToolCalls = append(realToolCalls, tc)
+			}
+			if len(realToolCalls) > 0 {
+				ollamaMsg.ToolCalls = convertToolCallsToOllama(realToolCalls)
+			}
+			// Set thinking if we found it
+			if thinkingContent != nil {
+				ollamaMsg.Thinking = thinkingContent
+			}
+		}
+
+		// Handle tool response messages - must set tool_name per Ollama semantics
+		// Ollama uses tool_name (function name) to correlate, not tool_call_id, so the incoming
+		// ToolCallID is only used to look up the function name recorded from earlier assistant messages
+		if msg.Role == schemas.ChatMessageRoleTool && msg.ChatToolMessage != nil {
+			if msg.ChatToolMessage.ToolCallID != nil {
+				if name, ok := toolCallIDToName[*msg.ChatToolMessage.ToolCallID]; ok {
+					ollamaMsg.ToolName = &name
+				}
+			}
+			// Fall back to the message's Name field when no mapping is available
+			if ollamaMsg.ToolName == nil && msg.Name != nil {
+				ollamaMsg.ToolName = msg.Name
+			}
+			if ollamaMsg.ToolName == nil {
+				log.Printf("Tool message has no resolvable tool_call_id and no Name field - Ollama requires tool_name, skipping message")
+			}
+		}
+
+		if ollamaMsg.Role == "tool" && ollamaMsg.ToolName == nil {
+			continue //
Skip invalid tool messages that would be silently ignored by Ollama + } + ollamaMessages = append(ollamaMessages, ollamaMsg) + } + + return ollamaMessages +} + +// NOTE: Ollama does not provide tool call IDs. When multiple calls to the same function occur +// in a single turn, tool responses are correlated by function name only. This is a lossy conversion +// but accurately reflects Ollama's native semantics. Bifrost allows toolCallId to be optional, +// so IDs are intentionally omitted. Do not generate synthetic tool call IDs. +func convertMessagesFromOllama(messages []OllamaMessage) []schemas.ChatMessage { + var bifrostMessages []schemas.ChatMessage + + for _, msg := range messages { + bifrostMsg := schemas.ChatMessage{ + Role: schemas.ChatMessageRole(msg.Role), + } + + // Check if this is an assistant message with tool calls + hasToolCalls := msg.Role == "assistant" && len(msg.ToolCalls) > 0 + + // Set content - but NOT for assistant messages with tool_calls + // In Ollama, assistant messages with tool_calls are function invocation requests + // and contain no content or images + if !hasToolCalls { + bifrostMsg.Content = &schemas.ChatMessageContent{ + ContentStr: &msg.Content, + } + } + // If hasToolCalls is true, Content remains nil (no content for function invocation requests) + + // Handle assistant messages with tool calls + // Ollama doesn't provide tool call IDs - ID field is optional in Bifrost, so we don't set it + if hasToolCalls { + var toolCalls []schemas.ChatAssistantMessageToolCall + for i, tc := range msg.ToolCalls { + args, _ := json.Marshal(tc.Function.Arguments) + toolCalls = append(toolCalls, schemas.ChatAssistantMessageToolCall{ + Index: uint16(i), + Type: schemas.Ptr("function"), + // ID is intentionally not set - Ollama doesn't provide tool call IDs + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: &tc.Function.Name, + Arguments: string(args), + }, + }) + } + bifrostMsg.ChatAssistantMessage = &schemas.ChatAssistantMessage{ + ToolCalls: toolCalls, + } + } + + // Handle thinking content for assistant messages + // Store thinking in the first tool call's ExtraContent (if tool calls exist) or create assistant message + // This preserves thinking for passthrough scenarios + if msg.Role == "assistant" && msg.Thinking != nil && *msg.Thinking != "" { + if bifrostMsg.ChatAssistantMessage == nil { + bifrostMsg.ChatAssistantMessage = &schemas.ChatAssistantMessage{} + } + // Store thinking in the first tool call's ExtraContent if tool calls exist + // Otherwise, we'll need to store it somewhere - but ChatAssistantMessage doesn't have ExtraContent + // So we'll store it in the first tool call's ExtraContent, or create a dummy tool call if none exist + if len(bifrostMsg.ChatAssistantMessage.ToolCalls) > 0 { + if bifrostMsg.ChatAssistantMessage.ToolCalls[0].ExtraContent == nil { + bifrostMsg.ChatAssistantMessage.ToolCalls[0].ExtraContent = make(map[string]interface{}) + } + bifrostMsg.ChatAssistantMessage.ToolCalls[0].ExtraContent["ollama"] = map[string]interface{}{ + "thinking": *msg.Thinking, + } + } else { + // No tool calls - create a dummy tool call to store thinking + // This is a workaround since ChatAssistantMessage doesn't have ExtraContent + bifrostMsg.ChatAssistantMessage.ToolCalls = []schemas.ChatAssistantMessageToolCall{ + { + Index: 0, + Type: schemas.Ptr("function"), + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: schemas.Ptr("_thinking_placeholder"), + Arguments: "{}", + }, + ExtraContent: map[string]interface{}{ + "ollama": 
map[string]interface{}{ + "thinking": *msg.Thinking, + }, + }, + }, + } + } + } + + // Handle tool response messages + // Ollama uses tool_name (function name) to correlate, not tool_call_id + // Since ToolCallID is optional in Bifrost, we don't set it for Ollama + if msg.Role == "tool" && msg.ToolName != nil { + bifrostMsg.ChatToolMessage = &schemas.ChatToolMessage{ + // ToolCallID is intentionally not set - Ollama doesn't use tool call IDs + } + bifrostMsg.Name = msg.ToolName + } + + // Handle images - but NOT for assistant messages with tool_calls + // Assistant messages with tool_calls are function invocation requests and have no content/images + if !hasToolCalls && len(msg.Images) > 0 { + var contentBlocks []schemas.ChatContentBlock + + // Add text content if present + if msg.Content != "" { + contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ + Type: schemas.ChatContentBlockTypeText, + Text: &msg.Content, + }) + } + + // Add images + for _, img := range msg.Images { + dataURL := "data:image/jpeg;base64," + img + contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ + Type: schemas.ChatContentBlockTypeImage, + ImageURLStruct: &schemas.ChatInputImage{ + URL: dataURL, + }, + }) + } + + bifrostMsg.Content = &schemas.ChatMessageContent{ + ContentBlocks: contentBlocks, + } + } + + bifrostMessages = append(bifrostMessages, bifrostMsg) + } + + return bifrostMessages +} + +// ==================== ROLE MAPPING UTILITIES ==================== + +// mapRoleToOllama maps Bifrost roles to Ollama roles. +func mapRoleToOllama(role schemas.ChatMessageRole) string { + switch role { + case schemas.ChatMessageRoleDeveloper: + return "system" // Ollama doesn't support developer role, map to system + case schemas.ChatMessageRoleSystem: + return "system" + case schemas.ChatMessageRoleUser: + return "user" + case schemas.ChatMessageRoleAssistant: + return "assistant" + case schemas.ChatMessageRoleTool: + return "tool" + default: + return "" // Unsupported + } +} + +// ==================== CONTENT CONVERSION UTILITIES ==================== + +// convertContentToOllama extracts text and images from Bifrost content. +// Returns the combined text content and a slice of raw base64-encoded images. +// Note: Ollama expects raw base64 strings WITHOUT data URL prefixes. +func convertContentToOllama(content *schemas.ChatMessageContent) (string, []string) { + if content == nil { + return "", nil + } + + // Simple string content - no images + if content.ContentStr != nil { + return *content.ContentStr, nil + } + + // Content blocks - may contain text and/or images + if content.ContentBlocks == nil { + return "", nil + } + + var textParts []string + var images []string + + for _, block := range content.ContentBlocks { + switch block.Type { + case schemas.ChatContentBlockTypeText: + if block.Text != nil { + textParts = append(textParts, *block.Text) + } + + case schemas.ChatContentBlockTypeImage: + // Extract base64 image data + // Note: ImageURLStruct.URL can be: + // 1. A data URL: "data:image/jpeg;base64," + // 2. Raw base64: "" + // 3. HTTP(S) URL: "https://..." 
(not supported by Ollama) + if block.ImageURLStruct != nil && block.ImageURLStruct.URL != "" { + imageData := extractBase64Image(block.ImageURLStruct.URL) + if imageData != "" { + images = append(images, imageData) + } + // extractBase64Image logs warnings for unsupported formats + } + } + } + + return strings.Join(textParts, "\n"), images +} + +// ==================== IMAGE UTILITIES ==================== + +// extractBase64Image extracts raw base64 data from various image URL formats. +// Ollama expects raw base64 strings without data URL prefixes. +// +// Supported formats: +// - data:image/jpeg;base64, -> extracts +// - data:image/png;base64, -> extracts +// - -> returns as-is +// - http(s)://... -> logs warning, returns empty (not supported) +func extractBase64Image(url string) string { + if url == "" { + return "" + } + + // Handle data URLs: data:image/jpeg;base64, + // Must strip the prefix to get raw base64 that Ollama expects + if strings.HasPrefix(url, "data:") { + // Find the comma that separates the metadata from the base64 data + commaIndex := strings.Index(url, ",") + if commaIndex != -1 && commaIndex < len(url)-1 { + // Extract everything after the comma (the raw base64 data) + base64Data := url[commaIndex+1:] + // Validate it's actually base64 + if isValidBase64(base64Data) { + return base64Data + } + log.Printf("Data URL contains invalid base64 data: %s", url[:min(50, len(url))]) + return "" + } + log.Printf("Malformed data URL (no comma separator): %s", url[:min(50, len(url))]) + return "" + } + + // Check if it's a regular HTTP(S) URL + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") { + log.Printf("Ollama does not support HTTP(S) image URLs. Please convert to base64: %s", url[:min(100, len(url))]) + return "" + } + + // Assume it's raw base64 - validate and return + if isValidBase64(url) { + return url + } + + log.Printf("Image URL is neither a valid data URL nor base64: %s", url[:min(50, len(url))]) + return "" +} + +// isValidBase64 checks if a string is valid base64 encoded data. +// This is more robust than just checking if it decodes, as it also validates +// that the string contains only valid base64 characters. +func isValidBase64(s string) bool { + if len(s) < 4 { + return false + } + + // Try to decode - this validates both format and content + decoded, err := base64.StdEncoding.DecodeString(s) + if err != nil { + // Try with padding issues fixed + decoded, err = base64.RawStdEncoding.DecodeString(s) + if err != nil { + return false + } + } + + // Sanity check: decoded data should be non-empty for images + return len(decoded) > 0 +} + +// min returns the minimum of two integers. +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// ==================== TOOL CONVERSION UTILITIES ==================== + +// convertToolCallsToOllama converts Bifrost tool calls to Ollama format. +// Ollama tool calls don't require an ID field - they use function name for correlation +func convertToolCallsToOllama(toolCalls []schemas.ChatAssistantMessageToolCall) []OllamaToolCall { + var ollamaToolCalls []OllamaToolCall + + for _, tc := range toolCalls { + var args map[string]interface{} + if tc.Function.Arguments != "" { + if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil { + log.Printf("Failed to unmarshal tool call arguments: %v. 
Raw arguments: %s", err, tc.Function.Arguments) + args = map[string]interface{}{ + "_raw_arguments": tc.Function.Arguments, + } + } + } + if args == nil { + args = make(map[string]interface{}) + } + + name := "" + if tc.Function.Name != nil { + name = *tc.Function.Name + } + + ollamaToolCalls = append(ollamaToolCalls, OllamaToolCall{ + Function: OllamaToolCallFunction{ + Name: name, + Arguments: args, + }, + }) + } + + return ollamaToolCalls +} + +// convertToolsToOllama converts Bifrost tools to Ollama format. +func convertToolsToOllama(tools []schemas.ChatTool) []OllamaTool { + var ollamaTools []OllamaTool + + for _, tool := range tools { + if tool.Function == nil { + continue + } + + ollamaTool := OllamaTool{ + Type: "function", + Function: OllamaToolFunction{ + Name: tool.Function.Name, + }, + } + + if tool.Function.Description != nil { + ollamaTool.Function.Description = *tool.Function.Description + } + + if tool.Function.Parameters != nil { + ollamaTool.Function.Parameters = tool.Function.Parameters + } + + ollamaTools = append(ollamaTools, ollamaTool) + } + + return ollamaTools +} + +// convertToolsFromOllama converts Ollama tools to Bifrost format. +func convertToolsFromOllama(tools []OllamaTool) []schemas.ChatTool { + var bifrostTools []schemas.ChatTool + + for _, tool := range tools { + bifrostTool := schemas.ChatTool{ + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: tool.Function.Name, + Description: &tool.Function.Description, + Parameters: tool.Function.Parameters, + }, + } + bifrostTools = append(bifrostTools, bifrostTool) + } + + return bifrostTools +} diff --git a/core/providers/ollama/utils_test.go b/core/providers/ollama/utils_test.go new file mode 100644 index 000000000..44a874d6c --- /dev/null +++ b/core/providers/ollama/utils_test.go @@ -0,0 +1,481 @@ +package ollama + +import ( + "testing" + + "github.com/maximhq/bifrost/core/schemas" +) + +func TestExtractBase64Image(t *testing.T) { + tests := []struct { + name string + input string + expected string + wantWarn bool + }{ + { + name: "data URL with JPEG", + input: "data:image/jpeg;base64,/9j/4AAQSkZJRg==", + expected: "/9j/4AAQSkZJRg==", + wantWarn: false, + }, + { + name: "data URL with PNG", + input: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + expected: "iVBORw0KGgoAAAANSUhEUg==", + wantWarn: false, + }, + { + name: "raw base64", + input: "iVBORw0KGgoAAAANSUhEUg==", + expected: "iVBORw0KGgoAAAANSUhEUg==", + wantWarn: false, + }, + { + name: "HTTP URL", + input: "https://example.com/image.jpg", + expected: "", + wantWarn: true, + }, + { + name: "HTTPS URL", + input: "https://example.com/image.png", + expected: "", + wantWarn: true, + }, + { + name: "empty string", + input: "", + expected: "", + wantWarn: false, + }, + { + name: "malformed data URL - no comma", + input: "data:image/jpeg;base64", + expected: "", + wantWarn: true, + }, + { + name: "malformed data URL - empty after comma", + input: "data:image/jpeg;base64,", + expected: "", + wantWarn: true, + }, + { + name: "invalid base64", + input: "not-valid-base64!@#$%", + expected: "", + wantWarn: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractBase64Image(tt.input) + if result != tt.expected { + t.Errorf("extractBase64Image(%q) = %q, want %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestIsValidBase64(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + { + name: "valid base64 - standard", + input: 
"iVBORw0KGgoAAAANSUhEUg==", + want: true, + }, + { + name: "valid base64 - JPEG header", + input: "/9j/4AAQSkZJRg==", + want: true, + }, + { + name: "valid base64 - no padding", + input: "SGVsbG8gV29ybGQ", + want: true, + }, + { + name: "invalid - too short", + input: "abc", + want: false, + }, + { + name: "invalid - special characters", + input: "abc!@#$%", + want: false, + }, + { + name: "empty string", + input: "", + want: false, + }, + { + name: "URL", + input: "https://example.com", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isValidBase64(tt.input) + if result != tt.want { + t.Errorf("isValidBase64(%q) = %v, want %v", tt.input, result, tt.want) + } + }) + } +} + +func TestMin(t *testing.T) { + tests := []struct { + a, b int + want int + }{ + {5, 10, 5}, + {10, 5, 5}, + {5, 5, 5}, + {0, 10, 0}, + {-5, 5, -5}, + } + + for _, tt := range tests { + result := min(tt.a, tt.b) + if result != tt.want { + t.Errorf("min(%d, %d) = %d, want %d", tt.a, tt.b, result, tt.want) + } + } +} + +func TestConvertMessagesToOllama_ToolCalls(t *testing.T) { + t.Run("assistant message with tool calls", func(t *testing.T) { + functionName := "getWeather" + messages := []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleAssistant, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("I'll check the weather for you."), + }, + ChatAssistantMessage: &schemas.ChatAssistantMessage{ + ToolCalls: []schemas.ChatAssistantMessageToolCall{ + { + Index: 0, + Type: schemas.Ptr("function"), + ID: schemas.Ptr("call_123"), + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: &functionName, + Arguments: `{"location":"San Francisco"}`, + }, + }, + }, + }, + }, + } + + result := convertMessagesToOllama(messages) + + if len(result) != 1 { + t.Fatalf("Expected 1 message, got %d", len(result)) + } + + msg := result[0] + if msg.Role != "assistant" { + t.Errorf("Expected role 'assistant', got %q", msg.Role) + } + + if len(msg.ToolCalls) != 1 { + t.Fatalf("Expected 1 tool call, got %d", len(msg.ToolCalls)) + } + + if msg.ToolCalls[0].Function.Name != "getWeather" { + t.Errorf("Expected function name 'getWeather', got %q", msg.ToolCalls[0].Function.Name) + } + + if msg.ToolName != nil { + t.Errorf("ToolName should be nil for assistant messages, got %q", *msg.ToolName) + } + }) + + t.Run("tool response message with correct mapping", func(t *testing.T) { + functionName := "getWeather" + // First: assistant makes a tool call + // Second: tool response references that call by tool_call_id + messages := []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleAssistant, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("I'll check the weather."), + }, + ChatAssistantMessage: &schemas.ChatAssistantMessage{ + ToolCalls: []schemas.ChatAssistantMessageToolCall{ + { + Index: 0, + Type: schemas.Ptr("function"), + ID: schemas.Ptr("call_abc123"), + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: &functionName, + Arguments: `{"location":"Tokyo"}`, + }, + }, + }, + }, + }, + { + Role: schemas.ChatMessageRoleTool, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr(`{"temperature": 72, "condition": "sunny"}`), + }, + ChatToolMessage: &schemas.ChatToolMessage{ + ToolCallID: schemas.Ptr("call_abc123"), // References the tool call + }, + }, + } + + result := convertMessagesToOllama(messages) + + if len(result) != 2 { + t.Fatalf("Expected 2 messages, got %d", len(result)) + } + + // Verify assistant message + 
assistantMsg := result[0] + if assistantMsg.Role != "assistant" { + t.Errorf("Expected role 'assistant', got %q", assistantMsg.Role) + } + + // Verify tool response message + toolMsg := result[1] + if toolMsg.Role != "tool" { + t.Errorf("Expected role 'tool', got %q", toolMsg.Role) + } + + if toolMsg.ToolName == nil { + t.Fatal("ToolName should be set for tool messages") + } + + // CRITICAL: tool_name should be "getWeather" (from the mapping), NOT "call_abc123" + if *toolMsg.ToolName != "getWeather" { + t.Errorf("Expected tool_name 'getWeather', got %q", *toolMsg.ToolName) + } + + if len(toolMsg.ToolCalls) != 0 { + t.Errorf("Tool response messages should not have tool_calls") + } + }) + + t.Run("tool response without prior assistant message", func(t *testing.T) { + // Edge case: tool response arrives without a prior tool call in the conversation + messages := []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleTool, + Name: schemas.Ptr("getWeather"), // Fallback to Name field + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr(`{"temperature": 72}`), + }, + ChatToolMessage: &schemas.ChatToolMessage{ + ToolCallID: schemas.Ptr("call_unknown"), + }, + }, + } + + result := convertMessagesToOllama(messages) + + if len(result) != 1 { + t.Fatalf("Expected 1 message, got %d", len(result)) + } + + msg := result[0] + if msg.ToolName == nil { + t.Fatal("ToolName should be set using Name field as fallback") + } + + if *msg.ToolName != "getWeather" { + t.Errorf("Expected tool_name 'getWeather' from Name field, got %q", *msg.ToolName) + } + }) + + t.Run("multiple tool calls and responses", func(t *testing.T) { + weatherFunc := "getWeather" + timeFunc := "getTime" + messages := []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleAssistant, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("I'll check both."), + }, + ChatAssistantMessage: &schemas.ChatAssistantMessage{ + ToolCalls: []schemas.ChatAssistantMessageToolCall{ + { + ID: schemas.Ptr("call_weather"), + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: &weatherFunc, + Arguments: `{"location":"NYC"}`, + }, + }, + { + ID: schemas.Ptr("call_time"), + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: &timeFunc, + Arguments: `{"timezone":"EST"}`, + }, + }, + }, + }, + }, + { + Role: schemas.ChatMessageRoleTool, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr(`{"temp": 65}`), + }, + ChatToolMessage: &schemas.ChatToolMessage{ + ToolCallID: schemas.Ptr("call_weather"), + }, + }, + { + Role: schemas.ChatMessageRoleTool, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr(`{"time": "3pm"}`), + }, + ChatToolMessage: &schemas.ChatToolMessage{ + ToolCallID: schemas.Ptr("call_time"), + }, + }, + } + + result := convertMessagesToOllama(messages) + + if len(result) != 3 { + t.Fatalf("Expected 3 messages, got %d", len(result)) + } + + // Check first tool response + if result[1].ToolName == nil || *result[1].ToolName != "getWeather" { + t.Errorf("Expected first tool response to have tool_name 'getWeather'") + } + + // Check second tool response + if result[2].ToolName == nil || *result[2].ToolName != "getTime" { + t.Errorf("Expected second tool response to have tool_name 'getTime'") + } + }) + + t.Run("tool calls on non-assistant message should be ignored", func(t *testing.T) { + functionName := "someFunction" + messages := []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("Hello"), + 
+				},
+				ChatAssistantMessage: &schemas.ChatAssistantMessage{
+					ToolCalls: []schemas.ChatAssistantMessageToolCall{
+						{
+							Function: schemas.ChatAssistantMessageToolCallFunction{
+								Name: &functionName,
+							},
+						},
+					},
+				},
+			},
+		}
+
+		result := convertMessagesToOllama(messages)
+
+		if len(result) != 1 {
+			t.Fatalf("Expected 1 message, got %d", len(result))
+		}
+
+		// Tool calls should not be present for non-assistant messages
+		if len(result[0].ToolCalls) != 0 {
+			t.Errorf("User messages should not have tool_calls in Ollama format")
+		}
+	})
+}
+
+func TestConvertMessagesFromOllama_ToolCalls(t *testing.T) {
+	t.Run("assistant message with tool calls", func(t *testing.T) {
+		messages := []OllamaMessage{
+			{
+				Role:    "assistant",
+				Content: "I'll check the weather for you.",
+				ToolCalls: []OllamaToolCall{
+					{
+						Function: OllamaToolCallFunction{
+							Name: "getWeather",
+							Arguments: map[string]interface{}{
+								"location": "San Francisco",
+							},
+						},
+					},
+				},
+			},
+		}
+
+		result := convertMessagesFromOllama(messages)
+
+		if len(result) != 1 {
+			t.Fatalf("Expected 1 message, got %d", len(result))
+		}
+
+		msg := result[0]
+		if msg.Role != schemas.ChatMessageRoleAssistant {
+			t.Errorf("Expected role 'assistant', got %q", msg.Role)
+		}
+
+		if msg.ChatAssistantMessage == nil {
+			t.Fatal("ChatAssistantMessage should not be nil")
+		}
+
+		if len(msg.ChatAssistantMessage.ToolCalls) != 1 {
+			t.Fatalf("Expected 1 tool call, got %d", len(msg.ChatAssistantMessage.ToolCalls))
+		}
+
+		toolCall := msg.ChatAssistantMessage.ToolCalls[0]
+		if toolCall.Function.Name == nil || *toolCall.Function.Name != "getWeather" {
+			t.Errorf("Expected function name 'getWeather'")
+		}
+	})
+
+	t.Run("tool response message", func(t *testing.T) {
+		toolName := "getWeather"
+		messages := []OllamaMessage{
+			{
+				Role:     "tool",
+				Content:  `{"temperature": 72, "condition": "sunny"}`,
+				ToolName: &toolName,
+			},
+		}
+
+		result := convertMessagesFromOllama(messages)
+
+		if len(result) != 1 {
+			t.Fatalf("Expected 1 message, got %d", len(result))
+		}
+
+		msg := result[0]
+		if msg.Role != schemas.ChatMessageRoleTool {
+			t.Errorf("Expected role 'tool', got %q", msg.Role)
+		}
+
+		if msg.ChatToolMessage == nil {
+			t.Fatal("ChatToolMessage should not be nil")
+		}
+
+		// ToolCallID is intentionally not set by the converter - Ollama does not provide tool call IDs
+		if msg.ChatToolMessage.ToolCallID != nil {
+			t.Errorf("ToolCallID should not be set, got %q", *msg.ChatToolMessage.ToolCallID)
+		}
+
+		if msg.Name == nil || *msg.Name != "getWeather" {
+			t.Errorf("Expected Name 'getWeather'")
+		}
+	})
+}
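A small additional test along these lines could cover the image path of convertMessagesToOllama. This is a sketch, assuming it sits in utils_test.go (which already imports testing and the schemas package); the test function name is illustrative, and the expected values simply follow the data-URL stripping behaviour of extractBase64Image shown above.

func TestConvertMessagesToOllama_ImageContent(t *testing.T) {
	messages := []schemas.ChatMessage{
		{
			Role: schemas.ChatMessageRoleUser,
			Content: &schemas.ChatMessageContent{
				ContentBlocks: []schemas.ChatContentBlock{
					{
						Type: schemas.ChatContentBlockTypeText,
						Text: schemas.Ptr("What is in this image?"),
					},
					{
						Type: schemas.ChatContentBlockTypeImage,
						ImageURLStruct: &schemas.ChatInputImage{
							URL: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==",
						},
					},
				},
			},
		},
	}

	result := convertMessagesToOllama(messages)

	if len(result) != 1 {
		t.Fatalf("Expected 1 message, got %d", len(result))
	}

	msg := result[0]
	if msg.Content != "What is in this image?" {
		t.Errorf("Expected text content to be preserved, got %q", msg.Content)
	}

	// The data URL prefix should be stripped; Ollama expects raw base64
	if len(msg.Images) != 1 || msg.Images[0] != "iVBORw0KGgoAAAANSUhEUg==" {
		t.Errorf("Expected one raw base64 image, got %v", msg.Images)
	}
}

An HTTP(S) image URL in the same position would, per extractBase64Image, be dropped with a logged warning rather than forwarded to Ollama.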