Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a9c6818
Revert "vulkan: temporary cary of vulkan fixes (#12971)"
dhiltgen Nov 14, 2025
d2917b7
ggml update to b7087
dhiltgen Nov 7, 2025
366ed3e
fix argsort on metal
dhiltgen Nov 19, 2025
9a4271c
update to b7108
dhiltgen Nov 19, 2025
af56743
fix bakllava regression
dhiltgen Nov 11, 2025
4fd4574
fix lint logic to only compare against merge base and ignore files th…
dhiltgen Nov 20, 2025
0c24896
docs: fix output formatting in faq.mdx (#13231)
kokes Nov 29, 2025
6d9f932
.gitattributes: add app/webview to linguist-vendored (#13274)
jmorganca Nov 30, 2025
467bbc0
jetpack: require exact match or skip cuda_jetpack* (#13288)
dhiltgen Dec 1, 2025
9b52964
update to b7209 - performance regressions...
dhiltgen Nov 30, 2025
5b6a8e6
api/client: handle non-json streaming errors (#13007)
BruceMacD Dec 1, 2025
5541727
win: warn if ggml-base detected in PATH (#13289)
dhiltgen Dec 1, 2025
d3e0a0d
model: ministral w/ llama4 scaling (#13292)
pdevine Dec 2, 2025
f8f1071
CUDA: verify CC is supported by target library (#13298)
dhiltgen Dec 2, 2025
d771043
test: add ministral-3 (#13300)
dhiltgen Dec 2, 2025
5317202
llm: Don't always evict models on CPU-only systems
jessegross Nov 25, 2025
18b5958
test: avoid ministral tools test on low vram (#13302)
dhiltgen Dec 2, 2025
20aee96
Add Vulkan GPU support instructions in development.md (#13265)
chengcheng84 Dec 2, 2025
20fad26
feat: add M-RoPE support for Qwen2-VL and Qwen3-VL vision models
iosub Dec 2, 2025
cc9555a
Update user message format for temperature query (#13256)
nathan-hook Dec 2, 2025
3f30836
CUDA: filter devices on secondary discovery (#13317)
dhiltgen Dec 3, 2025
84a2ced
app: relay thinking false to server (#13319)
BruceMacD Dec 3, 2025
854d40e
ci: restore previous linter rules (#13322)
jmorganca Dec 4, 2025
0cf7794
ggml update to b7108 (#12992)
dhiltgen Dec 4, 2025
a03223b
cmd/bench: support writing benchmark output to file (#13263)
Eloitor Dec 4, 2025
0a844f8
convert: add deepseek converter (#12980)
pdevine Dec 4, 2025
7837a5b
ggml: Always set cache padding to 256
jessegross Dec 4, 2025
1108d8b
ggml: Enable flash attention for vision encoders
jessegross Dec 2, 2025
9191dfa
llm: Enable flash attention for mistral3 by default
jessegross Dec 4, 2025
31b8c6a
fix(api): correct Content-Type header for /api/chat and /api/generate…
ZeeeUs Dec 5, 2025
98eec35
Merge branch 'ollama:main' into feat/mrope-clean
iosub Dec 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ ml/backend/**/*.comp linguist-vendored
ml/backend/**/*.glsl linguist-vendored
ml/backend/**/CMakeLists.txt linguist-vendored

app/webview linguist-vendored

llama/build-info.cpp linguist-generated
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ jobs:

- uses: golangci/golangci-lint-action@v9
with:
only-new-issues: true
args: --new-from-merge-base ${{ github.base_ref }}

patches:
runs-on: ubuntu-latest
Expand Down
74 changes: 24 additions & 50 deletions .golangci.yaml
Original file line number Diff line number Diff line change
@@ -1,77 +1,51 @@
version: "2"
linters:
default: none
enable:
- asasalint
- bidichk
- bodyclose
- containedctx
- copyloopvar
- errcheck
- errorlint
- exptostd
- gocheckcompilerdirectives
- gocritic
- govet
- ineffassign
- intrange
- makezero
- misspell
- modernize
- nilerr
- nilnil
- nolintlint
- nosprintfhostport
- perfsprint
- prealloc
- sloglint
- staticcheck
- unconvert
- unused
- usestdlibvars
- usetesting
- wastedassign
- whitespace
disable:
- errcheck
- usestdlibvars
settings:
errcheck:
exclude-functions:
- fmt.Fprintf
perfsprint:
strconcat: false
concat-loop: false
govet:
disable:
- unusedresult
staticcheck:
checks:
- all
# Using a deprecated function, variable, constant or field.
# https://staticcheck.dev/docs/checks/#SA1019
- -QF* # disable quick fix suggestions
- -SA1019
# Incorrect or missing package comment.
# https://staticcheck.dev/docs/checks/#ST1000
- -ST1000
# Poorly chosen identifier.
# https://staticcheck.dev/docs/checks/#ST1003
- -ST1003
# The documentation of an exported function should start with the function's name.
# https://staticcheck.dev/docs/checks/#ST1020
- -ST1020
# The documentation of an exported type should start with type's name.
# https://staticcheck.dev/docs/checks/#ST1021
- -ST1021
# The documentation of an exported variable or constant should start with variable's name.
# https://staticcheck.dev/docs/checks/#ST1022
- -ST1022
usestdlibvars:
http-method: false
http-status-code: false

- -ST1000 # package comment format
- -ST1003 # underscores in package names
- -ST1005 # error strings should not be capitalized
- -ST1012 # error var naming (ErrFoo)
- -ST1016 # receiver name consistency
- -ST1020 # comment on exported function format
- -ST1021 # comment on exported type format
- -ST1022 # comment on exported var format
- -ST1023 # omit type from declaration
severity:
default: error
rules:
- linters:
- gofmt
- goimports
- intrange
severity: info
formatters:
enable:
- gci
- gofmt
- gofumpt
settings:
gci:
sections:
- standard
- default
- localmodule
2 changes: 1 addition & 1 deletion Makefile.sync
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
UPSTREAM=https://github.com/ggml-org/llama.cpp.git
WORKDIR=llama/vendor
FETCH_HEAD=3cfa9c3f125763305b4226bc032f1954f08990dc
FETCH_HEAD=7f8ef50cce40e3e7e4526a3696cb45658190e69a

.PHONY: help
help:
Expand Down
9 changes: 8 additions & 1 deletion api/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f

bts := scanner.Bytes()
if err := json.Unmarshal(bts, &errorResponse); err != nil {
return fmt.Errorf("unmarshal: %w", err)
if response.StatusCode >= http.StatusBadRequest {
return StatusError{
StatusCode: response.StatusCode,
Status: response.Status,
ErrorMessage: string(bts),
}
}
return errors.New(string(bts))
}

if response.StatusCode == http.StatusUnauthorized {
Expand Down
78 changes: 68 additions & 10 deletions api/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ func TestClientFromEnvironment(t *testing.T) {
type testError struct {
message string
statusCode int
raw bool // if true, write message as-is instead of JSON encoding
}

func (e testError) Error() string {
Expand Down Expand Up @@ -111,6 +112,20 @@ func TestClientStream(t *testing.T) {
},
},
},
{
name: "plain text error response",
responses: []any{
"internal server error",
},
wantErr: "internal server error",
},
{
name: "HTML error page",
responses: []any{
"<html><body>404 Not Found</body></html>",
},
wantErr: "404 Not Found",
},
}

for _, tc := range testCases {
Expand All @@ -135,6 +150,12 @@ func TestClientStream(t *testing.T) {
return
}

if str, ok := resp.(string); ok {
fmt.Fprintln(w, str)
flusher.Flush()
continue
}

if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Fatalf("failed to encode response: %v", err)
}
Expand Down Expand Up @@ -173,25 +194,28 @@ func TestClientStream(t *testing.T) {

func TestClientDo(t *testing.T) {
testCases := []struct {
name string
response any
wantErr string
name string
response any
wantErr string
wantStatusCode int
}{
{
name: "immediate error response",
response: testError{
message: "test error message",
statusCode: http.StatusBadRequest,
},
wantErr: "test error message",
wantErr: "test error message",
wantStatusCode: http.StatusBadRequest,
},
{
name: "server error response",
response: testError{
message: "internal error",
statusCode: http.StatusInternalServerError,
},
wantErr: "internal error",
wantErr: "internal error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "successful response",
Expand All @@ -203,18 +227,43 @@ func TestClientDo(t *testing.T) {
Success: true,
},
},
{
name: "plain text error response",
response: testError{
message: "internal server error",
statusCode: http.StatusInternalServerError,
raw: true,
},
wantErr: "internal server error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "HTML error page",
response: testError{
message: "<html><body>404 Not Found</body></html>",
statusCode: http.StatusNotFound,
raw: true,
},
wantErr: "<html><body>404 Not Found</body></html>",
wantStatusCode: http.StatusNotFound,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if errResp, ok := tc.response.(testError); ok {
w.WriteHeader(errResp.statusCode)
err := json.NewEncoder(w).Encode(map[string]string{
"error": errResp.message,
})
if err != nil {
t.Fatal("failed to encode error response:", err)
if !errResp.raw {
err := json.NewEncoder(w).Encode(map[string]string{
"error": errResp.message,
})
if err != nil {
t.Fatal("failed to encode error response:", err)
}
} else {
// Write raw message (simulates non-JSON error responses)
fmt.Fprint(w, errResp.message)
}
return
}
Expand All @@ -241,6 +290,15 @@ func TestClientDo(t *testing.T) {
if err.Error() != tc.wantErr {
t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
}
if tc.wantStatusCode != 0 {
if statusErr, ok := err.(StatusError); ok {
if statusErr.StatusCode != tc.wantStatusCode {
t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
}
} else {
t.Errorf("expected StatusError, got %T", err)
}
}
return
}

Expand Down
8 changes: 4 additions & 4 deletions api/examples/chat/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ func main() {
}

messages := []api.Message{
api.Message{
{
Role: "system",
Content: "Provide very brief, concise responses",
},
api.Message{
{
Role: "user",
Content: "Name some unusual animals",
},
api.Message{
{
Role: "assistant",
Content: "Monotreme, platypus, echidna",
},
api.Message{
{
Role: "user",
Content: "which of these is the most dangerous?",
},
Expand Down
6 changes: 2 additions & 4 deletions app/ui/app/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,10 @@ export async function* sendMessage(
data: uint8ArrayToBase64(att.data),
}));

// Only send think parameter when actually requesting thinking
// Don't send false as it causes issues with some providers
// Send think parameter when it's explicitly set (true, false, or a non-empty string).
const shouldSendThink =
think !== undefined &&
((typeof think === "boolean" && think) ||
(typeof think === "string" && think !== ""));
(typeof think === "boolean" || (typeof think === "string" && think !== ""));

const response = await fetch(`${API_BASE}/api/v1/chat/${chatId}`, {
method: "POST",
Expand Down
18 changes: 15 additions & 3 deletions cmd/bench/bench.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ func OutputMetrics(w io.Writer, format string, metrics []Metrics, verbose bool)
case "benchstat":
if verbose {
printHeader := func() {
fmt.Printf("sysname: %s\n", runtime.GOOS)
fmt.Printf("machine: %s\n", runtime.GOARCH)
fmt.Fprintf(w, "sysname: %s\n", runtime.GOOS)
fmt.Fprintf(w, "machine: %s\n", runtime.GOARCH)
}
once.Do(printHeader)
}
Expand Down Expand Up @@ -147,6 +147,17 @@ func BenchmarkChat(fOpt flagOptions) error {
return err
}

var out io.Writer = os.Stdout
if fOpt.outputFile != nil && *fOpt.outputFile != "" {
f, err := os.OpenFile(*fOpt.outputFile, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR: cannot open output file %s: %v\n", *fOpt.outputFile, err)
return err
}
defer f.Close()
out = f
}

for _, model := range models {
for range *fOpt.epochs {
options := make(map[string]interface{})
Expand Down Expand Up @@ -241,13 +252,14 @@ func BenchmarkChat(fOpt flagOptions) error {
},
}

OutputMetrics(os.Stdout, *fOpt.format, metrics, *fOpt.verbose)
OutputMetrics(out, *fOpt.format, metrics, *fOpt.verbose)

if *fOpt.keepAlive > 0 {
time.Sleep(time.Duration(*fOpt.keepAlive*float64(time.Second)) + 200*time.Millisecond)
}
}
}

return nil
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -1430,7 +1430,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
latest.Summary()
}

return &api.Message{Role: role, Content: fullResponse.String()}, nil
return &api.Message{Role: role, Thinking: thinkingContent.String(), Content: fullResponse.String()}, nil
}

func generate(cmd *cobra.Command, opts runOptions) error {
Expand Down
2 changes: 2 additions & 0 deletions convert/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
conv = &gptossModel{}
case "DeepseekOCRForCausalLM":
conv = &deepseekocr{}
case "DeepseekV3ForCausalLM":
conv = &deepseek2Model{}
default:
return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
}
Expand Down
Loading
Loading