chore: bump llama.cpp

thxCode · thxCode · commit d5e7261c3e51 · 2025-07-17T17:21:40.000+08:00
Signed-off-by: thxCode <thxcode0824@gmail.com>
diff --git a/llama-box/patches/ggml/ggml-cpu.patch b/llama-box/patches/ggml/ggml-cpu.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
-index 66a5ad8d2..6e0e55625 100644
+index 66a5ad8d..6e0e5562 100644
 --- a/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -158,47 +158,94 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
@@ -353,7 +353,7 @@ index 66a5ad8d2..6e0e55625 100644
              endif()
          endif()
 diff --git a/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp b/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
-index 67369147c..c460c5491 100644
+index 67369147..c460c549 100644
 --- a/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
 +++ b/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
 @@ -8,6 +8,10 @@
diff --git a/llama-box/patches/llama.cpp/batch.patch b/llama-box/patches/llama.cpp/batch.patch
@@ -1,5 +1,5 @@
 diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
-index 3bc8554e5..be189b6ef 100644
+index 3bc8554e..be189b6e 100644
 --- a/src/llama-batch.cpp
 +++ b/src/llama-batch.cpp
 @@ -237,114 +237,114 @@ bool llama_batch_allocr::init(
diff --git a/llama-box/patches/llama.cpp/clip.patch b/llama-box/patches/llama.cpp/clip.patch
@@ -1,5 +1,5 @@
 diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h
-index 62c936ed0..e393462fe 100644
+index 62c936ed..e393462f 100644
 --- a/tools/mtmd/clip-impl.h
 +++ b/tools/mtmd/clip-impl.h
 @@ -226,6 +226,7 @@ static void clip_log_internal(enum ggml_log_level level, const char * format, ..
@@ -35,7 +35,7 @@ index 62c936ed0..e393462fe 100644
  // split string by a `std::string delim` instead of `char delim`
  static std::vector<std::string> string_split_str(std::string s, const std::string & delimiter) {
 diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
-index 9146c9e9c..29df92558 100644
+index 9146c9e9..29df9255 100644
 --- a/tools/mtmd/clip.cpp
 +++ b/tools/mtmd/clip.cpp
 @@ -30,6 +30,11 @@
@@ -207,7 +207,7 @@ index 9146c9e9c..29df92558 100644
      return ctx->model.modality == CLIP_MODALITY_VISION;
  }
 diff --git a/tools/mtmd/clip.h b/tools/mtmd/clip.h
-index 08f3efb7b..d90921f64 100644
+index 08f3efb7..d90921f6 100644
 --- a/tools/mtmd/clip.h
 +++ b/tools/mtmd/clip.h
 @@ -6,6 +6,20 @@
@@ -372,7 +372,7 @@ index 08f3efb7b..d90921f64 100644
 +CLIP_API bool clip_has_audio_encoder(const struct clip_ctx * ctx);
 +CLIP_API bool clip_has_whisper_encoder(const struct clip_ctx * ctx);
 diff --git a/tools/mtmd/mtmd-audio.h b/tools/mtmd/mtmd-audio.h
-index b7b940aff..91d31d161 100644
+index b7b940af..91d31d16 100644
 --- a/tools/mtmd/mtmd-audio.h
 +++ b/tools/mtmd/mtmd-audio.h
 @@ -15,6 +15,20 @@
@@ -414,7 +414,7 @@ index b7b940aff..91d31d161 100644
  
  } // namespace whisper_precalc_filters
 diff --git a/tools/mtmd/mtmd-helper.cpp b/tools/mtmd/mtmd-helper.cpp
-index 686f42f39..eead1f8e6 100644
+index 686f42f3..eead1f8e 100644
 --- a/tools/mtmd/mtmd-helper.cpp
 +++ b/tools/mtmd/mtmd-helper.cpp
 @@ -458,3 +458,7 @@ mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char *
@@ -427,7 +427,7 @@ index 686f42f39..eead1f8e6 100644
 +}
 \ No newline at end of file
 diff --git a/tools/mtmd/mtmd-helper.h b/tools/mtmd/mtmd-helper.h
-index 5c0edc693..f474b0c93 100644
+index 5c0edc69..f474b0c9 100644
 --- a/tools/mtmd/mtmd-helper.h
 +++ b/tools/mtmd/mtmd-helper.h
 @@ -80,6 +80,8 @@ MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
diff --git a/llama-box/patches/llama.cpp/common.patch b/llama-box/patches/llama.cpp/common.patch
@@ -1,8 +1,8 @@
 diff --git a/common/common.cpp b/common/common.cpp
-index e4e71ad13..11e63a789 100644
+index 262b6799..21b0993a 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -1035,7 +1035,16 @@ struct common_init_result common_init_from_params(common_params & params) {
+@@ -1041,7 +1041,16 @@ struct common_init_result common_init_from_params(common_params & params) {
  
          // some models (e.g. T5) don't have a BOS token
          if (bos != LLAMA_TOKEN_NULL) {
@@ -19,7 +19,7 @@ index e4e71ad13..11e63a789 100644
          }
          if (eos != LLAMA_TOKEN_NULL) {
              tmp.push_back(eos);
-@@ -1045,7 +1054,9 @@ struct common_init_result common_init_from_params(common_params & params) {
+@@ -1051,7 +1060,9 @@ struct common_init_result common_init_from_params(common_params & params) {
          }
  
          if (llama_model_has_encoder(model)) {
@@ -30,7 +30,7 @@ index e4e71ad13..11e63a789 100644
              llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
              if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
                  decoder_start_token_id = bos;
-@@ -1054,7 +1065,9 @@ struct common_init_result common_init_from_params(common_params & params) {
+@@ -1060,7 +1071,9 @@ struct common_init_result common_init_from_params(common_params & params) {
              tmp.push_back(decoder_start_token_id);
          }
          if (llama_model_has_decoder(model)) {
@@ -42,7 +42,7 @@ index e4e71ad13..11e63a789 100644
          llama_memory_clear(llama_get_memory(lctx), true);
          llama_synchronize(lctx);
 diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index 7c07b047b..fdc973903 100644
+index 7c07b047..fdc97390 100644
 --- a/src/llama-context.cpp
 +++ b/src/llama-context.cpp
 @@ -927,9 +927,9 @@ int llama_context::decode(const llama_batch & batch_inp) {
diff --git a/llama-box/patches/llama.cpp/context.patch b/llama-box/patches/llama.cpp/context.patch
@@ -1,5 +1,5 @@
 diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index 7c07b047b..98552c59f 100644
+index 7c07b047..98552c59 100644
 --- a/src/llama-context.cpp
 +++ b/src/llama-context.cpp
 @@ -107,7 +107,7 @@ llama_context::llama_context(
diff --git a/llama-box/patches/llama.cpp/dynamic_link.patch b/llama-box/patches/llama.cpp/dynamic_link.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp
-index c9daa4c39..26f219c47 100644
+index c9daa4c3..26f219c4 100644
 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp
 +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp
 @@ -635,6 +635,18 @@ static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const ch
diff --git a/llama-box/patches/llama.cpp/embedding.patch b/llama-box/patches/llama.cpp/embedding.patch
@@ -1,5 +1,5 @@
 diff --git a/include/llama.h b/include/llama.h
-index c83b75915..7632a5ccf 100644
+index 28e84d4d..8f9c66d3 100644
 --- a/include/llama.h
 +++ b/include/llama.h
 @@ -448,6 +448,7 @@ extern "C" {
@@ -11,7 +11,7 @@ index c83b75915..7632a5ccf 100644
      LLAMA_API uint32_t llama_n_batch    (const struct llama_context * ctx);
      LLAMA_API uint32_t llama_n_ubatch   (const struct llama_context * ctx);
 diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index 7c07b047b..88b6c1509 100644
+index 7c07b047..88b6c150 100644
 --- a/src/llama-context.cpp
 +++ b/src/llama-context.cpp
 @@ -103,6 +103,20 @@ llama_context::llama_context(
@@ -76,7 +76,7 @@ index 7c07b047b..88b6c1509 100644
      return ctx->n_ctx();
  }
 diff --git a/src/llama-context.h b/src/llama-context.h
-index 9ce05715a..b625d9197 100644
+index 9ce05715..b625d919 100644
 --- a/src/llama-context.h
 +++ b/src/llama-context.h
 @@ -37,6 +37,7 @@ struct llama_context {
diff --git a/llama-box/patches/llama.cpp/ggml-cann.patch b/llama-box/patches/llama.cpp/ggml-cann.patch
@@ -1,5 +1,5 @@
 diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
-index a248a7ec2..2c816ac22 100644
+index a248a7ec..2c816ac2 100644
 --- a/src/llama-graph.cpp
 +++ b/src/llama-graph.cpp
 @@ -544,20 +544,28 @@ ggml_tensor * llm_graph_context::build_ffn(
diff --git a/llama-box/patches/llama.cpp/ggml-cpu.patch b/llama-box/patches/llama.cpp/ggml-cpu.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
-index fd77e9a6a..ba69608f0 100644
+index fd77e9a6..ba69608f 100644
 --- a/ggml/src/ggml-cpu/ops.cpp
 +++ b/ggml/src/ggml-cpu/ops.cpp
 @@ -5597,7 +5597,9 @@ static void ggml_compute_forward_soft_max_f32(
diff --git a/llama-box/patches/llama.cpp/ggml-cuda.patch b/llama-box/patches/llama.cpp/ggml-cuda.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-cuda/scale.cu b/ggml/src/ggml-cuda/scale.cu
-index 2ee9e5889..1b45a468b 100644
+index 2ee9e588..1b45a468 100644
 --- a/ggml/src/ggml-cuda/scale.cu
 +++ b/ggml/src/ggml-cuda/scale.cu
 @@ -1,7 +1,7 @@
diff --git a/llama-box/patches/llama.cpp/ggml-hip.patch b/llama-box/patches/llama.cpp/ggml-hip.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
-index 1a2708ec9..72d5013ad 100644
+index 1a2708ec..72d5013a 100644
 --- a/ggml/src/ggml-cuda/common.cuh
 +++ b/ggml/src/ggml-cuda/common.cuh
 @@ -59,6 +59,10 @@
@@ -32,7 +32,7 @@ index 1a2708ec9..72d5013ad 100644
  #elif defined(RDNA3) || defined(RDNA4)
      c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
 diff --git a/ggml/src/ggml-cuda/mmvq.cu b/ggml/src/ggml-cuda/mmvq.cu
-index dc7adf509..6f9db6100 100644
+index dc7adf50..6f9db610 100644
 --- a/ggml/src/ggml-cuda/mmvq.cu
 +++ b/ggml/src/ggml-cuda/mmvq.cu
 @@ -63,7 +63,7 @@ enum mmvq_parameter_table_id {
diff --git a/llama-box/patches/llama.cpp/ggml-metal.patch b/llama-box/patches/llama.cpp/ggml-metal.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 44ddc69d0..6fed9a590 100644
+index 44ddc69d..6fed9a59 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
 @@ -1613,6 +1613,7 @@ static bool ggml_backend_metal_buffer_rset_init(
diff --git a/llama-box/patches/llama.cpp/ggml-rpc.patch b/llama-box/patches/llama.cpp/ggml-rpc.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp
-index f468f796d..365a912bf 100644
+index f468f796..365a912b 100644
 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp
 +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
 @@ -4,6 +4,8 @@
diff --git a/llama-box/patches/llama.cpp/grammer.patch b/llama-box/patches/llama.cpp/grammer.patch
@@ -1,5 +1,5 @@
 diff --git a/common/sampling.cpp b/common/sampling.cpp
-index 9c04d35fd..5a838d9fb 100644
+index 9c04d35f..5a838d9f 100644
 --- a/common/sampling.cpp
 +++ b/common/sampling.cpp
 @@ -434,6 +434,13 @@ llama_token common_sampler_last(const struct common_sampler * gsmpl) {
@@ -17,7 +17,7 @@ index 9c04d35fd..5a838d9fb 100644
      std::string result = "logits ";
  
 diff --git a/common/sampling.h b/common/sampling.h
-index 2064421db..ecfc99588 100644
+index 2064421d..ecfc9958 100644
 --- a/common/sampling.h
 +++ b/common/sampling.h
 @@ -91,6 +91,8 @@ llama_token_data_array * common_sampler_get_candidates(struct common_sampler * g
@@ -30,7 +30,7 @@ index 2064421db..ecfc99588 100644
  std::string common_sampler_print(const struct common_sampler * gsmpl);
  
 diff --git a/include/llama.h b/include/llama.h
-index c83b75915..314905fb0 100644
+index 28e84d4d..04373394 100644
 --- a/include/llama.h
 +++ b/include/llama.h
 @@ -1340,6 +1340,8 @@ extern "C" {
@@ -43,7 +43,7 @@ index c83b75915..314905fb0 100644
      //
      // Shorthand for:
 diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
-index bed706bb2..2b3bd09e0 100644
+index bed706bb..2b3bd09e 100644
 --- a/src/llama-grammar.cpp
 +++ b/src/llama-grammar.cpp
 @@ -1086,6 +1086,10 @@ void llama_grammar_free_impl(struct llama_grammar * grammar) {
@@ -81,7 +81,7 @@ index bed706bb2..2b3bd09e0 100644
          }
      }
 diff --git a/src/llama-grammar.h b/src/llama-grammar.h
-index f8c291de9..c6e38937d 100644
+index f8c291de..c6e38937 100644
 --- a/src/llama-grammar.h
 +++ b/src/llama-grammar.h
 @@ -157,6 +157,8 @@ struct llama_grammar * llama_grammar_init_impl(
@@ -94,7 +94,7 @@ index f8c291de9..c6e38937d 100644
  
  // TODO: move the API below as member functions of llama_grammar
 diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
-index bfbf5fa23..bc4678a72 100644
+index bfbf5fa2..bc4678a7 100644
 --- a/src/llama-sampling.cpp
 +++ b/src/llama-sampling.cpp
 @@ -2540,6 +2540,15 @@ uint32_t llama_sampler_get_seed(const struct llama_sampler * smpl) {
diff --git a/llama-box/patches/llama.cpp/log.patch b/llama-box/patches/llama.cpp/log.patch
@@ -1,5 +1,5 @@
 diff --git a/common/log.cpp b/common/log.cpp
-index 52b31470c..6837c8e50 100644
+index 52b31470..6837c8e5 100644
 --- a/common/log.cpp
 +++ b/common/log.cpp
 @@ -8,6 +8,8 @@
diff --git a/llama-box/patches/llama.cpp/max_devices.patch b/llama-box/patches/llama.cpp/max_devices.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
-index 788861a36..2afb0a87a 100644
+index 788861a3..2afb0a87 100644
 --- a/ggml/src/ggml-backend.cpp
 +++ b/ggml/src/ggml-backend.cpp
 @@ -609,7 +609,7 @@ static bool ggml_is_view_op(enum ggml_op op) {
@@ -12,7 +12,7 @@ index 788861a36..2afb0a87a 100644
  
  #ifndef GGML_SCHED_MAX_SPLIT_INPUTS
 diff --git a/src/llama.cpp b/src/llama.cpp
-index 34906cdb6..e5110294b 100644
+index 34906cdb..e5110294 100644
 --- a/src/llama.cpp
 +++ b/src/llama.cpp
 @@ -34,7 +34,7 @@ struct llama_sampler_chain_params llama_sampler_chain_default_params() {
diff --git a/llama-box/patches/llama.cpp/model.patch b/llama-box/patches/llama.cpp/model.patch
@@ -1,5 +1,5 @@
 diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
-index 5c7a0d087..d394bf955 100644
+index 5c7a0d08..d394bf95 100644
 --- a/src/llama-arch.cpp
 +++ b/src/llama-arch.cpp
 @@ -17,6 +17,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
@@ -34,7 +34,7 @@ index 5c7a0d087..d394bf955 100644
          LLM_ARCH_BERT,
          {
 diff --git a/src/llama-arch.h b/src/llama-arch.h
-index d4a2dea9e..c0fc4453f 100644
+index d4a2dea9..c0fc4453 100644
 --- a/src/llama-arch.h
 +++ b/src/llama-arch.h
 @@ -21,6 +21,7 @@ enum llm_arch {
@@ -46,7 +46,7 @@ index d4a2dea9e..c0fc4453f 100644
      LLM_ARCH_NOMIC_BERT,
      LLM_ARCH_NOMIC_BERT_MOE,
 diff --git a/src/llama-model.cpp b/src/llama-model.cpp
-index ffee997b8..e80f56295 100644
+index 1c437d55..394edff4 100644
 --- a/src/llama-model.cpp
 +++ b/src/llama-model.cpp
 @@ -751,6 +751,16 @@ void llama_model::load_hparams(llama_model_loader & ml) {
diff --git a/llama-box/patches/llama.cpp/model_py.patch b/llama-box/patches/llama.cpp/model_py.patch
@@ -1,5 +1,5 @@
 diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
-index 165afb194..e2aac7444 100755
+index 764163c4..2f7bb5b1 100755
 --- a/convert_hf_to_gguf.py
 +++ b/convert_hf_to_gguf.py
 @@ -4379,6 +4379,123 @@ class XLMRobertaModel(BertModel):
@@ -127,7 +127,7 @@ index 165afb194..e2aac7444 100755
  class GemmaModel(TextModel):
      model_arch = gguf.MODEL_ARCH.GEMMA
 diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
-index 486a165b6..df9fc495d 100644
+index 486a165b..df9fc495 100644
 --- a/gguf-py/gguf/constants.py
 +++ b/gguf-py/gguf/constants.py
 @@ -300,6 +300,7 @@ class MODEL_ARCH(IntEnum):
@@ -168,7 +168,7 @@ index 486a165b6..df9fc495d 100644
          MODEL_TENSOR.TOKEN_EMBD,
          MODEL_TENSOR.TOKEN_EMBD_NORM,
 diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
-index 2a675044f..8c9dc3be2 100644
+index 2a675044..8c9dc3be 100644
 --- a/gguf-py/gguf/tensor_mapping.py
 +++ b/gguf-py/gguf/tensor_mapping.py
 @@ -161,6 +161,7 @@ class TensorNameMap:
diff --git a/llama-box/patches/llama.cpp/mrope.patch b/llama-box/patches/llama.cpp/mrope.patch
@@ -1,5 +1,5 @@
 diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
-index fd77e9a6a..4ce3cd0a6 100644
+index fd77e9a6..4ce3cd0a 100644
 --- a/ggml/src/ggml-cpu/ops.cpp
 +++ b/ggml/src/ggml-cpu/ops.cpp
 @@ -6022,10 +6022,6 @@ static void ggml_compute_forward_rope_f32(
@@ -25,7 +25,7 @@ index fd77e9a6a..4ce3cd0a6 100644
          GGML_ASSERT(n_dims == ne0/2);
      }
 diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu
-index d058504cd..c6dbd8216 100644
+index d058504c..c6dbd821 100644
 --- a/ggml/src/ggml-cuda/rope.cu
 +++ b/ggml/src/ggml-cuda/rope.cu
 @@ -371,10 +371,6 @@ void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
@@ -40,7 +40,7 @@ index d058504cd..c6dbd8216 100644
          GGML_ASSERT(n_dims == ne00/2);
      }
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 44ddc69d0..c94395f70 100644
+index 44ddc69d..c94395f7 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
 @@ -4367,7 +4367,6 @@ static bool ggml_metal_encode_node(
@@ -52,7 +52,7 @@ index 44ddc69d0..c94395f70 100644
                  GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);
  
 diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
-index 338825915..a1ba8a830 100644
+index 33882591..a1ba8a83 100644
 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp
 +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
 @@ -6210,10 +6210,6 @@ static void ggml_cl_rope(ggml_backend_t backend, const ggml_tensor * src0, const
diff --git a/llama-box/patches/llama.cpp/ngram_cache.patch b/llama-box/patches/llama.cpp/ngram_cache.patch
@@ -1,5 +1,5 @@
 diff --git a/common/ngram-cache.cpp b/common/ngram-cache.cpp
-index d1a4d84c4..26ed2e894 100644
+index d1a4d84c..26ed2e89 100644
 --- a/common/ngram-cache.cpp
 +++ b/common/ngram-cache.cpp
 @@ -192,7 +192,6 @@ void common_ngram_cache_draft(
diff --git a/llama-box/patches/llama.cpp/progress_callback.patch b/llama-box/patches/llama.cpp/progress_callback.patch
@@ -1,5 +1,5 @@
 diff --git a/src/llama.cpp b/src/llama.cpp
-index 34906cdb6..61bf9386f 100644
+index 34906cdb..61bf9386 100644
 --- a/src/llama.cpp
 +++ b/src/llama.cpp
 @@ -153,10 +153,7 @@ static struct llama_model * llama_model_load_from_file_impl(
diff --git a/llama-box/patches/llama.cpp/pure_cpu.patch b/llama-box/patches/llama.cpp/pure_cpu.patch
diff --git a/llama-box/patches/llama.cpp/sampling.patch b/llama-box/patches/llama.cpp/sampling.patch
diff --git a/llama-box/patches/llama.cpp/seed.patch b/llama-box/patches/llama.cpp/seed.patch
diff --git a/llama-box/patches/llama.cpp/template.patch b/llama-box/patches/llama.cpp/template.patch
diff --git a/llama-box/patches/llama.cpp/tool_calling.patch b/llama-box/patches/llama.cpp/tool_calling.patch
diff --git a/llama-box/patches/llama.cpp/vendor_httplib.patch b/llama-box/patches/llama.cpp/vendor_httplib.patch
diff --git a/llama-box/patches/llama.cpp/vocab.patch b/llama-box/patches/llama.cpp/vocab.patch
diff --git a/llama.cpp b/llama.cpp