diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp index 1e26d8221b86b..4218b75f169ea 100644 --- a/examples/lookahead/lookahead.cpp +++ b/examples/lookahead/lookahead.cpp @@ -429,7 +429,7 @@ int main(int argc, char ** argv) { // KV cache management // if no verification token matched, we simply remove all cells from this batch -> no fragmentation - llama_memory_seq_rm(mem, -1, n_past, -1); + llama_memory_seq_rm(mem, 0, n_past, -1); if (seq_id_best != 0) { // if a verification token matched, we keep the best sequence and remove the rest diff --git a/tools/main/main.cpp b/tools/main/main.cpp index eb36c6884059c..9902f6870b2ab 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -354,7 +354,7 @@ int main(int argc, char ** argv) { } // remove any "future" tokens that we might have inherited from the previous session - llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1); + llama_memory_seq_rm(mem, 0, n_matching_session_tokens, -1); } LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",