-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathllama2local.py
More file actions
29 lines (22 loc) · 833 Bytes
/
llama2local.py
File metadata and controls
29 lines (22 loc) · 833 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from llama_cpp import Llama
import ChatGPT
# Load Llama 2 model
def model_call(model, prompt, temperature, top_p, top_k, repetition, max_length):
    """Generate a completion for *prompt* with a local Llama 2 model.

    Falls back to an OpenAI GPT-3.5 call (via the project ``ChatGPT``
    module) when *model* does not name a supported local model.

    Args:
        model: Model selector, e.g. ``'LLaMa2-7B-Chat'`` or
            ``'LLaMa2-13B-Chat'``; anything else routes to GPT-3.5.
        prompt: Text prompt passed verbatim to the model.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        repetition: Repetition penalty (maps to ``repeat_penalty``).
        max_length: Maximum number of tokens to generate.

    Returns:
        The generated completion text (first choice), or whatever
        ``ChatGPT.GPT35Call`` returns for the fallback path.
    """
    # Map supported local model names to their GGUF weight files.
    model_paths = {
        'LLaMa2-7B-Chat': "files/models/llama-2-7b-chat.Q8_0.gguf",
        'LLaMa2-13B-Chat': "files/models/llama-2-13b-chat.Q8_0.gguf",
    }
    model_path = model_paths.get(model)
    if model_path is None:
        # Unknown model name: delegate to the OpenAI-backed helper.
        return ChatGPT.GPT35Call(prompt)
    # NOTE(review): the model is reloaded from disk on every call — consider
    # caching the Llama instance if this function is called repeatedly.
    llm = Llama(model_path)
    output = llm(prompt,
                 echo=False,
                 temperature=temperature,
                 top_p=top_p,
                 top_k=top_k,
                 repeat_penalty=repetition,
                 max_tokens=max_length)
    # Debug aid: persist the raw completion dict. Explicit encoding so the
    # dump is platform-independent. Remove when no longer needed.
    with open("response.txt", "w", encoding="utf-8") as f:
        f.write(str(output))
    return output['choices'][0]['text']