"""
Module bhodi_doc_analyzer.config
This module initializes the tokenizer, language model (LLM), embeddings,
vectorstore (in-memory Chroma), sequencer (dedicated summarization model) and
reranker pipelines for Bhodi.
"""
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from langchain_community.chat_models import ChatLlamaCpp
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# =============================================================================
# TOKENIZER AND LLM INITIALIZATION
# =============================================================================
tokenizer = AutoTokenizer.from_pretrained(
    "unsloth/Qwen2.5-Coder-7B-Instruct",
    use_fast=True
)
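
# Usage sketch (illustrative; not executed as part of module setup): the fast
# tokenizer can budget a prompt against the context window configured below,
# e.g.:
#
#     prompt_tokens = len(tokenizer.encode("What does this module configure?"))
#     assert prompt_tokens < 3000  # n_ctx of the LLM below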
LOCAL_MODEL = "models/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf" # Local model path
llm = ChatLlamaCpp(
    model_path=LOCAL_MODEL,
    temperature=0.1,
    n_ctx=3000,
    n_gpu_layers=-1,
    n_batch=50,
    max_tokens=3000,
    top_p=0.9,
    verbose=False
)
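
# Usage sketch: ChatLlamaCpp follows the LangChain chat-model interface, so a
# plain string (or a list of messages) can be passed to ``invoke``; the reply
# is an AIMessage whose text is in ``.content``. Illustrative only:
#
#     reply = llm.invoke("Explain retrieval-augmented generation in one sentence.")
#     print(reply.content)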
# =============================================================================
# EMBEDDINGS AND NON-PERSISTENT VECTORSTORE INITIALIZATION
# =============================================================================
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cuda"}
)
# Non-persistent (in-memory) Chroma instance for chatbot conversation
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory=None  # In-memory only
)
# Retriever for chat context retrieval (volatile).
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
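
# Usage sketch: texts added to the in-memory store are embedded with
# all-MiniLM-L6-v2 and become searchable immediately; everything is lost when
# the process exits. Illustrative only:
#
#     vectorstore.add_texts(["Bhodi keeps conversation snippets in memory."])
#     top_docs = retriever.invoke("What does Bhodi keep in memory?")  # up to k=3 Documents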
# =============================================================================
# SEQUENCER (SUMMARIZATION MODEL) AND RERANKER INITIALIZATION
# =============================================================================
# This is a dedicated summarization model used as a sequencer.
sequencer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    tokenizer="facebook/bart-large-cnn",
    device=-1  # -1 runs on CPU; set to a GPU index (e.g. 0) to use a GPU
)
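
# Usage sketch: the summarization pipeline returns one dict per input with the
# condensed text under "summary_text" (``long_text`` here stands for any string
# longer than the requested summary). Illustrative only:
#
#     out = sequencer(long_text, max_length=130, min_length=30, do_sample=False)
#     summary = out[0]["summary_text"]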
# Initialize the reranker pipeline using a Hugging Face cross-encoder model.
reranker_tokenizer = AutoTokenizer.from_pretrained(
    "cross-encoder/ms-marco-MiniLM-L-6-v2"
)
reranker_model = AutoModelForSequenceClassification.from_pretrained(
    "cross-encoder/ms-marco-MiniLM-L-6-v2"
)
reranker = pipeline(
    "text-classification",
    model=reranker_model,
    tokenizer=reranker_tokenizer,
    device=-1  # -1 runs on CPU; set to a GPU index (e.g. 0) to use a GPU
)
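
# The cross-encoder emits a single relevance logit per (query, passage) pair;
# the text-classification pipeline reports its sigmoid as "score", which is
# sufficient for relative ranking. A minimal smoke test (a sketch with made-up
# texts) that only runs when this file is executed directly:
if __name__ == "__main__":
    query = "How is the vectorstore configured?"
    passages = [
        "The Chroma vectorstore is kept in memory and never persisted.",
        "BART-large-CNN is used purely for summarization.",
    ]
    # Score each (query, passage) pair, then sort passages by relevance.
    results = reranker([{"text": query, "text_pair": p} for p in passages])
    ranked = sorted(
        zip(passages, results), key=lambda pair: pair[1]["score"], reverse=True
    )
    for passage, result in ranked:
        print(f"{result['score']:.3f}  {passage}")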