-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbackup.py
215 lines (181 loc) · 8.39 KB
/
backup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import streamlit as st
import os
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.vectorstores import ElasticsearchStore
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain_elasticsearch import ElasticsearchStore
from langchain.schema import SystemMessage, HumanMessage
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from langchain.chains.combine_documents import create_stuff_documents_chain
# Load environment variables (via python-dotenv) before any of them are read
load_dotenv()

# The Google API key is required by both the embedding model and the chat model;
# without it the app shows an error and stops rendering.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    st.error("Google API key not found. Please set the GOOGLE_API_KEY environment variable.")
    st.stop()

# Initialize Google Generative AI Embedding
embeddings_llm = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=google_api_key)

# Initialize Elasticsearch connection.
# The URL and credentials can be overridden through ES_URL / ES_USER / ES_PASSWORD;
# the fallbacks are the values that used to be hard-coded here.
# NOTE(review): a real password is still present in source as the fallback --
# rotate it and drop the default once the environment variables are deployed.
es = Elasticsearch(
    hosts=[os.environ.get("ES_URL", "http://localhost:9200")],
    basic_auth=(
        os.environ.get("ES_USER", "elastic"),
        os.environ.get("ES_PASSWORD", "Xu*0DmHWEHvRoTWFA=Vs"),
    ),
)
# Sample employee records (hard-coded demo data)
@st.cache_data
def load_data():
    """Return the hard-coded sample employee records.

    Each record is a dict with the keys "id", "Name", "Address" and "About",
    in that order. The function is wrapped in ``st.cache_data``.
    """
    columns = ("id", "Name", "Address", "About")
    rows = (
        (121, "Ramesh", "123 Main St, Mumbai",
         "Software Engineer with 5 years of experience in full-stack development."),
        (122, "Rakesh", "456 Elm St, Delhi",
         "Marketing specialist focused on digital campaigns and brand management."),
        (123, "Rahul", "789 Maple St, Bangalore",
         "Finance manager with a background in corporate finance and investment banking."),
        (124, "Ramu", "101 Oak St, Hyderabad",
         "Operations manager with expertise in supply chain management and logistics."),
        (125, "Raju", "202 Pine St, Chennai",
         "Project manager with a focus on IT infrastructure and cloud computing."),
        (126, "Rohit", "303 Cedar St, Pune",
         "Data analyst with a passion for big data and machine learning."),
        (127, "Ravi", "404 Birch St, Kolkata",
         "Graphic designer specializing in UI/UX design and visual branding."),
        (128, "Rina", "505 Willow St, Ahmedabad",
         "Content writer and editor with expertise in SEO and digital marketing."),
        (129, "Rajesh", "606 Palm St, Jaipur",
         "Sales executive with a strong background in B2B sales and customer relations."),
        (130, "Rita", "707 Spruce St, Lucknow",
         "Human resources professional with experience in talent acquisition and employee engagement."),
    )
    # Pair every row with the column names to rebuild the list of dicts.
    return [dict(zip(columns, row)) for row in rows]
def create_index():
    """Create the 'emp_data' index if it does not exist yet.

    The sidebar's "Create Index" button calls this function, so it must be
    defined (it was previously commented out, which made the button raise
    NameError). The outcome is reported through the Streamlit UI.
    """
    if es.indices.exists(index="emp_data"):
        st.info("Index 'emp_data' already exists.")
        return

    es.indices.create(
        index="emp_data",
        settings={
            "number_of_shards": 1,
            "number_of_replicas": 1,
        },
    )
    st.success("Index 'emp_data' created successfully.")
#
#
def embed_and_index_data(data):
    """Embed every record and index it into 'emp_data'.

    The sidebar's "Embed and Index Data" button calls this function, so it
    must be defined (it was previously commented out, which made the button
    raise NameError).

    Args:
        data: Iterable of dicts; the "Name", "Address" and "About" entries are
            joined with spaces to form the text that is embedded.

    Returns:
        True when every record was indexed. False as soon as indexing one
        record fails (the error is shown in the UI and the remaining records
        are skipped).
    """
    for record in data:
        # Missing or None fields become empty strings so the join cannot fail.
        text_to_embed = ' '.join(
            str(record.get(field) or "") for field in ("Name", "Address", "About")
        )
        # Build a new document instead of mutating the caller's record.
        # NOTE(review): "text" is added on the assumption that the LangChain
        # ElasticsearchStore reads page content from a "text" field and vectors
        # from "vector" by default -- confirm against the installed version.
        document = {
            **record,
            "text": text_to_embed,
            "vector": embeddings_llm.embed_query(text_to_embed),
        }
        try:
            es.index(index="emp_data", document=document)
        except Exception as e:
            st.error(f"Error indexing document: {str(e)}")
            return False
        print(f"Record {record} Stored successfully!")
    st.success("Data indexed successfully with embeddings.")
    return True
# Custom ElasticsearchStore with a modified document builder (currently commented out)
# class CustomElasticsearchStore(ElasticsearchStore):
# @staticmethod
# def custom_doc_builder(hit: dict) -> Document:
# return Document(
# page_content=hit["_source"]["content"],
# metadata=hit["_source"]["metadata"]
# )
#
# def __init__(self, *args, **kwargs):
# super().__init__(*args, **kwargs)
# self.client.options(ignore_status=[400, 404])
# Initialize the chatbot
def init_chatbot():
    """Build the objects the chat UI needs to answer a question.

    Connection settings for Elasticsearch are read from the ES_URL, ES_USER
    and ES_PASSWORD environment variables, falling back to the values that
    were previously hard-coded here.

    Returns:
        A tuple ``(qa_chain, retriever, memory)``:
        the ConversationalRetrievalChain, the retriever it uses (top 5 hits
        from the 'emp_data' index), and a ConversationBufferMemory. The memory
        is NOT attached to the chain; callers pass ``chat_history`` explicitly.
    """
    # NOTE(review): the password fallback keeps a real credential in source --
    # rotate it and drop the default once the environment variables are deployed.
    vectorstore = ElasticsearchStore(
        es_url=os.getenv("ES_URL", "http://localhost:9200"),
        index_name="emp_data",
        embedding=embeddings_llm,  # module-level Google Generative AI embeddings
        es_user=os.getenv("ES_USER", "elastic"),
        es_password=os.getenv("ES_PASSWORD", "Xu*0DmHWEHvRoTWFA=Vs"),
    )

    # Create a retriever from the vectorstore
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

    # Initialize the chat model with the Google API key from the environment
    google_api_key = os.getenv("GOOGLE_API_KEY")
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=google_api_key)

    # Created and returned for callers, but deliberately not wired into the chain
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa_chain, retriever, memory
def check_index_content():
    """Show how many documents the 'emp_data' index holds.

    Writes the document count to the Streamlit UI. Warns instead of raising
    when the index does not exist yet, and warns when it exists but is empty.
    """
    # Counting a missing index raises; the sidebar lets users click this
    # button before "Create Index", so check for existence first.
    if not es.indices.exists(index="emp_data"):
        st.warning("The index 'emp_data' does not exist. Please create it first.")
        return

    result = es.count(index="emp_data")
    count = result['count']
    st.write(f"Number of documents in index: {count}")
    if count == 0:
        st.warning("The index is empty. Please index your data first.")
# Streamlit UI: page title
st.title("Conversation Analytics With Elasticsearch")

# Sidebar with the index maintenance actions. Each button is followed
# immediately by its handler so any output appears directly below it.
with st.sidebar:
    st.header("Data Operations")

    create_requested = st.button("Create Index")
    if create_requested:
        create_index()

    index_requested = st.button("Embed and Index Data")
    if index_requested:
        data = load_data()
        indexed_ok = embed_and_index_data(data)
        if not indexed_ok:
            st.error("Failed to embed and index data. Check the logs for more information.")
        else:
            st.success("Data embedded and indexed successfully!")

    check_requested = st.button("Check Index Content")
    if check_requested:
        check_index_content()
def _history_pairs(messages):
    """Pair stored chat messages into (question, answer) tuples.

    Args:
        messages: List of dicts with "role" ("user" or "assistant") and
            "content" keys, in chronological order.

    Returns:
        A list of ``(user_text, assistant_text)`` tuples. A user message
        without a following assistant reply is left out.
    """
    pairs = []
    pending_question = None
    for message in messages:
        if message["role"] == "user":
            pending_question = message["content"]
        elif message["role"] == "assistant" and pending_question is not None:
            pairs.append((pending_question, message["content"]))
            pending_question = None
    return pairs


# Main chat interface
st.header("Chat with the Employee Data")
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the stored transcript so it survives Streamlit reruns
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask a question about the employees"):
    # Build the history from the earlier turns BEFORE recording the new
    # question; previously an empty list was always sent, so follow-up
    # questions lost all conversational context.
    chat_history = _history_pairs(st.session_state.messages)

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Get chatbot response
    chatbot, retriever, memory = init_chatbot()
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = chatbot.invoke({"question": prompt, "chat_history": chat_history})
            answer = response['answer']
            st.markdown(answer)
    st.session_state.messages.append({"role": "assistant", "content": answer})

# Add a button to clear chat history
if st.button("Clear Chat History"):
    st.session_state.messages = []
    st.success("Chat history cleared!")