Skip to content

Commit

Permalink
Include persona description, fix indexing of PDF pages, make chat_response a string
Browse files: browse the repository at this point in the history
  • Loading branch information
AnniePacheco committed Sep 11, 2024
1 parent 38d3e6d commit a11cd7f
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 26 deletions.
2 changes: 1 addition & 1 deletion apis/paios/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ components:
prompt:
$ref: '#/components/schemas/textLong'
chat_response:
$ref: '#/components/schemas/textLong'
type: string
required:
- assistant_id
- conversation_id
Expand Down
2 changes: 1 addition & 1 deletion backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def create_backend_app():
'http://localhost',
'http://localhost:3080',
'http://localhost:5173',
'https://localhost:3000'
'http://localhost:3000'
]

# Add CORS middleware
Expand Down
54 changes: 30 additions & 24 deletions backend/managers/RagManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
from sqlalchemy import delete, select, func
from pathlib import Path
from typing import List, Tuple, Optional, Dict, Any
from backend.managers import ResourcesManager
from backend.managers import ResourcesManager, PersonasManager
from distutils.util import strtobool
import os
import logging
from typing import Union
from langchain.prompts import PromptTemplate

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -52,15 +53,13 @@ async def create_index(self, resource_id: str, path_files: List[str]) -> List[di

for path in path_files:
loader = PyPDFLoader(path)
docs = loader.load()
all_docs.append(docs[0])
docs = loader.load() # Return a list of documents for each file
all_docs.append(docs)
file_id = str(uuid4())
all_ids.append(file_id)

# Extract just the file name
all_ids.append(file_id)
file_name = Path(path).name
file_names.append(file_name)

# Collect file_id and file_name into a dictionary
file_info_list.append({"file_id": file_id, "file_name": file_name})

Expand All @@ -69,24 +68,24 @@ async def create_index(self, resource_id: str, path_files: List[str]) -> List[di
chunk_overlap=int(os.environ.get('CHUNK_OVERLAP')),
add_start_index=bool(strtobool(os.environ.get('ADD_START_INDEX')))
)

# Split documents while retaining metadata
split_documents = []
split_ids = []

for doc, doc_id in zip(all_docs, all_ids):
#split the document into smaller chunks
splits = text_splitter.split_documents([doc])
splits = text_splitter.split_documents(doc)
all_chunks.append(len(splits))
# Append each chunk to the split_documents list
for i, split in enumerate(splits):
split.metadata["original_id"] = doc_id
split_documents.append(split)
# Create unique IDs for each split based on the original ID and chunk index
split_ids.append(f"{doc_id}-{i}")

await self.create_files_for_resource(resource_id, file_names, all_ids, all_chunks)

# add the split documents to the vectorstore
vectorstore = await self.initialize_chroma(resource_id)
vectorstore.add_documents(documents=split_documents, ids=split_ids)
Expand Down Expand Up @@ -114,21 +113,29 @@ async def initialize_chroma(self, collection_name: str):

async def retrieve_and_generate(self, collection_name, query, llm) -> str:
resources_m = ResourcesManager()
personas_m = PersonasManager()
resource = await resources_m.retrieve_resource(collection_name)
personality_prompt = resource.description

system_prompt = (os.environ.get('SYSTEM_PROMPT') + "\n\n{context}" + "\n\nYou should answer the question just as the following assistant's personality would do it: " + personality_prompt)

prompt = ChatPromptTemplate.from_messages(
[
("system", system_prompt),
("human", "{input}"),
]
persona_id = resource.persona_id
persona = await personas_m.retrieve_persona(persona_id)
personality_prompt = persona.description
# Combine the system prompt and context
system_prompt = (os.environ.get('SYSTEM_PROMPT') + "\n\n{context}" +
"\n\nHere is some information about the assistant expertise to help you answer your questions: " +
personality_prompt)
# system_prompt = (os.environ.get('SYSTEM_PROMPT') + "\n\n{context}" )
prompt_template = PromptTemplate(
input_variables=["context", "input"],
template=system_prompt + "\n\n{input}"
)
print(f"Prompt: {prompt_template}\n")
vectorstore = await self.initialize_chroma(collection_name)
retriever = vectorstore.as_retriever()
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Use the LLM chain with the prompt
question_answer_chain = create_stuff_documents_chain(llm, prompt_template)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Invoke the RAG chain with query as input
response = rag_chain.invoke({"input": query})
return response

Expand All @@ -137,7 +144,7 @@ async def upload_file(self, resource_id: str, files: List[UploadFile]) -> Union[
all_docs = []
for file in files:
# Define the directory where files will be saved
directory = Path(f"./uploads/{resource_id}")
directory = Path(f"./uploads/{resource_id}")
directory.mkdir(parents=True, exist_ok=True)

# Save the file
Expand Down Expand Up @@ -218,7 +225,6 @@ async def retrieve_files(self, resource_id: str, offset: int = 0, limit: int = 1
result = await session.execute(query)
files = [FileSchema.from_orm(file) for file in result.scalars().all()]

# Get total count
total_count = await self._get_total_count(filters)

return files, total_count
Expand Down

0 comments on commit a11cd7f

Please sign in to comment.