Skip to content

Commit 94b2beb

Browse files
authored
Merge pull request #88 from BillFarber/task/extendExamples
Some refactoring first
2 parents edfbca2 + 486c60f commit 94b2beb

File tree

3 files changed

+50
-19
lines changed

3 files changed

+50
-19
lines changed

examples/langchain/README.md

+16-8
Original file line numberDiff line numberDiff line change
@@ -43,28 +43,36 @@ into two different collections in the `langchain-test-content` database:
4343

4444
python load_data.py
4545

46-
Create a ".env" file to hold your OpenAI API key:
47-
48-
echo "OPENAI_API_KEY=<your key here>" > .env
46+
Create a ".env" file to hold your Azure OpenAI environment values. It should look
47+
something like this.
48+
```
49+
OPENAI_API_VERSION=2023-12-01-preview
50+
AZURE_OPENAI_ENDPOINT=<Your Azure OpenAI Endpoint>
51+
AZURE_OPENAI_API_KEY=<Your Azure OpenAI API Key>
52+
AZURE_LLM_DEPLOYMENT_NAME=gpt-test1-gpt-35-turbo
53+
AZURE_LLM_DEPLOYMENT_MODEL=gpt-35-turbo
54+
```
4955

5056
# Testing the retriever
5157

58+
## Testing using a retriever with a basic query
59+
5260
You are now ready to test the example retriever. Run the following to ask a question with the
5361
results augmented via the `marklogic_similar_query_retriever.py` module in this project; you will be
54-
prompted for an OpenAI API key when you run this, which you can type or paste in:
62+
prompted for an Azure OpenAI API key when you run this, which you can type or paste in:
5563

56-
python ask.py "What is task decomposition?" posts
64+
python ask_similar_query.py "What is task decomposition?" posts
5765

5866
The retriever uses a [cts.similarQuery](https://docs.marklogic.com/cts.similarQuery) to select from the documents
5967
loaded via `load_data.py`. It defaults to a page length of 10. You can change this by providing a command line
6068
argument - e.g.:
6169

62-
python ask.py "What is task decomposition?" posts 15
70+
python ask_similar_query.py "What is task decomposition?" posts 15
6371

6472
Example of a question for the "sotu" (State of the Union speech) collection:
6573

66-
python ask.py "What are economic sanctions?" sotu 20
74+
python ask_similar_query.py "What are economic sanctions?" sotu 20
6775

6876
To use a word query instead of a similar query, along with a set of drop words, specify "word" as the 4th argument:
6977

70-
python ask.py "What are economic sanctions?" sotu 20 word
78+
python ask_similar_query.py "What are economic sanctions?" sotu 20 word
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
# Based on example at
2-
# https://python.langchain.com/docs/use_cases/question_answering/quickstart .
2+
# https://python.langchain.com/docs/use_cases/question_answering/quickstart .
33

4+
import os
45
import sys
56
from dotenv import load_dotenv
67
from langchain import hub
7-
from langchain_openai import ChatOpenAI
8+
from langchain_openai import AzureChatOpenAI
89
from langchain.schema import StrOutputParser
910
from langchain.schema.runnable import RunnablePassthrough
1011
from marklogic import Client
11-
from marklogic_retriever import MarkLogicRetriever
12+
from marklogic_similar_query_retriever import MarkLogicSimilarQueryRetriever
1213

1314

1415
def format_docs(docs):
@@ -17,7 +18,7 @@ def format_docs(docs):
1718

1819
question = sys.argv[1]
1920

20-
retriever = MarkLogicRetriever.create(
21+
retriever = MarkLogicSimilarQueryRetriever.create(
2122
Client("http://localhost:8003", digest=("langchain-user", "password"))
2223
)
2324
retriever.collections = [sys.argv[2]]
@@ -28,10 +29,20 @@ def format_docs(docs):
2829
load_dotenv()
2930

3031
prompt = hub.pull("rlm/rag-prompt")
31-
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
32+
# Note that the Azure OpenAI API key, the Azure OpenAI Endpoint, and the OpenAI API
33+
# Version are all read from the environment automatically.
34+
llm = AzureChatOpenAI(
35+
model_name=os.getenv("AZURE_LLM_DEPLOYMENT_NAME"),
36+
azure_deployment=os.getenv("AZURE_LLM_DEPLOYMENT_NAME"),
37+
temperature=0,
38+
max_tokens=None,
39+
timeout=None,
40+
)
3241

3342
rag_chain = (
3443
{"context": retriever | format_docs, "question": RunnablePassthrough()}
35-
| prompt | llm | StrOutputParser()
44+
| prompt
45+
| llm
46+
| StrOutputParser()
3647
)
3748
print(rag_chain.invoke(question))

examples/langchain/marklogic_retriever.py renamed to examples/langchain/marklogic_similar_query_retriever.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,32 @@
1010
"""
1111

1212

13-
class MarkLogicRetriever(BaseRetriever):
13+
class MarkLogicSimilarQueryRetriever(BaseRetriever):
1414

1515
client: Client
1616
max_results: int = 10
1717
collections: List[str] = []
1818
query_type: str = "similar"
19-
drop_words = ["did", "the", "about", "a", "an", "is", "are", "what",
20-
"say", "do", "was", "that"]
19+
drop_words = [
20+
"did",
21+
"the",
22+
"about",
23+
"a",
24+
"an",
25+
"is",
26+
"are",
27+
"what",
28+
"say",
29+
"do",
30+
"was",
31+
"that",
32+
]
2133

2234
@classmethod
2335
def create(cls, client: Client):
2436
return cls(client=client)
2537

26-
def _get_relevant_documents(self, query: str) -> List[Document]:
38+
def _get_relevant_documents(self, query: str) -> List[Document]:
2739
words = []
2840
for word in query.split():
2941
if word.lower() not in self.drop_words:
@@ -43,7 +55,7 @@ def _get_relevant_documents(self, query: str) -> List[Document]:
4355
results = self.client.documents.search(
4456
query=cts_query,
4557
page_length=self.max_results,
46-
collections=self.collections
58+
collections=self.collections,
4759
)
4860
print(f"Count of matching MarkLogic documents: {len(results)}")
4961
return map(lambda doc: Document(page_content=doc.content), results)

0 commit comments

Comments
 (0)