Skip to content

Commit

Permalink
Merge pull request #1346 from Sinaptik-AI/fix/spell_check
Browse files — browse the repository at this point in the history
fix(spellCheck): allow keyword for this case
  • Loading branch information
ArslanSaleem committed Aug 31, 2024
2 parents 771bb72 + d67be68 commit 2f4a80f
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 19 deletions.
3 changes: 2 additions & 1 deletion ignore-words.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# ignore-words.txt
selectin
selectin
NotIn
6 changes: 2 additions & 4 deletions pandasai/ee/vectorstores/lanceDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,8 @@ def add_question_answer(
else:
data = {"id": ids, "qa": qa_str, "metadata": metadatas}

print("data: ", data)
df = pd.DataFrame(data)
print("df: ", df)
self._qa_table.add(df)
print("Len of table: ", self._qa_table.head())

return ids

Expand Down Expand Up @@ -244,7 +241,6 @@ def update_question_answer(
"qa": str(qa_str[i]),
"metadata": metadatas[i],
}
print("updated values: ", updated_values, ids[i])
self._qa_table.update(values=updated_values, where=f"id = '{ids[i]}'")

return ids
Expand Down Expand Up @@ -409,6 +405,8 @@ def _filter_docs_based_on_distance(
Returns:
_type_: _description_
"""
if not documents:
return documents
relevant_column = list(
documents[0].keys() - {"id", "vector", "metadata", "_distance"}
)
Expand Down
54 changes: 40 additions & 14 deletions tests/unit_tests/vectorstores/test_lancedb.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import shutil
import unittest
from unittest.mock import MagicMock

Expand All @@ -12,11 +14,11 @@ def setUp(self):
self.vector_store._format_qa = MagicMock(
side_effect=lambda q, c: f"Q: {q}\nA: {c}"
)
# self.vector_store._embedding_function = MagicMock(
# return_value=[[1.0, 2.0, 3.0]] * 2
# )
self.vector_store._qa_table
self.vector_store._docs_table

def tearDown(self) -> None:
path = "/tmp/lancedb"
if os.path.exists(path):
shutil.rmtree(path)

def test_constructor_default_parameters(self):
self.assertEqual(self.vector_store._max_samples, 1)
Expand Down Expand Up @@ -46,9 +48,9 @@ def test_add_question_answer_with_ids(self):
inserted_ids = self.vector_store.add_question_answer(
["What is LanceDB?", "How does it work?"],
["print('Hello')", "for i in range(10): print(i)"],
["test_id_1", "test_id_2"],
["test_id_11", "test_id_12"],
)
assert inserted_ids == ["test_id_1", "test_id_2"]
assert inserted_ids == ["test_id_11", "test_id_12"]

def test_add_question_answer_different_dimensions(self):
with self.assertRaises(ValueError):
Expand Down Expand Up @@ -92,44 +94,68 @@ def test_delete_docs(self):
self.assertEqual(deleted_docs, True)

def test_get_relevant_question_answers(self):
self.vector_store.add_question_answer(
["What is LanceDB?", "How does it work?"],
["print('Hello')", "for i in range(10): print(i)"],
["test_id_11", "test_id_12"],
)
result = self.vector_store.get_relevant_question_answers(
"What is LanceDB?", k=2
)
print("result: ", result)

self.assertEqual(
result,
{
"documents": [
[
"Q: What is LanceDB?\nA: print('Hello')",
"Q: What is LanceDB?\nA: print('Hello')",
"Q: How does it work?\nA: for i in range(10): print(i)",
]
],
"metadatas": [["None", "None"]],
},
)

def test_get_relevant_question_answers_by_ids(self):
result = self.vector_store.get_relevant_question_answers_by_id(["test_id_1"])
self.vector_store.add_question_answer(
["What is LanceDB?", "How does it work?"],
["print('Hello')", "for i in range(10): print(i)"],
["test_id_11", "test_id_12"],
)
result = self.vector_store.get_relevant_question_answers_by_id(["test_id_11"])
print(result)
self.assertEqual(
result,
[[{"metadata": "None", "qa": "Q: What is LanceDB?\nA: print('Hello')"}]],
[
[
{
"metadata": "None",
"qa": "Q: What is LanceDB?\nA: print('Hello')",
}
]
],
)

def test_get_relevant_docs(self):
self.vector_store.add_docs(
["Document 1", "Document 2", "Document 3"],
["test_id_1", "test_id_2", "test_id_3"],
)
result = self.vector_store.get_relevant_docs("What is LanceDB?", k=3)
print("result:", result)
self.assertEqual(
result,
{
"documents": [["Document 1", "Document 1", "Document 2"]],
"documents": [["Document 1", "Document 2", "Document 3"]],
"metadatas": [["None", "None", "None"]],
},
)

def test_get_relevant_docs_by_ids(self):
self.vector_store.add_docs(
["Document 1", "Document 2", "Document 3"],
["test_id_1", "test_id_2", "test_id_3"],
)
result = self.vector_store.get_relevant_docs_by_id(["test_id_1"])
print("Result docs ids: ", result)
self.assertEqual(result, [[{"doc": "Document 1", "metadata": "None"}]])


Expand Down

0 comments on commit 2f4a80f

Please sign in to comment.