Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(spellCheck): allow keyword for this case #1346

Merged
Merged 2 commits on Aug 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ignore-words.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# ignore-words.txt
selectin
selectin
NotIn
6 changes: 2 additions & 4 deletions pandasai/ee/vectorstores/lanceDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,8 @@
else:
data = {"id": ids, "qa": qa_str, "metadata": metadatas}

print("data: ", data)
df = pd.DataFrame(data)
print("df: ", df)
self._qa_table.add(df)
print("Len of table: ", self._qa_table.head())

return ids

Expand Down Expand Up @@ -244,7 +241,6 @@
"qa": str(qa_str[i]),
"metadata": metadatas[i],
}
print("updated values: ", updated_values, ids[i])
self._qa_table.update(values=updated_values, where=f"id = '{ids[i]}'")

return ids
Expand Down Expand Up @@ -409,6 +405,8 @@
Returns:
_type_: _description_
"""
if not documents:
return documents

Check warning on line 409 in pandasai/ee/vectorstores/lanceDB.py

View check run for this annotation

Codecov / codecov/patch

pandasai/ee/vectorstores/lanceDB.py#L409

Added line #L409 was not covered by tests
relevant_column = list(
documents[0].keys() - {"id", "vector", "metadata", "_distance"}
)
Expand Down
54 changes: 40 additions & 14 deletions tests/unit_tests/vectorstores/test_lancedb.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import shutil
import unittest
from unittest.mock import MagicMock

Expand All @@ -12,11 +14,11 @@ def setUp(self):
self.vector_store._format_qa = MagicMock(
side_effect=lambda q, c: f"Q: {q}\nA: {c}"
)
# self.vector_store._embedding_function = MagicMock(
# return_value=[[1.0, 2.0, 3.0]] * 2
# )
self.vector_store._qa_table
self.vector_store._docs_table

def tearDown(self) -> None:
path = "/tmp/lancedb"
if os.path.exists(path):
shutil.rmtree(path)

def test_constructor_default_parameters(self):
self.assertEqual(self.vector_store._max_samples, 1)
Expand Down Expand Up @@ -46,9 +48,9 @@ def test_add_question_answer_with_ids(self):
inserted_ids = self.vector_store.add_question_answer(
["What is LanceDB?", "How does it work?"],
["print('Hello')", "for i in range(10): print(i)"],
["test_id_1", "test_id_2"],
["test_id_11", "test_id_12"],
)
assert inserted_ids == ["test_id_1", "test_id_2"]
assert inserted_ids == ["test_id_11", "test_id_12"]

def test_add_question_answer_different_dimensions(self):
with self.assertRaises(ValueError):
Expand Down Expand Up @@ -92,44 +94,68 @@ def test_delete_docs(self):
self.assertEqual(deleted_docs, True)

def test_get_relevant_question_answers(self):
self.vector_store.add_question_answer(
["What is LanceDB?", "How does it work?"],
["print('Hello')", "for i in range(10): print(i)"],
["test_id_11", "test_id_12"],
)
result = self.vector_store.get_relevant_question_answers(
"What is LanceDB?", k=2
)
print("result: ", result)

self.assertEqual(
result,
{
"documents": [
[
"Q: What is LanceDB?\nA: print('Hello')",
"Q: What is LanceDB?\nA: print('Hello')",
"Q: How does it work?\nA: for i in range(10): print(i)",
]
],
"metadatas": [["None", "None"]],
},
)

def test_get_relevant_question_answers_by_ids(self):
result = self.vector_store.get_relevant_question_answers_by_id(["test_id_1"])
self.vector_store.add_question_answer(
["What is LanceDB?", "How does it work?"],
["print('Hello')", "for i in range(10): print(i)"],
["test_id_11", "test_id_12"],
)
result = self.vector_store.get_relevant_question_answers_by_id(["test_id_11"])
print(result)
self.assertEqual(
result,
[[{"metadata": "None", "qa": "Q: What is LanceDB?\nA: print('Hello')"}]],
[
[
{
"metadata": "None",
"qa": "Q: What is LanceDB?\nA: print('Hello')",
}
]
],
)

def test_get_relevant_docs(self):
self.vector_store.add_docs(
["Document 1", "Document 2", "Document 3"],
["test_id_1", "test_id_2", "test_id_3"],
)
result = self.vector_store.get_relevant_docs("What is LanceDB?", k=3)
print("result:", result)
self.assertEqual(
result,
{
"documents": [["Document 1", "Document 1", "Document 2"]],
"documents": [["Document 1", "Document 2", "Document 3"]],
"metadatas": [["None", "None", "None"]],
},
)

def test_get_relevant_docs_by_ids(self):
self.vector_store.add_docs(
["Document 1", "Document 2", "Document 3"],
["test_id_1", "test_id_2", "test_id_3"],
)
result = self.vector_store.get_relevant_docs_by_id(["test_id_1"])
print("Result docs ids: ", result)
self.assertEqual(result, [[{"doc": "Document 1", "metadata": "None"}]])


Expand Down
Loading