Skip to content

Commit

Permalink
Merge pull request #72 from huridocs/update-transformers
Browse files Browse the repository at this point in the history
Update transformers
  • Loading branch information
gabriel-piles authored Oct 17, 2023
2 parents 89fc6a9 + 698608a commit bae1ff6
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 33 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/push_docker_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3

- name: Free up space
run: make free_up_space

- name: Log in to the Container registry
uses: docker/login-action@v2
with:
Expand Down
4 changes: 2 additions & 2 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-r requirements.txt
mongomock==4.1.2
pytest==7.4.0
black==23.7.0
pytest==7.4.2
black==23.9.1
21 changes: 10 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
fastapi==0.103.1
fastapi==0.103.2
graypy==2.1.0
PyYAML==6.0.1
python-multipart==0.0.6
Expand All @@ -7,27 +7,26 @@ gunicorn==21.2.0
pymongo==4.5.0
slugify==0.0.1
requests==2.31.0
PyRSMQ==0.4.5
redis==5.0.0
scikit-learn==1.3.0
PyRSMQ==0.5.0
redis==5.0.1
fuzzywuzzy==0.18.0
python-Levenshtein==0.21.1
sentry-sdk==1.31.0
python-Levenshtein==0.23.0
sentry-sdk==1.32.0
tdda==2.0.9
datasets==2.14.5
torch==2.0.1
evaluate==0.4.0
pandas==2.1.0
torch==2.1.0
evaluate==0.4.1
pandas==2.1.1
dateparser==1.1.8
langcodes==3.3.0
nltk==3.8.1
transformers==4.33.2
transformers==4.34.0
httpx==0.25.0
sentencepiece==0.1.99
accelerate==0.23.0
mongomock==4.1.2
fasttext-wheel==0.9.2
rich==13.5.3
rich==13.6.0
joblib==1.3.2
sentence_transformers==2.2.2
git+https://github.com/huggingface/setfit.git@fa86337d5e26ef22e48c20f0d2339e1f1d76b992
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def train(self, semantic_extraction_data: list[SemanticExtractionData]):
early_stopping_patience=4,
log_level="error",
generation_max_length=output_length,
use_cpu=not torch.cuda.is_available(),
)

run(model_arguments, data_training_arguments, t5_training_arguments)
Expand Down
39 changes: 19 additions & 20 deletions src/semantic_metadata_extraction/methods/test/test_mt5.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,22 @@

class TestMT5(TestCase):
    def test_train(self):
        """Integration test: fine-tune the MT5 true-case method on a tiny
        synthetic dataset and check prediction shape and one learned label.

        NOTE(review): this downloads and trains an MT5 model, so it is slow
        and resource-heavy — sample and prediction counts are deliberately
        small (5 + 5 training examples, 10 predictions).
        """
        start = time()
        print("GPU available?")
        print(torch.cuda.is_available())

        mt5_true_case_english_spanish = MT5TrueCaseEnglishSpanishMethod("mt5_test", "mt5_test")

        # Two target labels, five examples each: "1/ foo end" -> "foo" and
        # "2/ var end" -> "var".
        semantic_information_data = [
            SemanticExtractionData(text="foo", pdf_tags=[PdfTagData.from_text("1/ foo end")], language_iso="")
        ] * 5
        semantic_information_data += [
            SemanticExtractionData(text="var", pdf_tags=[PdfTagData.from_text("2/ var end")], language_iso="")
        ] * 5

        mt5_true_case_english_spanish.train(semantic_information_data)

        # One prediction per input, and the model should reproduce the
        # memorized "foo" mapping for its training pattern.
        predictions = mt5_true_case_english_spanish.predict([SemanticPredictionData.from_text("1/ foo end")] * 10)
        self.assertEqual(10, len(predictions))
        self.assertEqual("foo", predictions[0])
        print(f"Predictions in {round(time() - start, 2)} seconds")

0 comments on commit bae1ff6

Please sign in to comment.