Skip to content

Commit

Permalink
Merge pull request #100 from huridocs/ner-extractor
Browse files Browse the repository at this point in the history
Add gliner date parser and the flair NER extractors
  • Loading branch information
gabriel-piles authored Nov 12, 2024
2 parents 6609801 + be295f3 commit 272dd1d
Show file tree
Hide file tree
Showing 281 changed files with 167 additions and 841,863 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/venv/
/.venv/
.git
/docker_volume/
/models_data/
/src/tenant_test/
/huggingface/
/data/
5 changes: 4 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ jobs:
url: http://localhost:5056
method: GET
expected-status: 200
timeout: 60000
timeout: 240000
interval: 500

- name: Wait for queues
run: make wait_for_queues

- name: Test with unittest
run: make test
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ core
src/.pytest_cache
outputs
/performance_results/mistakes/
/docker_volume/
/models_data/
/huggingface/
/.ruff_cache/
/data/
Expand Down
10 changes: 4 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM pytorch/pytorch

RUN apt-get update && apt-get -y -q --no-install-recommends install libgomp1 pdftohtml
RUN apt-get -y install git
RUN mkdir -p /app/src /app/docker_volume
RUN mkdir -p /app/src /app/models_data

RUN addgroup --system python && adduser --system --group python
RUN chown -R python:python /app
Expand All @@ -20,11 +20,9 @@ WORKDIR /app
COPY ./src ./src

ENV PYTHONPATH "${PYTHONPATH}:/app/src"
ENV NLTK_DATA=/app/docker_volume/cache/nltk_data
ENV HF_DATASETS_CACHE=/app/docker_volume/cache/HF
ENV HF_HOME=/app/docker_volume/cache/HF_home
ENV TRANSFORMERS_CACHE=/app/docker_volume/cache/Transformers
ENV HF_HOME=/app/docker_volume/cache/Transformers
ENV NLTK_DATA=/app/models_data/cache/nltk_data
ENV HF_DATASETS_CACHE=/app/models_data/cache/HF
ENV HF_HOME=/app/models_data/cache/HF_home
ENV TRANSFORMERS_VERBOSITY=error
ENV TRANSFORMERS_NO_ADVISORY_WARNINGS=1
ENV CUDA_VISIBLE_DEVICES=0
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ check_format:
test:
. .venv/bin/activate; command cd src; command python -m pytest

wait_for_queues:
. .venv/bin/activate; command cd src/scripts; command python wait_for_queues.py

remove_docker_containers:
docker compose ps -q | xargs docker rm

Expand Down Expand Up @@ -46,9 +49,6 @@ stop:
delete_queues:
. .venv/bin/activate; python scripts/delete_queues.py

download_models:
. .venv/bin/activate; command cd src; python download_models.py

gpu:
. .venv/bin/activate; command cd src; python is_gpu_available.py

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ If the service is running, the end point definitions can be founded in the follo

The endpoint code can be found inside the file `app.py`.

The errors are reported to the file `docker_volume/service.log`, if the configuration is not changed (
Errors are written to the file `models_data/service.log`, unless the configuration has been changed (
see [Get service logs](#get-service-logs))

## Queue processor
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
depends_on:
- pdf_metadata_extraction_worker
environment:
Expand All @@ -29,7 +29,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
shm_size: '11gb'
depends_on:
- mongo_metadata_extraction
Expand Down
4 changes: 2 additions & 2 deletions gpu-docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
depends_on:
- mongo_metadata_extraction
- pdf_metadata_extraction_worker
Expand All @@ -34,7 +34,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
shm_size: '11gb'
depends_on:
- mongo_metadata_extraction
Expand Down
4 changes: 2 additions & 2 deletions local-docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
depends_on:
- mongo_metadata_extraction
- pdf_metadata_extraction_worker
Expand All @@ -27,7 +27,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
shm_size: '11gb'
depends_on:
- mongo_metadata_extraction
Expand Down
4 changes: 2 additions & 2 deletions local-gpu-docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
depends_on:
- mongo_metadata_extraction
- pdf_metadata_extraction_worker
Expand All @@ -34,7 +34,7 @@ services:
context: .
dockerfile: Dockerfile
volumes:
- data:/app/docker_volume
- data:/app/models_data
shm_size: '11gb'
depends_on:
- mongo_metadata_extraction
Expand Down
34 changes: 0 additions & 34 deletions performance_results/all_results_10____2022_10_16_11_59.md

This file was deleted.

34 changes: 0 additions & 34 deletions performance_results/all_results_20____2022_10_15_23_00.md

This file was deleted.

62 changes: 0 additions & 62 deletions performance_results/all_results_30___2022_10_10_19_10.md

This file was deleted.

34 changes: 0 additions & 34 deletions performance_results/all_results_5____2022_10_15_21_53.md

This file was deleted.

Loading

0 comments on commit 272dd1d

Please sign in to comment.