diff --git a/.gitignore b/.gitignore
index 2ec71d4eaf..8f62c31378 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,11 +2,12 @@
.venv
.vscode/
+.idea/
# dependencies
/node_modules
vendor/
src/public/
-
+/media/
celery-worker.state
# testing
diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 47b822bbff..0000000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
- "python.linting.mypyEnabled": true,
- "python.linting.enabled": true,
- "python.formatting.provider": "true"
-}
diff --git a/Dockerfile b/Dockerfile
index ab04550773..8f3a30d688 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,49 +1,50 @@
-FROM python:3.8-slim-buster as base
+FROM python:3.8-bullseye AS base
LABEL maintainer="Deep Dev dev@thedeep.io"
-
ENV PYTHONUNBUFFERED 1
WORKDIR /code
+# Copy dependency files
COPY pyproject.toml poetry.lock /code/
+# Install required system dependencies
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
- # Basic Packages
iproute2 git vim \
- # Build required packages
gcc libc-dev libproj-dev \
- # NOTE: procps: For pkill command
- procps \
- # Deep Required Packages
wait-for-it binutils gdal-bin \
- # Upgrade pip and install python packages for code
+ libcairo2 \
+ libpango1.0-dev \
+ libpangocairo-1.0-0 \
+ fonts-dejavu-core \
+ fonts-liberation \
&& pip install --upgrade --no-cache-dir pip poetry \
- && poetry --version \
- # Configure to use system instead of virtualenvs
&& poetry config virtualenvs.create false \
&& poetry install --no-root \
- # Clean-up
- && pip uninstall -y poetry virtualenv-clone virtualenv \
&& apt-get remove -y gcc libc-dev libproj-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
+# Install WeasyPrint for PDF report generation
+RUN pip install weasyprint==53.0
# -------------------------- WEB ---------------------------------------
FROM base AS web
+# Copy all project files
COPY . /code/
# -------------------------- WORKER ---------------------------------------
FROM base AS worker
+# Additional worker-specific tools
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
libreoffice \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
+# Copy all project files
COPY . /code/
diff --git a/apps/deepl_integration/handlers.py b/apps/deepl_integration/handlers.py
index 43771d6a5b..0360f861c6 100644
--- a/apps/deepl_integration/handlers.py
+++ b/apps/deepl_integration/handlers.py
@@ -581,6 +581,7 @@ def send_trigger_request_to_extractor(
'request_type': NlpRequestType.USER if high_priority else NlpRequestType.SYSTEM,
}
response_content = None
+ print('the extraction endpoint is ', DeeplServiceEndpoint.DOCS_EXTRACTOR_ENDPOINT)
try:
response = requests.post(
DeeplServiceEndpoint.DOCS_EXTRACTOR_ENDPOINT,
@@ -610,6 +611,7 @@ def trigger_lead_extract(cls, lead, task_instance=None):
return True
# Get the lead to be extracted
url_to_extract = None
+ print('is lead url', lead.url, "is lead attachment", lead.attachment)
if lead.attachment:
url_to_extract = generate_file_url_for_legacy_deepl_server(lead.attachment)
elif lead.url:
diff --git a/apps/organization/views.py b/apps/organization/views.py
index 6b0f34919a..676bdcc94a 100644
--- a/apps/organization/views.py
+++ b/apps/organization/views.py
@@ -44,3 +44,5 @@ def get_queryset(self):
if self.kwargs.get('pk'):
return Organization.objects.prefetch_related('parent')
return Organization.objects.filter(parent=None)
+
+
diff --git a/apps/static/image/graphQL-logo.svg b/apps/static/image/graphQL-logo.svg
index 8e353ddbaa..58b2cc2877 100644
--- a/apps/static/image/graphQL-logo.svg
+++ b/apps/static/image/graphQL-logo.svg
@@ -1,71 +1,71 @@
-
-
-
-
+
+
+
+
diff --git a/apps/templates/connector/pdf.html b/apps/templates/connector/pdf.html
new file mode 100644
index 0000000000..31f0711af0
--- /dev/null
+++ b/apps/templates/connector/pdf.html
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>Data PDF</title>
+</head>
+<body>
+    <h1>Data Report</h1>
+
+    {% load filter %}
+    {% for row in rows %}
+    <div class="entry">
+        <h2>Entry {{ forloop.counter }}</h2>
+        <ul>
+            {% for key, value in columns|zipl:row %}
+            <li>
+                <strong>{{ key }}</strong>
+                <span>{{ value }}</span>
+            </li>
+            {% endfor %}
+        </ul>
+    </div>
+    {% endfor %}
+</body>
+</html>
diff --git a/apps/unified_connector/migrations/0011_alter_connectorsource_source.py b/apps/unified_connector/migrations/0011_alter_connectorsource_source.py
new file mode 100644
index 0000000000..309dc7f6be
--- /dev/null
+++ b/apps/unified_connector/migrations/0011_alter_connectorsource_source.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.25 on 2024-10-16 12:53
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('unified_connector', '0010_auto_20240625_0806'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='connectorsource',
+ name='source',
+ field=models.CharField(choices=[('atom-feed', 'Atom Feed'), ('relief-web', 'Relifweb'), ('rss-feed', 'RSS Feed'), ('unhcr-portal', 'UNHCR Portal'), ('humanitarian-resp', 'Humanitarian Response'), ('pdna', 'Post Disaster Needs Assessments'), ('emm', 'European Media Monitor'), ('kobo', 'KoboToolbox')], max_length=20),
+ ),
+ ]
diff --git a/apps/unified_connector/models.py b/apps/unified_connector/models.py
index 0f0485d6c1..953065b23c 100644
--- a/apps/unified_connector/models.py
+++ b/apps/unified_connector/models.py
@@ -15,6 +15,7 @@
humanitarian_response,
pdna,
emm,
+ kobo,
)
@@ -120,6 +121,7 @@ class Source(models.TextChoices):
HUMANITARIAN_RESP = 'humanitarian-resp', 'Humanitarian Response'
PDNA = 'pdna', 'Post Disaster Needs Assessments'
EMM = 'emm', 'European Media Monitor'
+ KOBO = 'kobo', 'KoboToolbox'
class Status(models.IntegerChoices):
PENDING = 0, 'Pending'
@@ -135,6 +137,7 @@ class Status(models.IntegerChoices):
Source.HUMANITARIAN_RESP: humanitarian_response.HumanitarianResponse,
Source.PDNA: pdna.PDNA,
Source.EMM: emm.EMM,
+ Source.KOBO: kobo.Kobo,
}
title = models.CharField(max_length=255)
@@ -228,3 +231,4 @@ def update_aleady_added_using_lead(cls, lead, added=True):
connector_lead=lead.connector_lead,
source__unified_connector__project=lead.project,
).update(already_added=added)
+
diff --git a/apps/unified_connector/mutation.py b/apps/unified_connector/mutation.py
index 54c8bc070e..965e5b0061 100644
--- a/apps/unified_connector/mutation.py
+++ b/apps/unified_connector/mutation.py
@@ -1,4 +1,5 @@
import graphene
+import requests
from utils.graphene.mutation import (
generate_input_type_for_serializer,
@@ -36,6 +37,51 @@
serializer_class=ConnectorSourceLeadGqSerializer,
)
+class KoboValPsGrapheneMutation(PsGrapheneMutation):
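+    """
+    PsGrapheneMutation variant that verifies the supplied KoboToolbox
+    credentials (project_id + token) before saving the connector.
+    """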
+ @classmethod
+ def perform_mutate(cls, root, info, **kwargs):
+ from graphql import GraphQLError
+ data = kwargs['data']
+ if not cls.validate_kobo(data):
+ raise GraphQLError("Invalid Kobo data: 'project_id' and 'token' combination did not retrieve any valid data")
+ instance, errors = cls._save_item(data, info, **kwargs)
+ return cls(result=instance, errors=errors, ok=not errors)
+
+ @classmethod
+ def validate_kobo(cls, data):
+        # TODO: validate all sources, not just the first
+ sources = data.get('sources', [])
+ source = sources[0] if sources else {}
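+        # Only the first source is inspected; non-Kobo sources skip credential validation.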
+ if source and source.get('title') != 'KoboToolbox':
+ return True
+
+ params = source.get("params", {})
+ project_id = params.get('project_id')
+ token = params.get('token')
+
+ if not project_id or not token:
+ return False
+
+ # Validate Kobo API fetch
+ return cls.valid_kobo_fetch(project_id, token)
+
+ @classmethod
+ def valid_kobo_fetch(cls, project_id, token):
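+        # A single authenticated GET against the project's data endpoint is
+        # enough to confirm that both the token and the project id are valid.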
+        api_url = f"https://kf.kobotoolbox.org/api/v2/assets/{project_id}/data/?format=json"
+        headers = {"Authorization": f"Token {token}"}
+
+        try:
+            response = requests.get(api_url, headers=headers)
+            return response.status_code == 200
+        except requests.RequestException:
+            return False
class UnifiedConnectorMixin():
@classmethod
@@ -43,7 +89,7 @@ def filter_queryset(cls, qs, info):
return qs.filter(project=info.context.active_project)
-class CreateUnifiedConnector(UnifiedConnectorMixin, PsGrapheneMutation):
+class CreateUnifiedConnector(UnifiedConnectorMixin, KoboValPsGrapheneMutation):
class Arguments:
data = UnifiedConnectorWithSourceInputType(required=True)
model = UnifiedConnector
@@ -52,7 +98,8 @@ class Arguments:
permissions = [PP.Permission.CREATE_UNIFIED_CONNECTOR]
-class UpdateUnifiedConnector(UnifiedConnectorMixin, PsGrapheneMutation):
+
+class UpdateUnifiedConnector(UnifiedConnectorMixin, KoboValPsGrapheneMutation):
class Arguments:
id = graphene.ID(required=True)
data = UnifiedConnectorInputType(required=True)
@@ -62,7 +109,9 @@ class Arguments:
permissions = [PP.Permission.UPDATE_UNIFIED_CONNECTOR]
-class UpdateUnifiedConnectorWithSource(UnifiedConnectorMixin, PsGrapheneMutation):
+
+
+class UpdateUnifiedConnectorWithSource(UnifiedConnectorMixin, KoboValPsGrapheneMutation):
class Arguments:
id = graphene.ID(required=True)
data = UnifiedConnectorWithSourceInputType(required=True)
diff --git a/apps/unified_connector/schema.py b/apps/unified_connector/schema.py
index 3d281b7719..01132986c7 100644
--- a/apps/unified_connector/schema.py
+++ b/apps/unified_connector/schema.py
@@ -32,6 +32,7 @@
def get_unified_connector_qs(info):
+
qs = UnifiedConnector.objects.filter(project=info.context.active_project)
if PP.check_permission(info, PP.Permission.VIEW_UNIFIED_CONNECTOR):
return qs
@@ -71,12 +72,21 @@ class Meta:
'authors',
)
+ @staticmethod
+ def resolve_url(root, info, **_):
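+        # For KoboToolbox leads root.url points at the generated PDF on S3;
+        # return presigned URLs for both the PDF and its CSV counterpart.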
+        pdf_url = root.url.split('amazonaws.com/')[-1]
+        csv_url = pdf_url.replace('pdf', 'csv')
+        if root.source and root.source.title == "KoboToolbox":
+            return {"pdf": get_presigned_url(pdf_url), "csv": get_presigned_url(csv_url)}
+        return root.url
+
@staticmethod
def resolve_source(root, info, **_):
+
return root.source_id and info.context.dl.unified_connector.connector_lead_source.load(root.source_id)
@staticmethod
def resolve_authors(root, info, **_):
+
return info.context.dl.unified_connector.connector_lead_authors.load(root.pk)
@@ -92,6 +102,7 @@ class Meta:
'already_added',
)
+
@staticmethod
def get_custom_queryset(queryset, info, **_):
return get_connector_source_lead_qs(info)
@@ -107,6 +118,7 @@ class Meta:
filterset_class = ConnectorSourceLeadGQFilterSet
+
class ConnectorSourceStatsType(graphene.ObjectType):
date = graphene.Date(required=True)
count = graphene.Int(required=True)
@@ -143,14 +155,17 @@ class Meta:
@staticmethod
def get_custom_queryset(queryset, info, **_):
+
return get_connector_source_qs(info)
@staticmethod
def resolve_stats(root, info, **_):
+
return (root.stats or {}).get('published_dates') or []
@staticmethod
def resolve_leads_count(root, info, **_):
+
return info.context.dl.unified_connector.connector_source_leads_count.load(root.pk)
@@ -240,7 +255,8 @@ def resolve_connector_sources(root, info, **kwargs) -> QuerySet:
@staticmethod
def resolve_connector_source_leads(root, info, **kwargs) -> QuerySet:
- return get_connector_source_lead_qs(info)
+ qs = get_connector_source_lead_qs(info)
+ return qs
class RssFieldType(graphene.ObjectType):
@@ -264,3 +280,20 @@ def resolve_rss_fields(root, info, url):
@staticmethod
def resolve_atom_feed_fields(root, info, url):
return AtomFeed().query_fields({"feed-url": url})
+
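+# Build a time-limited (default one hour) S3 GET URL for a stored object key.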
+def get_presigned_url(object_key, expiration=3600):
+ import boto3
+ from botocore.exceptions import NoCredentialsError, PartialCredentialsError
+ from deep import settings
+    s3_client = boto3.client(
+        's3',
+        region_name=settings.AWS_S3_REGION_NAME,
+        aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+    )
+ try:
+ return s3_client.generate_presigned_url(
+ 'get_object',
+ Params={'Bucket': settings.AWS_STORAGE_BUCKET_NAME, 'Key': object_key},
+ ExpiresIn=expiration
+ )
+ except (NoCredentialsError, PartialCredentialsError) as e:
+ print(f"Error generating presigned URL: {e}")
+ return None
diff --git a/apps/unified_connector/serializers.py b/apps/unified_connector/serializers.py
index 20b99a6c8c..e79e7aa83f 100644
--- a/apps/unified_connector/serializers.py
+++ b/apps/unified_connector/serializers.py
@@ -35,6 +35,8 @@ class Meta:
)
+
+
class UnifiedConnectorGqSerializer(ProjectPropertySerializerMixin, TempClientIdMixin, UserResourceSerializer):
class Meta:
model = UnifiedConnector
@@ -56,6 +58,8 @@ def create(self, data):
return instance
+
+
class UnifiedConnectorWithSourceGqSerializer(UnifiedConnectorGqSerializer):
sources = ConnectorSourceGqSerializer(required=False, many=True)
@@ -71,11 +75,10 @@ class Meta:
def _get_prefetch_related_instances_qs(self, qs):
if self.instance:
return qs.filter(unified_connector=self.instance)
- return qs.none() # On create throw error if existing id is provided
+ return qs.none()
def validate_sources(self, sources):
source_found = set()
- # Only allow unique source per unified connectors
for source in sources:
source_type = source['source']
if source_type in source_found:
@@ -84,6 +87,7 @@ def validate_sources(self, sources):
return sources
+
class ConnectorSourceLeadGqSerializer(serializers.ModelSerializer):
class Meta:
model = ConnectorSourceLead
diff --git a/apps/unified_connector/sources/emm.py b/apps/unified_connector/sources/emm.py
index 7c3ba952ed..369c904344 100644
--- a/apps/unified_connector/sources/emm.py
+++ b/apps/unified_connector/sources/emm.py
@@ -22,7 +22,6 @@ class EMM(RssFeed):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
- # Sets up conf
self.has_emm_entities = False
self.has_emm_triggers = False
self.initialize()
@@ -92,6 +91,7 @@ def query_fields(self, params):
return real_fields
def get_content(self, url, params):
+
resp = requests.get(url)
return resp.content
diff --git a/apps/unified_connector/sources/kobo.py b/apps/unified_connector/sources/kobo.py
new file mode 100644
index 0000000000..963241380a
--- /dev/null
+++ b/apps/unified_connector/sources/kobo.py
@@ -0,0 +1,367 @@
+import csv
+import hashlib
+import io
+import logging
+import os
+import re
+import time
+import uuid
+from datetime import datetime
+from io import BytesIO
+
+import requests
+from boto3.session import Session
+from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
+from django.conf import settings
+from django.core.files.base import ContentFile
+from django.template.loader import render_to_string
+from rest_framework.exceptions import ValidationError
+from weasyprint import HTML
+
+from connector.utils import ConnectorWrapper
+from lead.models import Lead
+from unified_connector.sources.base import Source
+
+
+logger = logging.getLogger(__name__)
+
+
+@ConnectorWrapper
+class Kobo(Source):
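+    """
+    Unified-connector source that turns a KoboToolbox project's submissions
+    into a single PDF lead (with a CSV export) stored on S3.
+    """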
+
+ URL = 'https://kf.kobotoolbox.org/api/v2/assets/'
+ title = 'KoboToolbox Reports'
+ key = 'kobo-toolbox'
+
+ options = [
+ {
+ 'key': 'project_id',
+ 'field_type': 'text',
+ 'title': 'Project ID',
+ },
+ {
+ 'key': 'token',
+ 'field_type': 'text',
+ 'title': 'Kobo API Token',
+ }
+ ]
+
+ def get_content(self, project_id, token):
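+        """Fetch all submissions for the given Kobo project; return [] on any failure."""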
+ api_url = f"{self.URL}{project_id}/data/?format=json"
+ headers = {"Authorization": f"Token {token}"}
+
+ try:
+ with requests.get(api_url, headers=headers, stream=True) as response:
+ if response.status_code == 200:
+ return response.json().get('results', [])
+ else:
+ logger.error("Failed to fetch data from API, Status code: %d", response.status_code)
+ except requests.RequestException as e:
+ logger.critical("A critical error occurred while fetching data: %s", e)
+ return []
+
+ def fetch(self, params):
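+        """
+        Build a PDF report (plus CSV) from the project's qualitative Kobo
+        submissions, upload both to S3 and return a single lead entry that
+        points at the generated PDF.
+        """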
+        logger.info('Kobo fetch started for project %s', params.get('project_id'))
+ result = []
+ project_id = params.get('project_id')
+ if not project_id:
+ return [], 0
+
+ token = params.get('token')
+ if not token:
+ return [], 0
+
+ try:
+ records = self.get_content(project_id, token)
+ if records:
+
+ qualitative_columns, rows = accumulate_columns_and_rows(records)
+ context = {
+ 'columns': qualitative_columns,
+ 'rows': rows,
+ }
+
+ html_string = render_to_string('connector/pdf.html', context)
+
+ html = HTML(string=html_string)
+ pdf_file = html.write_pdf()
+
+ pdf_stream = BytesIO(pdf_file)
+
+ file_path = save_file_remote(project_id, context, pdf_file=pdf_stream)
+                logger.debug('MEDIA_URL=%s MEDIAFILES_LOCATION=%s', settings.MEDIA_URL, settings.MEDIAFILES_LOCATION)
+ file_url = os.path.join(settings.MEDIA_URL, file_path)
+
+ date = datetime.now()
+ result = [{
+ 'title': project_id,
+ 'url': file_url,
+ 'source': 'KoboToolbox',
+ 'author': 'KoboToolbox',
+ 'published_on': date.date(),
+ 'source_type': Lead.SourceType.WEBSITE}
+ ]
+
+            logger.info('Kobo fetch result: %s', result)
+ return result, len(result)
+ except Exception as e:
+ logger.error("An error occurred: %s", e)
+ return [], 0
+
+
+
+def calculate_md5(file_content):
+ """Calculate the MD5 checksum of a file-like object."""
+ hash_md5 = hashlib.md5()
+ for chunk in iter(lambda: file_content.read(4096), b""):
+ hash_md5.update(chunk)
+ file_content.seek(0) # Reset file pointer
+ return hash_md5.hexdigest()
+
+def verify_checksum_s3(bucket_name, object_key, local_checksum, s3_client):
+ """Verify the checksum of a file in S3."""
+ try:
+ response = s3_client.head_object(Bucket=bucket_name, Key=object_key)
+ s3_etag = response['ETag'].strip('"') # Remove quotes from ETag
+ return s3_etag == local_checksum
+ except NoCredentialsError:
+ raise Exception("AWS credentials not found.")
+ except Exception as e:
+ raise Exception(f"Error verifying checksum: {e}")
+
+
+def upload_to_s3_with_retry(bucket_name, object_key, file_content, local_checksum, max_retries=10,
+ retry_delay=1):
+    """
+    Upload a file to S3, retrying on failure and verifying its checksum.
+
+    Args:
+        bucket_name (str): S3 bucket name.
+        object_key (str): S3 object key.
+        file_content (bytes): Raw file content to upload.
+        local_checksum (str): MD5 checksum of the file.
+        max_retries (int): Maximum number of retries.
+        retry_delay (int): Delay (in seconds) between retries.
+
+    Raises:
+        Exception: If all retries fail.
+    """
+ session = Session(
+ aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+ aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+ region_name=settings.AWS_S3_REGION_NAME,
+ )
+ s3_client = session.client('s3')
+
+ for attempt in range(max_retries):
+ try:
+ s3_client.put_object(Bucket=bucket_name, Key=object_key, Body=file_content)
+            logger.info("File %s uploaded to bucket %s; verifying checksum.", object_key, bucket_name)
+
+            # Verify checksum
+            if not verify_checksum_s3(bucket_name, object_key, local_checksum, s3_client):
+                message = 'Checksum validation error'
+                logger.warning('%s; retrying (attempt %d of %d)', message, attempt + 1, max_retries)
+                raise ValidationError(message)  # Raise to trigger retry
+            logger.info('Checksum validation successful.')
+ return True
+ except (BotoCoreError, ClientError, ValidationError) as exc:
+ logger.error(f"Attempt {attempt + 1} failed: {exc}")
+ if attempt < max_retries - 1:
+ time.sleep(retry_delay)
+ else:
+ logger.error("All retry attempts failed.")
+ raise
+
+
+def save_file_remote(project_id, context, pdf_file):
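+    """
+    Upload the rendered PDF and a CSV export of the same data to S3 and
+    return the relative path of the PDF object (without the media prefix).
+    """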
+ timestamp = datetime.now().strftime('%Y%m%dT%H%M%S')
+ directory_path = os.path.join(str(project_id), str(timestamp))
+ file_id = uuid.uuid4()
+ bucket_name = settings.AWS_STORAGE_BUCKET_NAME
+
+ pdf_content = pdf_file.getvalue()
+ def compose_file_path(file_type):
+ file_path = os.path.join(file_type, directory_path, f"{file_id}.{file_type}")
+ remote_file_path = os.path.join(settings.MEDIAFILES_LOCATION, file_path)
+ return remote_file_path, file_path
+
+ pdf_remote_path, pdf_path = compose_file_path('pdf')
+ csv_remote_path, csv_path = compose_file_path('csv')
+
+ def generate_csv_data(context):
+ csv_buffer = io.StringIO()
+ writer = csv.writer(csv_buffer)
+ writer.writerow(context['columns'])
+ for row in context['rows']:
+ writer.writerow(row)
+ csv_content = csv_buffer.getvalue().encode('utf-8')
+ csv_file = ContentFile(csv_content)
+ return csv_content, csv_file
+
+ csv_content, csv_file = generate_csv_data(context)
+
+ def remote_save_routine(file, file_content, remote_file_path):
+ file_local_checksum = calculate_md5(file)
+ upload_to_s3_with_retry(bucket_name, remote_file_path, file_content, file_local_checksum)
+
+ remote_save_routine(pdf_file, pdf_content, pdf_remote_path)
+ remote_save_routine(csv_file, csv_content, csv_remote_path)
+ return pdf_path
+
+
+def accumulate_columns_and_rows(records):
+ """Accumulate all columns from the records and filter qualitative columns."""
+ all_columns_set = set()
+ rows = []
+
+ # Accumulate all unique columns across all records
+ for record in records:
+ all_columns_set.update(record.keys())
+
+ all_columns = sorted(all_columns_set)
+
+ # Filter qualitative columns based on values across all records
+ qualitative_columns = []
+ for col in all_columns:
+ if all(is_qualitative(col, record.get(col, "N/A")) for record in records):
+ qualitative_columns.append(col)
+
+ # Build rows with qualitative data
+ for record in records:
+ row = [record.get(column, "N/A") for column in qualitative_columns]
+ rows.append(row)
+
+ return qualitative_columns, rows
+
+
+BOOLEAN_TRUE_VALUES = {'true', 'yes', '1', 'on'}
+BOOLEAN_FALSE_VALUES = {'false', 'no', '0', 'off'}
+
+
+def is_uuid(value):
+ """Check if a string is a valid UUID."""
+ uuid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$', re.IGNORECASE)
+ return bool(uuid_pattern.match(value))
+
+
+def is_id_field(key, value):
+ """Check if a field is likely to be an ID field."""
+ if isinstance(key, str):
+ # Check if the key contains 'id' or 'uuid'
+ if 'id' in key.lower() or 'uuid' in key.lower():
+ return True
+
+ # Check if the value is a UUID
+ if isinstance(value, str) and is_uuid(value):
+ return True
+
+ # Check if it's a numeric ID
+ if isinstance(value, (int, str)):
+ try:
+ int(value)
+ return len(str(value)) > 5 # Assume IDs are typically longer than 5 digits
+ except ValueError:
+ pass
+
+ return False
+
+
+def is_date(value):
+ """Check if a string is a valid date."""
+ try:
+ datetime.fromisoformat(value.replace('Z', '+00:00'))
+ return True
+ except (ValueError, AttributeError):
+ return False
+
+
+def is_boolean(value):
+ """Check if the value represents a boolean."""
+ if isinstance(value, bool):
+ return True # Already a boolean
+
+ if isinstance(value, str):
+ normalized_value = value.strip().lower()
+ if normalized_value in BOOLEAN_TRUE_VALUES or normalized_value in BOOLEAN_FALSE_VALUES:
+ return True
+
+ return False
+
+
+def is_qualitative(key, value):
+ """
+ Helper function to determine if a value is qualitative based on its key, type, and content.
+ """
+
+    # Nested structures (dicts/lists) are kept as qualitative
+    if isinstance(value, (dict, list)):
+        return True
+
+    # ID-like fields are not qualitative
+    if is_id_field(key, value):
+        return False
+
+ # Check if it's a boolean
+ if is_boolean(value):
+ return False
+
+ if isinstance(value, str):
+ # Check if it's a number or date disguised as a string
+ try:
+ float(value)
+ return False # It's a number
+ except ValueError:
+ if is_date(value):
+ return False # It's a date
+ return True # It's a regular string, consider it qualitative
+
+ if isinstance(value, (int, float)):
+ return False # Numbers are quantitative
+
+ # Consider everything else as qualitative
+ return True
\ No newline at end of file
diff --git a/apps/unified_connector/sources/pdna.py b/apps/unified_connector/sources/pdna.py
index 8a7a7255ee..89bb21456f 100644
--- a/apps/unified_connector/sources/pdna.py
+++ b/apps/unified_connector/sources/pdna.py
@@ -89,6 +89,7 @@ def get_content(self, url, params):
return resp.text
def fetch(self, params):
+        logger.debug('PDNA fetch params: %s', params)
country = params.get('country')
if not country:
return [], 0
@@ -119,6 +120,7 @@ def fetch(self, params):
'source_type': Lead.SourceType.WEBSITE,
}
results.append(data)
+
except Exception as e:
logger.warning(
"Exception parsing {} with params {}: {}".format(
diff --git a/apps/unified_connector/sources/relief_web.py b/apps/unified_connector/sources/relief_web.py
index 83a0292c14..5f1df8b5fd 100644
--- a/apps/unified_connector/sources/relief_web.py
+++ b/apps/unified_connector/sources/relief_web.py
@@ -317,7 +317,7 @@ def get_content(self, url, params):
def parse_filter_params(self, params):
filters = []
-
if params.get('country'):
filters.append({'field': 'country.iso3', 'value': params['country']})
if params.get('primary-country'):
diff --git a/apps/unified_connector/sources/unhcr_portal.py b/apps/unified_connector/sources/unhcr_portal.py
index 211f3b1c8b..59e4aae1a2 100644
--- a/apps/unified_connector/sources/unhcr_portal.py
+++ b/apps/unified_connector/sources/unhcr_portal.py
@@ -1,5 +1,7 @@
import json
import copy
+import logging
+
import requests
import datetime
@@ -9,7 +11,7 @@
from connector.utils import ConnectorWrapper
from .base import Source
-
+logger = logging.getLogger(__name__)
COUNTRIES_OPTIONS = [
{"label": "All", "key": ""},
@@ -314,6 +316,7 @@ def fetch(self, params):
'source_type': '',
}
results.append(data)
+            logger.debug('UNHCR portal fetch has collected %d results so far', len(results))
footer = soup.find('div', {'class': 'pgSearch_results_footer'})
if not footer:
break
diff --git a/apps/unified_connector/templatetags/__init__.py b/apps/unified_connector/templatetags/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/apps/unified_connector/templatetags/filter.py b/apps/unified_connector/templatetags/filter.py
new file mode 100644
index 0000000000..c6f1a6f057
--- /dev/null
+++ b/apps/unified_connector/templatetags/filter.py
@@ -0,0 +1,27 @@
+from django import template
+
+register = template.Library()
+
+
+@register.filter
+def get_value_from_dict(dictionary, key):
+    """Return the value from the dictionary for the given key."""
+    return dictionary.get(key, '')
+
+
+@register.filter
+def get_item(mapping, key):
+    """Return the value for a given key in a dictionary."""
+    return mapping.get(key, '')
+
+
+@register.filter
+def zipl(list1, list2):
+    """Return the two sequences zipped together as (key, value) pairs."""
+ return zip(list1, list2)
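+
+# Template usage (apps/templates/connector/pdf.html):
+#   {% load filter %}
+#   {% for key, value in columns|zipl:row %} ... {% endfor %}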
diff --git a/changes.md b/changes.md
new file mode 100644
index 0000000000..e3a3ec58ea
--- /dev/null
+++ b/changes.md
@@ -0,0 +1,105 @@
+# Changes Report
+
+This report lists the files that have changed between the local HEAD and the remote branch `origin/develop`.
+
+## Files Changed
+| File Path | Change Type |
+|-----------------|---------------|
+| .gitignore | Modified |
+| .vscode/settings.json | Added |
+| Dockerfile | Modified |
+| PDFS/1/aBjuSQpEPKeu45Mn7hQVgX/20241116T201608/3195cf72-f1dc-4197-b2ca-7d8ef51e8c25.pdf | Deleted |
+| apps/analysis/models.py | Modified |
+| apps/analysis/mutation.py | Modified |
+| apps/analysis/schema.py | Modified |
+| apps/analysis/serializers.py | Modified |
+| apps/analysis/tasks.py | Modified |
+| apps/analysis/tests/test_mutations.py | Modified |
+| apps/analysis_framework/serializers.py | Modified |
+| apps/assisted_tagging/admin.py | Modified |
+| apps/assisted_tagging/dataloaders.py | Modified |
+| apps/assisted_tagging/migrations/0013_llmassistedtaggingpredication.py | Added |
+| apps/assisted_tagging/models.py | Modified |
+| apps/assisted_tagging/schema.py | Modified |
+| apps/assisted_tagging/serializers.py | Modified |
+| apps/assisted_tagging/tasks.py | Modified |
+| apps/assisted_tagging/tests/test_query.py | Modified |
+| apps/deepl_integration/handlers.py | Modified |
+| apps/deepl_integration/serializers.py | Modified |
+| apps/deepl_integration/views.py | Modified |
+| apps/entry/dataloaders.py | Modified |
+| apps/export/entries/json_exporter.py | Modified |
+| apps/export/tasks/tasks_entries.py | Modified |
+| apps/geo/enums.py | Modified |
+| apps/geo/filter_set.py | Modified |
+| apps/geo/migrations/0044_region_status.py | Added |
+| apps/geo/models.py | Modified |
+| apps/geo/mutations.py | Modified |
+| apps/geo/schema.py | Modified |
+| apps/geo/serializers.py | Modified |
+| apps/geo/tasks.py | Modified |
+| apps/lead/filter_set.py | Modified |
+| apps/organization/views.py | Modified |
+| apps/project/admin.py | Modified |
+| apps/project/mutation.py | Modified |
+| apps/project/serializers.py | Modified |
+| apps/static/image/graphQL-logo.svg | Modified |
+| apps/templates/connector/pdf.html | Deleted |
+| apps/unified_connector/migrations/0011_alter_connectorsource_source.py | Deleted |
+| apps/unified_connector/models.py | Modified |
+| apps/unified_connector/mutation.py | Modified |
+| apps/unified_connector/schema.py | Modified |
+| apps/unified_connector/serializers.py | Modified |
+| apps/unified_connector/sources/emm.py | Modified |
+| apps/unified_connector/sources/kobo.py | Deleted |
+| apps/unified_connector/sources/pdna.py | Modified |
+| apps/unified_connector/sources/relief_web.py | Modified |
+| apps/unified_connector/sources/unhcr_portal.py | Modified |
+| apps/unified_connector/templatetags/__init__.py | Deleted |
+| apps/unified_connector/templatetags/filter.py | Deleted |
+| csv/aBjuSQpEPKeu45Mn7hQVgX/20241116T213712/d9f2f7b7-d516-4a27-8e8d-a72124f4716b.csv | Deleted |
+| csv/aBjuSQpEPKeu45Mn7hQVgX/20241116T213739/cabd68e3-1892-4f7e-a29f-d219a1d718b1.csv | Deleted |
+| csv/aBjuSQpEPKeu45Mn7hQVgX/20241116T214255/746fdba5-1a64-476d-ad1a-be87ccf34193.csv | Deleted |
+| deep/deepl.py | Modified |
+| deep/exception_handler.py | Modified |
+| deep/settings.py | Modified |
+| deep/tests/test_case.py | Modified |
+| deep/urls.py | Modified |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T215942/4d345645-43f5-4a56-a05f-5bb0ddde923f.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220607/9bee8649-a5df-4b61-bed9-1e3234084669.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220607/b816cc3b-6ca8-4132-af15-0f582203f35d.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220610/ab206fdf-7952-470f-a043-474720133e1d.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220610/ce8fd664-fd63-48b3-97b4-3e081305632f.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220613/46f7a509-5c8f-4506-9339-da2deea919d3.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220613/699246ab-410d-4794-a208-6b278566307d.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220615/b300c23b-2f87-4406-a6ff-a727bfa68366.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220618/2a49ddae-5a77-4436-b2a5-e18906d267b9.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T220620/7e9b8ed9-339c-4bd3-a241-ffdd509b4522.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T233046/5420ef95-087c-4950-a2a3-507c2c8bc99a.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T233046/dc705d72-3f13-4915-bcb4-0eee3332cd71.csv | Deleted |
+| documents/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T233300/25b6d3c8-1d85-421c-892f-8e9d8b6a67e2.csv | Deleted |
+| documents/csv/afxi6P5vmSMxRn8APv9nTd/20241125T220601/195b5b2d-b964-4517-9823-6c1a531454c8.csv | Deleted |
+| documents/csv/afxi6P5vmSMxRn8APv9nTd/20241125T220601/b11a6263-2c8a-40b2-8a11-e050e0ebbb73.csv | Deleted |
+| documents/csv/afxi6P5vmSMxRn8APv9nTd/20241125T220602/1e81ba8b-8d03-48eb-a39e-22425b79c20d.csv | Deleted |
+| documents/csv/afxi6P5vmSMxRn8APv9nTd/20241125T220602/a5f9574b-ff0a-4b42-ada2-be6ff31dfaf6.csv | Deleted |
+| documents/csv/afxi6P5vmSMxRn8APv9nTd/20241125T220603/ac8c7049-709b-457a-9794-9fe6280274fa.csv | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T215942/4d345645-43f5-4a56-a05f-5bb0ddde923f.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220607/9bee8649-a5df-4b61-bed9-1e3234084669.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220607/b816cc3b-6ca8-4132-af15-0f582203f35d.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220610/ab206fdf-7952-470f-a043-474720133e1d.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220610/ce8fd664-fd63-48b3-97b4-3e081305632f.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220613/46f7a509-5c8f-4506-9339-da2deea919d3.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220613/699246ab-410d-4794-a208-6b278566307d.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220615/b300c23b-2f87-4406-a6ff-a727bfa68366.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220618/2a49ddae-5a77-4436-b2a5-e18906d267b9.pdf | Deleted |
+| documents/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T220620/7e9b8ed9-339c-4bd3-a241-ffdd509b4522.pdf | Deleted |
+| documents/pdf/afxi6P5vmSMxRn8APv9nTd/20241125T220601/195b5b2d-b964-4517-9823-6c1a531454c8.pdf | Deleted |
+| documents/pdf/afxi6P5vmSMxRn8APv9nTd/20241125T220601/b11a6263-2c8a-40b2-8a11-e050e0ebbb73.pdf | Deleted |
+| documents/pdf/afxi6P5vmSMxRn8APv9nTd/20241125T220602/1e81ba8b-8d03-48eb-a39e-22425b79c20d.pdf | Deleted |
+| documents/pdf/afxi6P5vmSMxRn8APv9nTd/20241125T220602/a5f9574b-ff0a-4b42-ada2-be6ff31dfaf6.pdf | Deleted |
+| documents/pdf/afxi6P5vmSMxRn8APv9nTd/20241125T220603/ac8c7049-709b-457a-9794-9fe6280274fa.pdf | Deleted |
+| documentser/csv/aBjuSQpEPKeu45Mn7hQVgX/20241125T215855/6a674513-b7b0-4cba-b88c-30c18d95adcc.csv | Deleted |
+| documentser/pdf/aBjuSQpEPKeu45Mn7hQVgX/20241125T215855/6a674513-b7b0-4cba-b88c-30c18d95adcc.pdf | Deleted |
+| poetry.lock | Modified |
+| pyproject.toml | Modified |
+| schema.graphql | Modified |
diff --git a/deep/settings.py b/deep/settings.py
index dc24009be4..7f085a9c12 100644
--- a/deep/settings.py
+++ b/deep/settings.py
@@ -413,6 +413,8 @@
if env('DJANGO_USE_S3'):
# AWS S3 Bucket Credentials
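+    # Bucket name and region are read directly by the Kobo connector's boto3
+    # uploads and by the unified connector's presigned-URL helper.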
+ AWS_STORAGE_BUCKET_NAME = env('AWS_STORAGE_BUCKET_NAME')
+ AWS_S3_REGION_NAME = env('AWS_S3_REGION_NAME')
AWS_STORAGE_BUCKET_NAME_STATIC = env('AWS_STORAGE_BUCKET_NAME_STATIC')
AWS_STORAGE_BUCKET_NAME_MEDIA = env('AWS_STORAGE_BUCKET_NAME_MEDIA')
# If environment variable are not provided, then EC2 Role will be used.
@@ -434,7 +436,7 @@
AWS_S3_FILE_OVERWRITE = False
AWS_DEFAULT_ACL = 'private'
AWS_QUERYSTRING_AUTH = True
- AWS_S3_CUSTOM_DOMAIN = None
+ AWS_S3_CUSTOM_DOMAIN = None
AWS_QUERYSTRING_EXPIRE = GALLERY_FILE_EXPIRE
AWS_S3_SIGNATURE_VERSION = 's3v4'
AWS_IS_GZIPPED = True
@@ -452,6 +454,7 @@
MEDIAFILES_LOCATION = 'media'
MEDIA_URL = "https://%s/%s/" % (AWS_S3_CUSTOM_DOMAIN, MEDIAFILES_LOCATION)
DEFAULT_FILE_STORAGE = 'deep.s3_storages.MediaStorage'
+
else:
STATIC_URL = '/static/'
STATIC_ROOT = '/static'
diff --git a/deep/urls.py b/deep/urls.py
index 3f14a5db8b..6bf6d48b8d 100644
--- a/deep/urls.py
+++ b/deep/urls.py
@@ -17,7 +17,6 @@
from . import converters
-# import autofixture
from user.views import (
UserViewSet,
@@ -644,10 +643,12 @@ def get_api_path(path):
),
name="favicon"),
] + static.static(
- settings.MEDIA_URL, view=xframe_options_exempt(serve),
+ settings.MEDIA_URL,
+ view=xframe_options_exempt(serve),
document_root=settings.MEDIA_ROOT
)
+
if settings.DEBUG:
import debug_toolbar
if 'debug_toolbar' in settings.INSTALLED_APPS:
@@ -663,8 +664,10 @@ def get_api_path(path):
re_path(r'^ec-email/$', EntryCommentEmail.as_view()),
re_path(r'^erc-email/$', EntryReviewCommentEmail.as_view()),
re_path(r'^render-debug/$', RenderChart.as_view()),
+
]
+
handler404 = Api_404View.as_view()
# TODO Uncomment after fixing custom autofixtures