Skip to content

Commit

Permalink
Merge pull request #60 from GSA/bulk-add-records
Browse files Browse the repository at this point in the history
Creates new interface for bulk adding records
  • Loading branch information
rshewitt authored May 1, 2024
2 parents f695fd5 + 25ce986 commit d9231d8
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 23 deletions.
17 changes: 17 additions & 0 deletions app/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,23 @@ def add_harvest_record(self, record_data):
self.db.rollback()
return None

def add_harvest_records(self, records_data: list) -> bool:
    """
    Add many records at once

    The rows are passed to ``bulk_insert_mappings`` and committed with a
    single ``commit``. If anything raises along the way, the error is
    printed, the session is rolled back and ``False`` is returned; the
    exception is not propagated to the caller.

    :param list records_data: List of records with unique UUIDs
    :return bool success of operation: True when the insert was committed,
        False when it failed and was rolled back
    """
    try:
        self.db.bulk_insert_mappings(HarvestRecord, records_data)
        self.db.commit()
    except Exception as e:
        # Best-effort API: report the failure and undo the pending work so
        # the session stays usable; callers only have to check the flag.
        print("Error:", e)
        self.db.rollback()
        # Honour the declared ``-> bool`` contract (previously ``None``).
        return False
    return True

# For tests only; will be removed later.
def get_all_harvest_records(self):
harvest_records = self.db.query(HarvestRecord).all()
Expand Down
4 changes: 2 additions & 2 deletions app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ class HarvestError(Base):
reference = db.Column(db.String)


class HarvestRecord(db.Model):
class HarvestRecord(Base):
__tablename__ = "harvest_record"

id = db.Column(db.String, primary_key=True)
identifier = db.Column(db.String())
harvest_job_id = db.Column(
db.String(36), db.ForeignKey("harvest_job.id"), nullable=True
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "datagov-harvesting-logic"
version = "0.3.9"
version = "0.3.10"
description = ""
# authors = [
# {name = "Jin Sun", email = "[email protected]"},
Expand Down
81 changes: 61 additions & 20 deletions tests/database/test_db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import uuid

import pytest
from sqlalchemy.orm import scoped_session, sessionmaker

Expand Down Expand Up @@ -43,12 +45,44 @@ def org_data():
return {"name": "Test Org", "logo": "https://example.com/logo.png"}


@pytest.fixture
def source_data(organization):
    """Return the field values for a harvest source tied to ``organization``.

    ``organization_id`` is read from ``organization.id``; every other value
    is a fixed literal.
    """
    payload = dict(
        name="Test Source",
        notification_emails="[email protected]",
        organization_id=organization.id,
        frequency="daily",
        url="http://example.com",
        schema_type="type1",
        source_type="typeA",
        status="active",
    )
    return payload


@pytest.fixture
def organization(interface, org_data):
    """Return whatever ``interface.add_organization`` yields for ``org_data``."""
    return interface.add_organization(org_data)


@pytest.fixture
def job_data():
    """Return a fresh dict describing a job whose status is ``"new"``."""
    payload = dict(status="new")
    return payload


@pytest.fixture
def record_data():
    """Return one record dict with random UUID4 strings for both fields."""
    identifier = str(uuid.uuid4())
    source_hash = str(uuid.uuid4())
    return {"identifier": identifier, "source_hash": source_hash}


@pytest.fixture
def records_data():
    """Return a list of ten record dicts, each with random UUID4 strings."""
    rows = []
    for _ in range(10):
        identifier = str(uuid.uuid4())
        source_hash = str(uuid.uuid4())
        rows.append({"identifier": identifier, "source_hash": source_hash})
    return rows


def test_add_organization(interface, org_data):
org = interface.add_organization(org_data)
assert org is not None
Expand All @@ -74,20 +108,6 @@ def test_delete_organization(interface, organization):
assert result == "Organization deleted successfully"


@pytest.fixture
def source_data(organization):
    """Return the field values for a harvest source tied to ``organization``.

    ``organization_id`` is read from ``organization.id``; every other value
    is a fixed literal.
    """
    payload = dict(
        name="Test Source",
        notification_emails="[email protected]",
        organization_id=organization.id,
        frequency="daily",
        url="http://example.com",
        schema_type="type1",
        source_type="typeA",
        status="active",
    )
    return payload


def test_add_harvest_source(interface, source_data):
source = interface.add_harvest_source(source_data)
assert source is not None
Expand Down Expand Up @@ -127,17 +147,38 @@ def test_delete_harvest_source(interface, source_data):
assert deleted_source is None


@pytest.fixture
def job_data(source_data):
    """Return a fresh dict describing a job whose status is ``"new"``.

    ``source_data`` is requested as a fixture but is not read in the body.
    """
    payload = dict(status="new")
    return payload


def test_harvest_source_by_jobid(interface, source_data, job_data):
    """Check that ``get_source_by_jobid`` returns the source a job was created for."""
    source = interface.add_harvest_source(source_data)

    # Link the job to the source before it is stored.
    job_data.update(harvest_source_id=source.id)
    created_job = interface.add_harvest_job(job_data)

    found_source = interface.get_source_by_jobid(created_job.id)
    assert source.id == found_source["id"]


def test_add_harvest_record(interface, source_data, job_data, record_data):
    """Check that a single added record keeps its source and job references."""
    source = interface.add_harvest_source(source_data)

    job_data.update(harvest_source_id=source.id)
    job = interface.add_harvest_job(job_data)

    # Attach the record to both the source and the job before inserting it.
    record_data.update(harvest_source_id=source.id, harvest_job_id=job.id)
    stored = interface.add_harvest_record(record_data)

    assert stored.harvest_source_id == source.id
    assert stored.harvest_job_id == job.id


def test_add_harvest_records(interface, source_data, job_data, records_data):
    """Check that a bulk insert reports success and that ten records are stored."""
    source = interface.add_harvest_source(source_data)

    job_data.update(harvest_source_id=source.id)
    job = interface.add_harvest_job(job_data)

    # Every row in the batch points at the same source and job.
    for row in records_data:
        row.update(harvest_source_id=source.id, harvest_job_id=job.id)

    outcome = interface.add_harvest_records(records_data)
    assert outcome is True

    stored = interface.get_all_harvest_records()
    assert len(stored) == 10

1 comment on commit d9231d8

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
harvester
   __init__.py50100% 
   ckan_utils.py4222 95%
   exceptions.py420100% 
   harvest.py4256565 85%
   logger_config.py10100% 
   utils.py6299 85%
TOTAL5777687% 

Tests Skipped Failures Errors Time
35 0 💤 0 ❌ 0 🔥 6.812s ⏱️

Please sign in to comment.