-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from GSA/define-db-sqlalchemy
Define db sqlalchemy
- Loading branch information
Showing
10 changed files
with
514 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
## How do we organize and display data on data.gov? | ||
|
||
- Navigating to the [datasets](https://catalog.data.gov/dataset) page we see the following "filters" | ||
- topics | ||
- topic categories | ||
- dataset type | ||
- tags | ||
- formats | ||
- organization types | ||
- organizations | ||
- publishers | ||
- bureaus |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
@startuml harvester2.0 ERD | ||
|
||
skinparam linetype ortho | ||
|
||
' ERD | ||
card "Entity Relationship Diagram" as entities { | ||
entity "harvest_source" as source { | ||
uuid : uuid (PK) | ||
-- | ||
name: text | ||
notification_emails: array( text ) | ||
organization_name: text | ||
frequency: text | ||
config: json | ||
urls: array( text ) | ||
schema_validation_type: text | ||
} | ||
|
||
entity "harvest_record" as record { | ||
uuid : uuid (PK) | ||
-- | ||
source_id: uuid (FK) | ||
job_id: uuid (FK) | ||
status: text | ||
s3_path: text | ||
} | ||
|
||
entity "harvest_job" as job { | ||
uuid : uuid (PK) | ||
-- | ||
source_id: uuid (FK) | ||
status: text | ||
date_created: datetime | ||
date_finished: datetime | ||
extract_started: datetime | ||
extract_finished: datetime | ||
compare_started: datetime | ||
compare_finished: datetime | ||
records_added: smallint | ||
records_updated: smallint | ||
records_deleted: smallint | ||
records_errored: smallint | ||
records_ignored: smallint | ||
} | ||
|
||
entity "harvest_error" as error { | ||
uuid : uuid (PK) | ||
-- | ||
job_id: uuid (FK) | ||
record_id: uuid (FK) | ||
record_reported_id: text | ||
date_created: datetime | ||
error_type: text | ||
severity: enum | ||
message: text | ||
|
||
} | ||
} | ||
|
||
' lookup tables | ||
card "Lookup Tables" as lookup { | ||
|
||
entity "data_format" as data_format { | ||
name: text (PK) | ||
-- | ||
mime_type: text | ||
} | ||
} | ||
|
||
' enumerators | ||
card "Enumerators" as enumerators { | ||
|
||
enum error_severity { | ||
ERROR | ||
CRITICAL | ||
} | ||
|
||
enum job_status { | ||
CREATE | ||
PROCESSING | ||
COMPLETE | ||
} | ||
|
||
enum schema_validation { | ||
DCATUS | ||
ISO19115 | ||
} | ||
|
||
enum record_status { | ||
STALE | ||
ACTIVE | ||
INVALID | ||
} | ||
} | ||
|
||
' relationships | ||
source ||--|{ job | ||
source ||--|{ record | ||
job ||--|{ record | ||
job ||-|{ error | ||
|
||
@enduml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from sqlalchemy import text | ||
from sqlalchemy.dialects.postgresql import UUID | ||
from sqlalchemy.orm import DeclarativeBase, mapped_column | ||
|
||
|
||
class Base(DeclarativeBase):
    """Declarative base shared by the harvester ORM models.

    The ``id`` primary key is declared here, on the base, rather than
    being repeated on each model class.
    """

    # UUID primary key. The value is produced by the database through the
    # ``gen_random_uuid()`` server default, not generated in Python.
    # NOTE(review): gen_random_uuid() is built in on PostgreSQL 13+; older
    # servers need the pgcrypto extension -- confirm the target version.
    id = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        server_default=text("gen_random_uuid()"),
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
from harvester.db.models import Base | ||
from sqlalchemy import ForeignKey, SMALLINT | ||
from sqlalchemy import String, DateTime, Enum | ||
from sqlalchemy.dialects.postgresql import JSON, UUID, ARRAY | ||
from sqlalchemy.orm import mapped_column | ||
from sqlalchemy.sql import func | ||
import enum | ||
|
||
|
||
class SeverityEnum(enum.Enum):
    """Severity levels that can be attached to a harvest error."""

    # Member names are lowercase; the values are the uppercase labels.
    error = "ERROR"
    critical = "CRITICAL"
|
||
|
||
class HarvestSource(Base):
    """ORM model for the ``harvest_source`` table."""

    __tablename__ = "harvest_source"
    __table_args__ = {
        "comment": "Contains information for each harvest source",
    }

    name = mapped_column(String, nullable=False)
    notification_emails = mapped_column(ARRAY(String), nullable=False)
    organization_name = mapped_column(String, nullable=False)
    # TODO: decide whether frequency should be an enum instead of free text.
    frequency = mapped_column(String, nullable=False)
    # No nullable=False here, so config is the one optional column.
    config = mapped_column(JSON)
    urls = mapped_column(ARRAY(String), nullable=False)
    # TODO: decide whether schema_validation_type should be an enum.
    schema_validation_type = mapped_column(String, nullable=False)
|
||
|
||
class HarvestJob(Base):
    """ORM model for the ``harvest_job`` table."""

    __tablename__ = "harvest_job"
    __table_args__ = {
        "comment": "Contains job state information run through the pipeline",
    }

    # The harvest source this job belongs to.
    source_id = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("harvest_source.id"),
    )
    # TODO: decide whether status should be an enum instead of free text.
    status = mapped_column(String, nullable=False)

    # Timestamps are timezone-aware. Only date_created has a default, and
    # it is filled in by the database (``now()``).
    date_created = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    date_finished = mapped_column(DateTime(timezone=True))
    extract_started = mapped_column(DateTime(timezone=True))
    extract_finished = mapped_column(DateTime(timezone=True))
    compare_started = mapped_column(DateTime(timezone=True))
    compare_finished = mapped_column(DateTime(timezone=True))

    # Per-job record counters.
    # NOTE(review): SMALLINT tops out at 32767 in PostgreSQL -- confirm
    # that no single job can touch more records than that.
    records_added = mapped_column(SMALLINT)
    records_updated = mapped_column(SMALLINT)
    records_deleted = mapped_column(SMALLINT)
    records_errored = mapped_column(SMALLINT)
    records_ignored = mapped_column(SMALLINT)
|
||
|
||
class HarvestError(Base):
    """ORM model for the ``harvest_error`` table."""

    __tablename__ = "harvest_error"
    __table_args__ = {"comment": "Table to contain all errors in the pipeline"}

    # The job during which the error was raised.
    job_id = mapped_column(UUID(as_uuid=True), ForeignKey("harvest_job.id"))
    # The ERD lists record_id as a foreign key, so declare the constraint
    # the same way the other FK columns in this file do. The column stays
    # nullable, so an error can still be stored without a record.
    record_id = mapped_column(UUID(as_uuid=True), ForeignKey("harvest_record.id"))
    record_reported_id = mapped_column(String)
    # Timezone-aware timestamp filled in by the database (``now()``).
    date_created = mapped_column(DateTime(timezone=True), server_default=func.now())
    # TODO: decide whether error_type should be an enum instead of free text.
    error_type = mapped_column(String)
    # values_callable makes the database enum use the member values
    # ("ERROR", "CRITICAL") instead of the lowercase member names. The
    # lambda parameter is named so it does not shadow the ``enum`` module.
    severity = mapped_column(
        Enum(
            SeverityEnum,
            values_callable=lambda members: [member.value for member in members],
        )
    )
    message = mapped_column(String)
|
||
|
||
class HarvestRecord(Base):
    """ORM model for the ``harvest_record`` table."""

    __tablename__ = "harvest_record"
    __table_args__ = {
        "comment": "Table to contain records",
    }

    # Each record points at both a harvest job and a harvest source.
    job_id = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("harvest_job.id"),
    )
    source_id = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("harvest_source.id"),
    )
    # TODO: decide whether status should be an enum instead of free text.
    status = mapped_column(String)
    s3_path = mapped_column(String)
Oops, something went wrong.
9aab59f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Coverage Report