Skip to content

Commit

Permalink
refactor: introduce subgraphs to organize stages
Browse files Browse the repository at this point in the history
This abstracts the different sections better
  • Loading branch information
nickumia-reisys committed Sep 20, 2023
1 parent 49751e9 commit e1aa69b
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 78 deletions.
163 changes: 86 additions & 77 deletions docs/single_xml.mmd
Original file line number Diff line number Diff line change
@@ -1,15 +1,93 @@
flowchart TD
flowchart LR

%% Old Logic
gs([GATHER STARTED])
ge([GATHER ENDED])
fs([FETCH STARTED])
fe([FETCH ENDED])
is([IMPORT STARTED])
ie([IMPORT ENDED])
%% Algorithm
gather_stage ==> fetch_stage
fetch_stage ==> import_stage

subgraph gather_stage [Gather Stage]
direction TB
gs([GATHER STARTED])
ge([GATHER ENDED])
gs ==> _get_content_as_unicode
_get_content_as_unicode ==> is_existing_object
is_existing_object == Yes ==> get_existing_object
is_existing_object == No ==> create_object
get_existing_object ==> change_object
change_object ==> guess_standard
create_object ==> guess_standard
guess_standard ==> is_iso
is_iso == Yes ==> save_content
is_iso == No ==> save_original_document
save_content ==> ge
save_original_document ==> ge
_get_content_as_unicode-. Exception .-> error
end
subgraph fetch_stage [Fetch Stage]
direction TB
fs([FETCH STARTED])
fe([FETCH ENDED])
fs ==> do_nothing
do_nothing ==> fe
end
subgraph import_stage [Import Stage]
direction TB
is([IMPORT STARTED])
ie([IMPORT ENDED])
is ==> is_object_empty
is_object_empty == No ==> is_force_import
is_force_import == Yes ==> change_object_2
is_force_import == No ==> check_status_from_gather
change_object_2 ==> get_existing_object_2
check_status_from_gather ==> get_existing_object_2
get_existing_object_2 ==> is_delete
is_delete == Yes ==> delete
is_delete == No ==> is_iso_2
is_iso_2 == No ==> transform_to_iso
transform_to_iso == Success ==> save_content_2
save_content_2 == transform ==> parse_iso
is_iso_2 == Yes ==> is_object_content_empty
is_object_content_empty == No ==> _validate_document
_validate_document == valid ==> parse_iso
_validate_document == not valid ==> continue_on_validation_errors
continue_on_validation_errors == Yes ==> parse_iso
continue_on_validation_errors == No ==> ie
parse_iso ==> update_object_reference
update_object_reference ==> is_guid_current
is_guid_current == No ==> update_guid
update_guid ==> is_guid_present
is_guid_present == No ==> generate_guid
is_guid_present == Yes ==> get_modified_date
generate_guid ==> get_modified_date
get_modified_date ==> spatial_package_create
spatial_package_create ==> is_source_private
is_source_private == Yes ==> mark_object_private
%% BUG: if source marked as private --> harvest --> changed to public --> harvest --> datasets remain private
is_source_private == No ==> is_source_part_of_topic
mark_object_private ==> is_source_part_of_topic
is_source_part_of_topic == Yes ==> mark_object_part_of_topic
is_source_part_of_topic == No ==> mark_as_geospatial
mark_object_part_of_topic ==> mark_as_geospatial
mark_as_geospatial ==> update_object_reference_2
update_object_reference_2 ==> is_status_new
is_status_new == Yes ==> create
is_status_new == No ==> is_modified_newer
is_modified_newer == No ==> transfer_job_history
transfer_job_history ==> delete_old_object
delete_old_object ==> reindex_package
is_modified_newer == Yes ==> update
create ==> ie
update ==> ie
is_object_content_empty-. Yes .-> error_2
parse_iso-. exception .-> error_2
is_guid_current-. Yes .-> error_2
get_modified_date-. exception .-> error_2
is_object_empty-. Yes .-> skip
reindex_package -.-> skip
end

%% Data
error[\Error/]
error_2[\Error/]
skip[/Skip\]

%% Functions
Expand Down Expand Up @@ -59,72 +137,3 @@ flowchart TD
is_source_private{Is the Harvest Source Private?}
is_source_part_of_topic{Is the Harvest Source part of a Topic?}
is_status_new{Is the Status new?}

%% Algorithm
gs ==> _get_content_as_unicode
_get_content_as_unicode-. Exception .-> error
_get_content_as_unicode ==> is_existing_object
is_existing_object == Yes ==> get_existing_object
is_existing_object == No ==> create_object
get_existing_object ==> change_object
change_object ==> guess_standard
create_object ==> guess_standard
guess_standard ==> is_iso
is_iso == Yes ==> save_content
is_iso == No ==> save_original_document
save_content ==> ge
save_original_document ==> ge
ge ==> fs
fs ==> do_nothing
do_nothing ==> fe
fe ==> is
is ==> is_object_empty
is_object_empty-. Yes .-> skip
is_object_empty == No ==> is_force_import
is_force_import == Yes ==> change_object_2
is_force_import == No ==> check_status_from_gather
change_object_2 ==> get_existing_object_2
check_status_from_gather ==> get_existing_object_2
get_existing_object_2 ==> is_delete
is_delete == Yes ==> delete
is_delete == No ==> is_iso_2
is_iso_2 == No ==> transform_to_iso
transform_to_iso == Success ==> save_content_2
save_content_2 == transform ==> parse_iso
is_iso_2 == Yes ==> is_object_content_empty
is_object_content_empty-. Yes .-> error
is_object_content_empty == No ==> _validate_document
_validate_document == valid ==> parse_iso
_validate_document == not valid ==> continue_on_validation_errors
continue_on_validation_errors == Yes ==> parse_iso
continue_on_validation_errors == No ==> ie
parse_iso-. exception .-> error
parse_iso ==> update_object_reference
update_object_reference ==> is_guid_current
is_guid_current-. Yes .-> error
is_guid_current == No ==> update_guid
update_guid ==> is_guid_present
is_guid_present == No ==> generate_guid
is_guid_present == Yes ==> get_modified_date
generate_guid ==> get_modified_date
get_modified_date-. exception .-> error
get_modified_date ==> spatial_package_create
spatial_package_create ==> is_source_private
is_source_private == Yes ==> mark_object_private
%% BUG: if source marked as private --> harvest --> changed to public --> harvest --> datasets remain private
is_source_private == No ==> is_source_part_of_topic
mark_object_private ==> is_source_part_of_topic
is_source_part_of_topic == Yes ==> mark_object_part_of_topic
is_source_part_of_topic == No ==> mark_as_geospatial
mark_object_part_of_topic ==> mark_as_geospatial
mark_as_geospatial ==> update_object_reference_2
update_object_reference_2 ==> is_status_new
is_status_new == Yes ==> create
is_status_new == No ==> is_modified_newer
is_modified_newer == No ==> transfer_job_history
transfer_job_history ==> delete_old_object
delete_old_object ==> reindex_package
reindex_package -.-> skip
is_modified_newer == Yes ==> update
create ==> ie
update ==> ie
2 changes: 1 addition & 1 deletion docs/single_xml.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

1 comment on commit e1aa69b

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
harvester
   __init__.py30100% 
harvester/db/models
   __init__.py50100% 
   models.py530100% 
harvester/extract
   __init__.py1922 89%
   dcatus.py1122 82%
harvester/utils
   __init__.py00100% 
   json.py2266 73%
   pg.py3544 89%
   s3.py2466 75%
harvester/validate
   __init__.py00100% 
   dcat_us.py240100% 
TOTAL1962090% 

Tests Skipped Failures Errors Time
29 0 💤 0 ❌ 0 🔥 14.693s ⏱️

Please sign in to comment.