# Regular expressions used by clean_json.  They are raw strings so that
# sequences such as ``\s`` and ``\[`` reach the regex engine untouched instead
# of being parsed as (invalid) string-literal escapes.
_INLINE_LIST = r'(\s*)"([^"]*)"\s*:\s*\[\{\s*([^}]*)\s+\}\](,*)'
_INLINE_OBJECT = r'(\s*)"([^"]*)"\s*:\s*\{\s*([^}]*)\s+\}(,*)'
_WRAPPED_OBJECT = r'(\s*"[^"]*"\s*:\s*\{\s*[^"}\s]+)\s*$'
_CONTINUATION = r'\s*(\w.*)$'
_LIST_OPEN = r'(\s*)"([^"]*)"\s*:\s*\[\{\s*'
_TYPED_VALUE = r'(\s*)"([^"]*)"\s*:\s*\[?"?<([^>]*)>"?\]?(,*)'
_TYPED_LIST = r'(\s*)"([^"]*)"\s*:\s*\["?<([^>]*)>"?\](,*)'
_CODE_OPTIONS = r'[^|]*(\s[A-Za-z\-]+\s(?:\|\s[A-Za-z\-]+\s)+)[^|]*'


def _strip_comments(raw_lines):
    """Separate every raw line into its JSON part and its ``//`` comment.

    Returns two parallel lists ``(code_lines, comments)``.  Both start with an
    entry for the opening brace that replaces the discarded first line.  Lines
    whose JSON part is empty or whitespace-only are dropped entirely.
    """
    code_lines = ['{']
    comments = [None]
    for raw in raw_lines:
        # NOTE(review): a "//" inside a string value (e.g. a URL) would also be
        # cut here; the visible fixtures contain none -- confirm for new inputs.
        code, separator, comment = raw.partition('//')
        if not code.strip():
            continue
        code_lines.append(code.replace('\n', ''))
        comments.append(comment if separator else None)
    return code_lines, comments


def _collapse_shorthand(lines, comments):
    """Rewrite the ``[{ Type }]`` / ``{ Type }`` shorthand into typed keys.

    ``comments`` must be parallel to ``lines``.  The returned pair of lists is
    parallel again: when a property wrapped over two lines is merged, the
    merged line keeps the comment of the continuation line.
    """
    lines = list(lines)  # merged text is written into the next slot
    out_lines = []
    out_comments = []
    for i, line in enumerate(lines):
        # One line case [{ }]
        m = re.match(_INLINE_LIST, line)
        if m is not None:
            out_lines.append('{}"{}<list::{}>": null{}'.format(*m.groups()))
            out_comments.append(comments[i])
            continue

        # One line case { }
        m = re.match(_INLINE_OBJECT, line)
        if m is not None:
            out_lines.append('{}"{}<{}>": null{}'.format(*m.groups()))
            out_comments.append(comments[i])
            continue

        # One line case { } that was wrapped onto the following line: glue the
        # fragment onto the next line and let the next iteration handle it.
        # Nothing is emitted for this line, so its comment is skipped too,
        # which keeps out_lines and out_comments aligned however many
        # properties are wrapped.
        m = re.match(_WRAPPED_OBJECT, line)
        if m is not None and i + 1 < len(lines):
            tail = re.match(_CONTINUATION, lines[i + 1])
            if tail is not None:
                lines[i + 1] = m.group(1) + tail.group(1)
                continue
            # No usable continuation: fall through and keep the line as is.

        # Multi line case [{ \n ... \n }]
        m = re.match(_LIST_OPEN, line)
        if m is not None:
            out_lines.append('{}"{}<list>": {}'.format(m.group(1), m.group(2), '[{'))
        else:
            out_lines.append(line)
        out_comments.append(comments[i])
    return out_lines, out_comments


def _annotate_types(lines, comments):
    """Move ``<type>`` placeholders from the value into the key.

    For the ``code`` type the allowed values are read from the line's comment
    (``a | b | c``) and appended as ``code=a|b|c``.

    Raises:
        TypeError: a ``<code>`` placeholder has no comment listing its options.
    """
    out_lines = []
    for line, comment in zip(lines, comments):
        match = re.match(_TYPED_VALUE, line)
        if match is None:
            out_lines.append(line)
            continue

        indent, key, given_type, trailing = match.groups()
        # A placeholder wrapped in [ ] denotes a list of that type.
        list_marker = 'list::' if re.match(_TYPED_LIST, line) else ''

        if given_type == 'code':
            # The code options are only available in the comment.
            code_match = None
            if comment is not None:
                code_match = re.match(_CODE_OPTIONS, comment)
            if code_match is None:
                raise TypeError('No code provided', match)
            codes = code_match.group(1).strip().split(' | ')
            given_type += '=' + '|'.join(codes)

        out_lines.append('{}"{}<{}{}>": null{}'.format(
            indent, key, list_marker, given_type, trailing))
    return out_lines


def clean_json(filename, output_file=None):
    """Turn an annotated FHIR-style JSON template into valid JSON text.

    The first line of the input is discarded and replaced by ``{``.  ``//``
    comments are removed, ``"key" : [{ Type }]`` and ``"key" : { Type }``
    become ``"key<list::Type>": null`` and ``"key<Type>": null``, a multi-line
    ``"key" : [{`` becomes ``"key<list>": [{``, and ``"key" : <type>`` becomes
    ``"key<type>": null`` (with the options appended for ``code``).

    Args:
        filename: path of the template to read.
        output_file: optional path; when given, the result is written there.

    Returns:
        The cleaned text when ``output_file`` is None, otherwise None.

    Raises:
        TypeError: a ``<code>`` field has no comment listing its options.
    """
    with open(filename) as input_file:
        input_file.readline()  # skip the first line; '{' is emitted instead
        lines, comments = _strip_comments(input_file)

    lines, comments = _collapse_shorthand(lines, comments)
    cleaned = '\n'.join(_annotate_types(lines, comments))

    if output_file is None:
        return cleaned
    with open(output_file, 'w') as destination:
        destination.write(cleaned)
    return None
"gender<code=male|female|other|unknown>": null, + "organization<Reference(Organization)>": null, + "period<Period>": null + }], + "animal" : { + "species<CodeableConcept>": null, + "breed<CodeableConcept>": null, + "genderStatus<CodeableConcept>": null + }, + "communication<list>": [{ + "language<CodeableConcept>": null, + "preferred<boolean>": null + }], + "generalPractitioner<list::Reference(Organization|Practitioner)>": null, + "managingOrganization<Reference(Organization)>": null, + "link<list>": [{ + "other<Reference(Patient|RelatedPerson)>": null, + "type<code=replaced-by|replaces|refer|seealso>": null + }] +} \ No newline at end of file diff --git a/tests/data/corrupted/patient.json b/tests/data/corrupted/patient.json new file mode 100644 index 0000000..f2a993e --- /dev/null +++ b/tests/data/corrupted/patient.json @@ -0,0 +1,44 @@ +{doco + "resourceType" : "Patient", + // from Resource: id, meta, implicitRules, and language + // from DomainResource: text, contained, extension, and modifierExtension + "identifier" : [{ Identifier }], // An identifier for this patient + "active" : <boolean>, // Whether this patient's record is in active use + "name" : [{ HumanName }], // A name associated with the patient + "telecom" : [{ ContactPoint }], // A contact detail for the individual + "gender" : "<code>", // male | female | other | unknown + "birthDate" : "<date>", // The date of birth for the individual + // deceased[x]: Indicates if the individual is deceased or not. One of these 2: + "deceasedBoolean" : <boolean>, + "deceasedDateTime" : "<dateTime>", + "address" : [{ Address }], // Addresses for the individual + "maritalStatus" : { CodeableConcept }, // Marital (civil) status of a patient + // multipleBirth[x]: Whether patient is part of a multiple birth. One of these 2: + "multipleBirthBoolean" : <boolean>, + "multipleBirthInteger" : <integer>, + "photo" : [{ Attachment }], // Image of the patient + "contact" : [{ // A contact party (e.g. 
guardian, partner, friend) for the patient + "relationship" : [{ CodeableConcept }], // The kind of relationship + "name" : { HumanName }, // A name associated with the contact person + "telecom" : [{ ContactPoint }], // A contact detail for the person + "address" : { Address }, // Address for the contact person + "gender" : "<code>", // male | female | other | unknown + "organization" : { Reference(Organization) }, // C? Organization that is associated with the contact + "period" : { Period } // The period during which this contact person or organization is valid to be contacted relating to this patient + }], + "animal" : { // This patient is known to be an animal (non-human) + "species" : { CodeableConcept }, // R! E.g. Dog, Cow + "breed" : { CodeableConcept }, // E.g. Poodle, Angus + "genderStatus" : { CodeableConcept } // E.g. Neutered, Intact + }, + "communication" : [{ // A list of Languages which may be used to communicate with the patient about his or her health + "language" : { CodeableConcept }, // R! The language which can be used to communicate with the patient about his or her health + "preferred" : <boolean> // Language preference indicator + }], + "generalPractitioner" : [{ Reference(Organization|Practitioner) }], // Patient's nominated primary care provider + "managingOrganization" : { Reference(Organization) }, // Organization that is the custodian of the patient record + "link" : [{ // Link to another patient resource that concerns the same actual person + "other" : { Reference(Patient|RelatedPerson) }, // R! The other patient or related person resource that the link refers to + "type" : "<code>" // R! 
replaced-by | replaces | refer | seealso - type of link + }] +} \ No newline at end of file diff --git a/tests/test_clean.py b/tests/test_clean.py new file mode 100644 index 0000000..54a8639 --- /dev/null +++ b/tests/test_clean.py @@ -0,0 +1,19 @@ +import json + + +def test_clean_json(cleaned_json, expected_json): + + # assert cleaned json is a valid json file + cleaned_dict = json.loads(cleaned_json) + + # assert cleaned json is equal to expected output + assert cleaned_dict == expected_json + + + + + + + + + diff --git a/tests/test_convert.py b/tests/test_convert.py new file mode 100644 index 0000000..559870d --- /dev/null +++ b/tests/test_convert.py @@ -0,0 +1,11 @@ +from process import json_to_yml + +def test_json_to_yml(cleaned_json): + + # I don't know how to assess a yaml has the expected format + # more info: https://github.com/arkhn/fhir-store/issues/3 + pass + + + + diff --git a/tests/test_write.py b/tests/test_write.py new file mode 100644 index 0000000..b144d0e --- /dev/null +++ b/tests/test_write.py @@ -0,0 +1,21 @@ +import os +from process import write + +DOMAIN = 'domain_test' +SUBDOMAIN = 'subdomain_test' +RESOURCE = 'resource_test' + + +def test_write(cleaned_json, cleaned_yml): + + write(domain=DOMAIN, subdomain=SUBDOMAIN, resource=RESOURCE, format='json', file_data=cleaned_json) + write(domain=DOMAIN, subdomain=SUBDOMAIN, resource=RESOURCE, format='yml', file_data=cleaned_yml) + + json_path = os.path.join(os.path.join('json', DOMAIN, SUBDOMAIN, RESOURCE + '.json')) + yml_path = os.path.join(os.path.join('yml', DOMAIN, SUBDOMAIN, RESOURCE + '.yml')) + + # assert file have been written properly + with open(json_path, 'r') as content_file: + assert content_file.read() == cleaned_json + with open(yml_path, 'r') as content_file: + assert content_file.read() == cleaned_yml