Skip to content
This repository has been archived by the owner on Dec 16, 2019. It is now read-only.

Commit

Permalink
Merge pull request #7 from arkhn/jblemoine_create_tests
Browse files Browse the repository at this point in the history
Add travis and pytest
  • Loading branch information
LaRiffle authored Oct 31, 2018
2 parents 269fe22 + 8af0c3b commit c0cc89b
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 70 deletions.
9 changes: 9 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Travis CI configuration: install the requirements, then run the test suite.
language: python
# Interpreter version(s) the build runs on.
python:
- "3.6"
# command to install dependencies
install:
- pip install -r requirements.txt
# command to run tests
script:
- pytest
140 changes: 70 additions & 70 deletions process/clean/hl7.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,84 +2,84 @@


# Patterns are raw strings so that regex escapes (\s, \w, \[ ...) are not
# parsed as (invalid) string escapes, and are compiled once at import time.

# `"key" : [{ Type }],` -- one-line list of a single type.
_INLINE_LIST = re.compile(r'(\s*)"([^"]*)"\s*:\s*\[\{\s*([^}]*)\s+\}\](,*)')
# `"key" : { Type },` -- one-line single type.
_INLINE_OBJECT = re.compile(r'(\s*)"([^"]*)"\s*:\s*\{\s*([^}]*)\s+\}(,*)')
# `"key" : { Type(A|` -- a one-line type that was wrapped onto the next line.
_SPLIT_OBJECT = re.compile(r'(\s*"[^"]*"\s*:\s*\{\s*[^"}\s]+)\s*$')
# Second half of a wrapped type: leading whitespace, then the remainder.
_CONTINUATION = re.compile(r'\s*(\w.*)$')
# `"key" : [{` -- opening of a list whose items span several lines.
_MULTILINE_LIST = re.compile(r'(\s*)"([^"]*)"\s*:\s*\[\{\s*')
# `"key" : <type>`, `"key" : "<type>"` or `"key" : ["<type>"]`.
_TYPED_FIELD = re.compile(r'(\s*)"([^"]*)"\s*:\s*\[?"?<([^>]*)>"?\]?(,*)')
# Same as above, but only the bracketed (list) form.
_TYPED_LIST_FIELD = re.compile(r'(\s*)"([^"]*)"\s*:\s*\["?<([^>]*)>"?\](,*)')
# ` a | b | c ` -- pipe-separated code options inside a trailing comment.
_CODE_OPTIONS = re.compile(r'[^|]*(\s[A-Za-z\-]+\s(?:\|\s[A-Za-z\-]+\s)+)[^|]*')


def _split_comments(input_file):
    """Split every line into its content and its trailing ``//`` comment.

    Returns two parallel lists ``(lines, comments)``. Both start with an entry
    for the opening brace, which replaces the header line the caller skipped.
    Lines whose content is empty or whitespace-only are dropped together with
    their comment.
    """
    lines = ['{']
    comments = [None]
    for raw_line in input_file:
        # NOTE(review): a "//" inside a string value (e.g. a URL) is also
        # treated as the start of a comment.
        content, separator, comment = raw_line.partition('//')
        if not content.strip():
            continue
        lines.append(content.replace("\n", ""))
        comments.append(comment if separator else None)
    return lines, comments


def _collapse_objects(lines, comments):
    """Rewrite the ``[{ Type }]`` / ``{ Type }`` notations as typed keys.

    Returns new ``(lines, comments)`` lists that stay index-aligned: when a
    wrapped line is merged into the following one, the merged line keeps the
    comment of the following line.
    """
    out_lines = []
    out_comments = []
    pending = None  # first half of a wrapped `{ Type` line, if any

    for line, comment in zip(lines, comments):
        if pending is not None:
            tail = _CONTINUATION.match(line)
            if tail is None:
                raise ValueError(
                    'Cannot join wrapped line {!r} with {!r}'.format(pending, line)
                )
            line = pending + tail.group(1)
            pending = None

        # One line case [{ }]
        m = _INLINE_LIST.match(line)
        if m is not None:
            out_lines.append('{}"{}<list::{}>": null{}'.format(
                m.group(1), m.group(2), m.group(3), m.group(4)))
            out_comments.append(comment)
            continue

        # One line case { }
        m = _INLINE_OBJECT.match(line)
        if m is not None:
            out_lines.append('{}"{}<{}>": null{}'.format(
                m.group(1), m.group(2), m.group(3), m.group(4)))
            out_comments.append(comment)
            continue

        # One line case { } wrapped over two lines: hold the first half back
        # and prepend it to the next line instead of emitting it.
        m = _SPLIT_OBJECT.match(line)
        if m is not None:
            pending = m.group(1)
            continue

        # Multi line case [{ \n ... \n }]
        m = _MULTILINE_LIST.match(line)
        if m is not None:
            out_lines.append('{}"{}<list>": [{{'.format(m.group(1), m.group(2)))
        else:
            out_lines.append(line)
        out_comments.append(comment)

    if pending is not None:
        raise ValueError(
            'Wrapped line {!r} has no continuation line'.format(pending)
        )
    return out_lines, out_comments


def _annotate_types(lines, comments):
    """Move ``<type>`` placeholders from the value into the key.

    For ``<code>`` fields the allowed values are read from the line's comment
    and appended to the type as ``code=a|b|c``.
    """
    out_lines = []
    for line, comment in zip(lines, comments):
        match = _TYPED_FIELD.match(line)
        if match is None:
            out_lines.append(line)
            continue

        indent, key, given_type, trailing = match.groups()
        # Test if [ ] present
        list_marker = 'list::' if _TYPED_LIST_FIELD.match(line) else ''

        if given_type == 'code':  # The code options are given in the comment
            code_match = None
            if comment is not None:
                code_match = _CODE_OPTIONS.match(comment)
            if code_match is None:
                raise TypeError('No code provided', match)
            codes = code_match.group(1).strip().split(' | ')
            given_type += '=' + '|'.join(codes)

        out_lines.append('{}"{}<{}{}>": null{}'.format(
            indent, key, list_marker, given_type, trailing))
    return out_lines


def clean_json(filename, output_file=None):
    """Turn a commented FHIR-style JSON template into valid JSON text.

    The first line of the input is discarded and replaced by ``{``. ``//``
    comments are stripped, and each type placeholder is folded into its key
    (``"active" : <boolean>`` becomes ``"active<boolean>": null``).

    Args:
        filename: Path of the template to read.
        output_file: Optional path. When given, the cleaned text is written
            there and nothing is returned.

    Returns:
        The cleaned JSON text, or ``None`` when ``output_file`` is given.

    Raises:
        TypeError: A ``<code>`` field has no usable list of options in its
            comment.
        ValueError: A type wrapped over two lines cannot be joined with the
            line that follows it.
    """
    with open(filename) as input_file:
        # Skip the header line; `_split_comments` supplies the opening brace.
        input_file.readline()
        lines, comments = _split_comments(input_file)

    lines, comments = _collapse_objects(lines, comments)
    cleaned = '\n'.join(_annotate_types(lines, comments))

    if output_file is None:
        return cleaned
    with open(output_file, 'w') as destination:
        destination.write(cleaned)
    return None
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
black
pytest
Empty file added tests/__init__.py
Empty file.
27 changes: 27 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest
import os
import json

from process import clean_json
from process import json_to_yml

# Folder of reference documents loaded by the `expected_json` fixture.
CLEAN_DATA_FOLDER = 'tests/data/clean'
# Folder of raw inputs passed to `clean_json` by the `cleaned_json` fixture.
CORRUPTED_DATA_FOLDER = 'tests/data/corrupted'
# File names looked up in both folders; each one parametrizes the fixtures.
FILES = ['patient.json']


@pytest.fixture(
    scope="module",
    params=[os.path.join(CORRUPTED_DATA_FOLDER, name) for name in FILES],
)
def cleaned_json(request):
    """Return the output of ``clean_json`` for each raw input file."""
    corrupted_path = request.param
    return clean_json(corrupted_path)


@pytest.fixture(scope="module")
def cleaned_yml(cleaned_json):
    """Parse the cleaned JSON text and pass the result to ``json_to_yml``."""
    parsed = json.loads(cleaned_json)
    return json_to_yml(parsed)


@pytest.fixture(scope="module",
                params=[os.path.join(CLEAN_DATA_FOLDER, filename) for filename in FILES])
def expected_json(request):
    """Return the parsed reference document for each file in ``FILES``."""
    # NOTE(review): this fixture and `cleaned_json` are parametrized
    # independently, so with more than one entry in FILES a test using both
    # would presumably also compare mismatched files -- confirm before adding
    # files.
    # Context manager so the handle is closed as soon as the data is parsed.
    with open(request.param) as reference_file:
        return json.load(reference_file)
40 changes: 40 additions & 0 deletions tests/data/clean/patient.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"resourceType" : "Patient",
"identifier<list::Identifier>": null,
"active<boolean>": null,
"name<list::HumanName>": null,
"telecom<list::ContactPoint>": null,
"gender<code=male|female|other|unknown>": null,
"birthDate<date>": null,
"deceasedBoolean<boolean>": null,
"deceasedDateTime<dateTime>": null,
"address<list::Address>": null,
"maritalStatus<CodeableConcept>": null,
"multipleBirthBoolean<boolean>": null,
"multipleBirthInteger<integer>": null,
"photo<list::Attachment>": null,
"contact<list>": [{
"relationship<list::CodeableConcept>": null,
"name<HumanName>": null,
"telecom<list::ContactPoint>": null,
"address<Address>": null,
"gender<code=male|female|other|unknown>": null,
"organization<Reference(Organization)>": null,
"period<Period>": null
}],
"animal" : {
"species<CodeableConcept>": null,
"breed<CodeableConcept>": null,
"genderStatus<CodeableConcept>": null
},
"communication<list>": [{
"language<CodeableConcept>": null,
"preferred<boolean>": null
}],
"generalPractitioner<list::Reference(Organization|Practitioner)>": null,
"managingOrganization<Reference(Organization)>": null,
"link<list>": [{
"other<Reference(Patient|RelatedPerson)>": null,
"type<code=replaced-by|replaces|refer|seealso>": null
}]
}
44 changes: 44 additions & 0 deletions tests/data/corrupted/patient.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{doco
"resourceType" : "Patient",
// from Resource: id, meta, implicitRules, and language
// from DomainResource: text, contained, extension, and modifierExtension
"identifier" : [{ Identifier }], // An identifier for this patient
"active" : <boolean>, // Whether this patient's record is in active use
"name" : [{ HumanName }], // A name associated with the patient
"telecom" : [{ ContactPoint }], // A contact detail for the individual
"gender" : "<code>", // male | female | other | unknown
"birthDate" : "<date>", // The date of birth for the individual
// deceased[x]: Indicates if the individual is deceased or not. One of these 2:
"deceasedBoolean" : <boolean>,
"deceasedDateTime" : "<dateTime>",
"address" : [{ Address }], // Addresses for the individual
"maritalStatus" : { CodeableConcept }, // Marital (civil) status of a patient
// multipleBirth[x]: Whether patient is part of a multiple birth. One of these 2:
"multipleBirthBoolean" : <boolean>,
"multipleBirthInteger" : <integer>,
"photo" : [{ Attachment }], // Image of the patient
"contact" : [{ // A contact party (e.g. guardian, partner, friend) for the patient
"relationship" : [{ CodeableConcept }], // The kind of relationship
"name" : { HumanName }, // A name associated with the contact person
"telecom" : [{ ContactPoint }], // A contact detail for the person
"address" : { Address }, // Address for the contact person
"gender" : "<code>", // male | female | other | unknown
"organization" : { Reference(Organization) }, // C? Organization that is associated with the contact
"period" : { Period } // The period during which this contact person or organization is valid to be contacted relating to this patient
}],
"animal" : { // This patient is known to be an animal (non-human)
"species" : { CodeableConcept }, // R! E.g. Dog, Cow
"breed" : { CodeableConcept }, // E.g. Poodle, Angus
"genderStatus" : { CodeableConcept } // E.g. Neutered, Intact
},
"communication" : [{ // A list of Languages which may be used to communicate with the patient about his or her health
"language" : { CodeableConcept }, // R! The language which can be used to communicate with the patient about his or her health
"preferred" : <boolean> // Language preference indicator
}],
"generalPractitioner" : [{ Reference(Organization|Practitioner) }], // Patient's nominated primary care provider
"managingOrganization" : { Reference(Organization) }, // Organization that is the custodian of the patient record
"link" : [{ // Link to another patient resource that concerns the same actual person
"other" : { Reference(Patient|RelatedPerson) }, // R! The other patient or related person resource that the link refers to
"type" : "<code>" // R! replaced-by | replaces | refer | seealso - type of link
}]
}
19 changes: 19 additions & 0 deletions tests/test_clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json


def test_clean_json(cleaned_json, expected_json):
    """The cleaned text must parse as JSON and equal the reference document."""
    # json.loads raises on invalid JSON, which fails the test by itself.
    cleaned_dict = json.loads(cleaned_json)

    assert cleaned_dict == expected_json









11 changes: 11 additions & 0 deletions tests/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from process import json_to_yml

def test_json_to_yml(cleaned_json):
    """Placeholder test: nothing is asserted yet.

    It is not yet decided how to check that a YAML document has the expected
    format; see https://github.com/arkhn/fhir-store/issues/3
    """




21 changes: 21 additions & 0 deletions tests/test_write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
from process import write

# Arguments passed to `write` in `test_write`; the same values are used as
# path components when the test reads the written files back.
DOMAIN = 'domain_test'
SUBDOMAIN = 'subdomain_test'
RESOURCE = 'resource_test'


def test_write(cleaned_json, cleaned_yml):
    """Write both formats with ``write`` and read the files back unchanged."""
    # (format, payload) pairs, kept in the order json then yml.
    payloads = (('json', cleaned_json), ('yml', cleaned_yml))

    for fmt, data in payloads:
        write(domain=DOMAIN, subdomain=SUBDOMAIN, resource=RESOURCE, format=fmt, file_data=data)

    written_paths = [
        os.path.join(fmt, DOMAIN, SUBDOMAIN, RESOURCE + '.' + fmt)
        for fmt, _ in payloads
    ]

    # assert file have been written properly
    for path, (_, data) in zip(written_paths, payloads):
        with open(path, 'r') as content_file:
            assert content_file.read() == data

0 comments on commit c0cc89b

Please sign in to comment.