Merge pull request #7 from arkhn/jblemoine_create_tests

Add travis and pytest
arkhn · Oct 31, 2018 · c0cc89b · c0cc89b
2 parents 269fe22 + 8af0c3b
commit c0cc89b
Show file tree

Hide file tree

Showing 10 changed files with 243 additions and 70 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,9 @@
+language: python
+python:
+  - "3.6"
+# command to install dependencies
+install:
+  - pip install -r requirements.txt
+# command to run tests
+script:
+- pytest
diff --git a/process/clean/hl7.py b/process/clean/hl7.py
@@ -2,84 +2,84 @@
 
 
 def clean_json(filename, output_file=None):
-    input_file = open(filename)
-    input_file.readline()
+    with open(filename) as input_file:
 
-    # Remove and store comments
-    new_lines = ['{']
-    comments = [None]
-    for line in input_file:
-        l = line.split("//", 1)
-        # remove empty lines, and remove \n
-        if l[0] not in '         ':
-            new_lines.append(l[0].replace("\n", ""))
-            comments.append(l[1] if len(l) >= 2 else None)
+        input_file.readline()
 
-    # Handle [{ }] {} from FHIR convention
-    lines = new_lines
-    new_lines = []
-    for i, line in enumerate(lines):
-        # One line case [{ }]
-        m = re.match('''(\s*)\"([^"]*)\"\s*\:\s*\[\{\s*([^}]*)\s+\}\](,*)''', line)
-        if m is not None:
-            new_lines.append('{}"{}<list::{}>": null{}'.format(m.group(1), m.group(2), m.group(3), m.group(4)))
-            continue
+        # Remove and store comments
+        new_lines = ['{']
+        comments = [None]
+        for line in input_file:
+            l = line.split("//", 1)
+            # remove empty lines, and remove \n
+            if l[0] not in '         ':
+                new_lines.append(l[0].replace("\n", ""))
+                comments.append(l[1] if len(l) >= 2 else None)
 
-        # One line case { }
-        m = re.match('''(\s*)"([^"]*)"\s*:\s*{\s*([^}]*)\s+}(,*)''', line)
-        if m is not None:
-            new_lines.append('{}"{}<{}>": null{}'.format(m.group(1), m.group(2), m.group(3), m.group(4)))
-            continue
+        # Handle [{ }] {} from FHIR convention
+        lines = new_lines
+        new_lines = []
+        for i, line in enumerate(lines):
+            # One line case [{ }]
+            m = re.match('''(\s*)\"([^"]*)\"\s*\:\s*\[\{\s*([^}]*)\s+\}\](,*)''', line)
+            if m is not None:
+                new_lines.append('{}"{}<list::{}>": null{}'.format(m.group(1), m.group(2), m.group(3), m.group(4)))
+                continue
 
-        # One line case { } exception with a \n in it
-        m = re.match('''(\s*"[^"]*"\s*:\s*{\s*[^"}\s]+)\s*$''', line)
-        if m is not None:
-            # concat in a single line, by extending i+1 and not append to new_line
-            next_line = lines[i + 1]
-            m_next = re.match('''\s*(\w.*)$''', next_line)
-            lines[i + 1] = m.group(1) + m_next.group(1)
-            # also romove the line in comments
-            del comments[i]
-            continue
+            # One line case { }
+            m = re.match('''(\s*)"([^"]*)"\s*:\s*{\s*([^}]*)\s+}(,*)''', line)
+            if m is not None:
+                new_lines.append('{}"{}<{}>": null{}'.format(m.group(1), m.group(2), m.group(3), m.group(4)))
+                continue
 
-        # Multi line case [{ \n ... \n }]
-        m = re.match('''(\s*)\"([^"]*)\"\s*\:\s*\[\{\s*''', line)
-        if m is not None:
-            new_lines.append('{}"{}<list>": {}'.format(m.group(1), m.group(2), '[{'))
-            continue
-        else:
-            new_lines.append(line)
+            # One line case { } exception with a \n in it
+            m = re.match('''(\s*"[^"]*"\s*:\s*{\s*[^"}\s]+)\s*$''', line)
+            if m is not None:
+                # merge into a single line: prepend this line to lines[i + 1] and append nothing to new_lines
+                next_line = lines[i + 1]
+                m_next = re.match('''\s*(\w.*)$''', next_line)
+                lines[i + 1] = m.group(1) + m_next.group(1)
+                # also remove this line's entry from comments
+                del comments[i]
+                continue
 
-    # Handle < > type extraction and codes handling
-    lines = new_lines
-    new_lines = []
-    for i, line in enumerate(lines):
-        match = re.match('''(\s*)"([^"]*)"\s*:\s*\[?"?<([^>]*)>"?\]?(,*)''', line)
-        if match is not None:
-            given_type = match.group(3)
-            # Test if [ ] present
-            if re.match('''(\s*)"([^"]*)"\s*:\s*\["?<([^>]*)>"?\](,*)''', line):
-                list_marker = 'list::'
+            # Multi line case [{ \n ... \n }]
+            m = re.match('''(\s*)\"([^"]*)\"\s*\:\s*\[\{\s*''', line)
+            if m is not None:
+                new_lines.append('{}"{}<list>": {}'.format(m.group(1), m.group(2), '[{'))
+                continue
             else:
-                list_marker = ''
+                new_lines.append(line)
 
-            if given_type == 'code':  # We need to get the code options given in comments
-                comment = comments[i]
-                code_match = re.match('''[^|]*(\s[A-Za-z\-]+\s(?:\|\s[A-Za-z\-]+\s)+)[^|]*''', comment)
-                if code_match is not None:
-                    codes = code_match.group(1).strip().split(' | ')
-                    given_type += '=' + '|'.join(codes)
+        # Handle < > type extraction and codes handling
+        lines = new_lines
+        new_lines = []
+        for i, line in enumerate(lines):
+            match = re.match('''(\s*)"([^"]*)"\s*:\s*\[?"?<([^>]*)>"?\]?(,*)''', line)
+            if match is not None:
+                given_type = match.group(3)
+                # Test if [ ] present
+                if re.match('''(\s*)"([^"]*)"\s*:\s*\["?<([^>]*)>"?\](,*)''', line):
+                    list_marker = 'list::'
                 else:
-                    raise TypeError('No code provided', match)
+                    list_marker = ''
 
-            new_lines.append('{}"{}<{}{}>": null{}'.format(match.group(1), match.group(2), list_marker, given_type, match.group(4)))
-            continue
-        else:
-            new_lines.append(line)
+                if given_type == 'code':  # We need to get the code options given in comments
+                    comment = comments[i]
+                    code_match = re.match('''[^|]*(\s[A-Za-z\-]+\s(?:\|\s[A-Za-z\-]+\s)+)[^|]*''', comment)
+                    if code_match is not None:
+                        codes = code_match.group(1).strip().split(' | ')
+                        given_type += '=' + '|'.join(codes)
+                    else:
+                        raise TypeError('No code provided', match)
 
-    if output_file is not None:
-        output_file = open(output_file, 'w')
-        output_file.write('\n'.join(new_lines))
-        output_file.close()
-    else:
-        return '\n'.join(new_lines)
+                new_lines.append('{}"{}<{}{}>": null{}'.format(match.group(1), match.group(2), list_marker, given_type, match.group(4)))
+                continue
+            else:
+                new_lines.append(line)
+
+        if output_file is not None:
+            with open(output_file, 'w') as output_file:
+                output_file.write('\n'.join(new_lines))
+        else:
+            return '\n'.join(new_lines)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+black
+pytest
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,27 @@
+import pytest
+import os
+import json
+
+from process import clean_json
+from process import json_to_yml
+
+CLEAN_DATA_FOLDER = 'tests/data/clean'
+CORRUPTED_DATA_FOLDER = 'tests/data/corrupted'
+FILES = ['patient.json']
+
+
+@pytest.fixture(scope="module",
+                params=[os.path.join(CORRUPTED_DATA_FOLDER, filename) for filename in FILES])
+def cleaned_json(request):
+    return clean_json(request.param)
+
+
+@pytest.fixture(scope="module")
+def cleaned_yml(cleaned_json):
+    return json_to_yml(json.loads(cleaned_json))
+
+
+@pytest.fixture(scope="module",
+                params=[os.path.join(CLEAN_DATA_FOLDER, filename) for filename in FILES])
+def expected_json(request):
+    return json.load(open(request.param))
diff --git a/tests/data/clean/patient.json b/tests/data/clean/patient.json
@@ -0,0 +1,40 @@
+{
+  "resourceType" : "Patient",
+  "identifier<list::Identifier>": null,
+  "active<boolean>": null,
+  "name<list::HumanName>": null,
+  "telecom<list::ContactPoint>": null,
+  "gender<code=male|female|other|unknown>": null,
+  "birthDate<date>": null,
+  "deceasedBoolean<boolean>": null,
+  "deceasedDateTime<dateTime>": null,
+  "address<list::Address>": null,
+  "maritalStatus<CodeableConcept>": null,
+  "multipleBirthBoolean<boolean>": null,
+  "multipleBirthInteger<integer>": null,
+  "photo<list::Attachment>": null,
+  "contact<list>": [{
+    "relationship<list::CodeableConcept>": null,
+    "name<HumanName>": null,
+    "telecom<list::ContactPoint>": null,
+    "address<Address>": null,
+    "gender<code=male|female|other|unknown>": null,
+    "organization<Reference(Organization)>": null,
+    "period<Period>": null
+  }],
+  "animal" : { 
+    "species<CodeableConcept>": null,
+    "breed<CodeableConcept>": null,
+    "genderStatus<CodeableConcept>": null
+  },
+  "communication<list>": [{
+    "language<CodeableConcept>": null,
+    "preferred<boolean>": null
+  }],
+  "generalPractitioner<list::Reference(Organization|Practitioner)>": null,
+  "managingOrganization<Reference(Organization)>": null,
+  "link<list>": [{
+    "other<Reference(Patient|RelatedPerson)>": null,
+    "type<code=replaced-by|replaces|refer|seealso>": null
+  }]
+}
diff --git a/tests/data/corrupted/patient.json b/tests/data/corrupted/patient.json
@@ -0,0 +1,44 @@
+{doco
+  "resourceType" : "Patient",
+  // from Resource: id, meta, implicitRules, and language
+  // from DomainResource: text, contained, extension, and modifierExtension
+  "identifier" : [{ Identifier }], // An identifier for this patient
+  "active" : <boolean>, // Whether this patient's record is in active use
+  "name" : [{ HumanName }], // A name associated with the patient
+  "telecom" : [{ ContactPoint }], // A contact detail for the individual
+  "gender" : "<code>", // male | female | other | unknown
+  "birthDate" : "<date>", // The date of birth for the individual
+  // deceased[x]: Indicates if the individual is deceased or not. One of these 2:
+  "deceasedBoolean" : <boolean>,
+  "deceasedDateTime" : "<dateTime>",
+  "address" : [{ Address }], // Addresses for the individual
+  "maritalStatus" : { CodeableConcept }, // Marital (civil) status of a patient
+  // multipleBirth[x]: Whether patient is part of a multiple birth. One of these 2:
+  "multipleBirthBoolean" : <boolean>,
+  "multipleBirthInteger" : <integer>,
+  "photo" : [{ Attachment }], // Image of the patient
+  "contact" : [{ // A contact party (e.g. guardian, partner, friend) for the patient
+    "relationship" : [{ CodeableConcept }], // The kind of relationship
+    "name" : { HumanName }, // A name associated with the contact person
+    "telecom" : [{ ContactPoint }], // A contact detail for the person
+    "address" : { Address }, // Address for the contact person
+    "gender" : "<code>", // male | female | other | unknown
+    "organization" : { Reference(Organization) }, // C? Organization that is associated with the contact
+    "period" : { Period } // The period during which this contact person or organization is valid to be contacted relating to this patient
+  }],
+  "animal" : { // This patient is known to be an animal (non-human)
+    "species" : { CodeableConcept }, // R!  E.g. Dog, Cow
+    "breed" : { CodeableConcept }, // E.g. Poodle, Angus
+    "genderStatus" : { CodeableConcept } // E.g. Neutered, Intact
+  },
+  "communication" : [{ // A list of Languages which may be used to communicate with the patient about his or her health
+    "language" : { CodeableConcept }, // R!  The language which can be used to communicate with the patient about his or her health
+    "preferred" : <boolean> // Language preference indicator
+  }],
+  "generalPractitioner" : [{ Reference(Organization|Practitioner) }], // Patient's nominated primary care provider
+  "managingOrganization" : { Reference(Organization) }, // Organization that is the custodian of the patient record
+  "link" : [{ // Link to another patient resource that concerns the same actual person
+    "other" : { Reference(Patient|RelatedPerson) }, // R!  The other patient or related person resource that the link refers to
+    "type" : "<code>" // R!  replaced-by | replaces | refer | seealso - type of link
+  }]
+}
diff --git a/tests/test_clean.py b/tests/test_clean.py
@@ -0,0 +1,19 @@
+import json
+
+
+def test_clean_json(cleaned_json, expected_json):
+
+    # json.loads raises if the cleaned output is not valid JSON
+    cleaned_dict = json.loads(cleaned_json)
+
+    # assert cleaned json is equal to expected output
+    assert cleaned_dict == expected_json
+
+
+
+
+
+
+
+
+
diff --git a/tests/test_convert.py b/tests/test_convert.py
@@ -0,0 +1,11 @@
+from process import json_to_yml
+
+def test_json_to_yml(cleaned_json):
+
+    # TODO: it is not yet clear how to assert that the YAML output has the expected format
+    # more info: https://github.com/arkhn/fhir-store/issues/3
+    pass
+
+
+
+
diff --git a/tests/test_write.py b/tests/test_write.py
@@ -0,0 +1,21 @@
+import os
+from process import write
+
+DOMAIN = 'domain_test'
+SUBDOMAIN = 'subdomain_test'
+RESOURCE = 'resource_test'
+
+
+def test_write(cleaned_json, cleaned_yml):
+
+    write(domain=DOMAIN, subdomain=SUBDOMAIN, resource=RESOURCE, format='json', file_data=cleaned_json)
+    write(domain=DOMAIN, subdomain=SUBDOMAIN, resource=RESOURCE, format='yml', file_data=cleaned_yml)
+
+    json_path = os.path.join(os.path.join('json', DOMAIN, SUBDOMAIN, RESOURCE + '.json'))
+    yml_path = os.path.join(os.path.join('yml', DOMAIN, SUBDOMAIN, RESOURCE + '.yml'))
+
+    # assert both files have been written properly
+    with open(json_path, 'r') as content_file:
+        assert content_file.read() == cleaned_json
+    with open(yml_path, 'r') as content_file:
+        assert content_file.read() == cleaned_yml