DemocracyClub · symroe · Nov 15, 2025 · Nov 15, 2025
diff --git a/.gitignore b/.gitignore
@@ -24,8 +24,6 @@ test-results
 node_modules/
 .vscode/
 /test-env
-/ynr/apps/sopn_parsing/tests/data/sopn_baseline.json
-/ynr/apps/sopn_parsing/tests/data/sopn_baseline_copy.json
 # PyCharm
 .idea/
 

diff --git a/Makefile b/Makefile
diff --git a/ynr/apps/bulk_adding/tests/test_bulk_add.py b/ynr/apps/bulk_adding/tests/test_bulk_add.py
@@ -752,79 +752,3 @@ def test_bulk_add_person_removes_spaces_from_name(self):
         self.assertContains(resp, "Review candidates")
         resp = form.submit()
         self.assertContains(resp, "Bart Simpson")
-
-    def test_fall_back_to_camelot_if_no_textract(self):
-        data = {"name": "Bart", "party_id": "PP52"}
-
-        raw_people = RawPeople.objects.create(
-            ballot=self.dulwich_post_ballot,
-            data=[data],
-            source_type=RawPeople.SOURCE_PARSED_PDF,
-        )
-
-        self.assertEqual(
-            raw_people.as_form_kwargs(),
-            {
-                "initial": [
-                    {
-                        "name": "Bart",
-                        "party": ["PP52", "PP52"],
-                        "previous_party_affiliations": [],
-                        "source": "",
-                    }
-                ]
-            },
-        )
-        raw_people.delete()
-
-        textract_data = {"name": "Lisa", "party_id": "PP53"}
-        raw_people = RawPeople.objects.create(
-            ballot=self.dulwich_post_ballot,
-            data=[data],
-            textract_data=[textract_data],
-            source_type=RawPeople.SOURCE_PARSED_PDF,
-        )
-
-        self.assertEqual(
-            raw_people.as_form_kwargs(),
-            {
-                "initial": [
-                    {
-                        "name": "Lisa",
-                        "party": ["PP53", "PP53"],
-                        "previous_party_affiliations": [],
-                        "source": "",
-                    }
-                ]
-            },
-        )
-
-    def test_can_change_parser_in_frontend(self):
-        """
-        Check that a query param can change the parser we use
-        """
-        BallotSOPN.objects.create(
-            source_url="http://example.com",
-            ballot=self.dulwich_post_ballot,
-            uploaded_file="sopn.pdf",
-        )
-        RawPeople.objects.create(
-            ballot=self.dulwich_post_ballot,
-            data=[{"name": "Bart", "party_id": "PP52"}],
-            textract_data=[{"name": "Lisa", "party_id": "PP53"}],
-            source_type=RawPeople.SOURCE_PARSED_PDF,
-        )
-        response = self.app.get(
-            "/bulk_adding/sopn/parl.65808.2015-05-07/", user=self.user
-        )
-        form = response.forms["bulk_add_form"]
-        # This should be the Textract data
-        self.assertEqual(form.fields["form-0-name"][0].value, "Lisa")
-
-        response = self.app.get(
-            "/bulk_adding/sopn/parl.65808.2015-05-07/?v1_parser=1",
-            user=self.user,
-        )
-        form = response.forms["bulk_add_form"]
-        # This should be the Textract data
-        self.assertEqual(form.fields["form-0-name"][0].value, "Bart")
diff --git a/ynr/apps/bulk_adding/views/sopns.py b/ynr/apps/bulk_adding/views/sopns.py
@@ -123,12 +123,8 @@ def get(self, request, *args, **kwargs):
         return super().get(request, *args, **kwargs)
 
     def get_active_parser(self) -> Optional[SOPNParsingBackends]:
-        if self.request.GET.get("v1_parser"):
-            return SOPNParsingBackends.CAMELOT
         if self.ballot.rawpeople.textract_data:
             return SOPNParsingBackends.TEXTRACT
-        if self.ballot.rawpeople.data:
-            return SOPNParsingBackends.CAMELOT
         return None
 
     def get_context_data(self, **kwargs):

diff --git a/ynr/apps/elections/templates/elections/includes/_sopn_debug.html b/ynr/apps/elections/templates/elections/includes/_sopn_debug.html
@@ -7,28 +7,12 @@ <h3>Parsing Status</h3>
         <ul>
             <li>Pages matched: {% if object.sopn.get_pages %}Yes (matched pages: {{ object.sopn.get_pages|join:", " }}
                 ){% else %}No{% endif %}</li>
-            <li>Camelot tables extracted: {% if object.sopn.camelotparsedsopn %}Yes{% else %}No{% endif %}</li>
             <li>Raw Person Data: {% if object.rawpeople %}Yes{% else %}No{% endif %}</li>
             <li>AWS Textract Data: {% if textract_parsed.raw_data %}Yes{% else %}No{% endif %}</li>
             <li>AWS Textract Parsed? {% if textract_parsed.parsed_data %}Yes{% else %}
                 No{% endif %}</li>
         </ul>
 
-        <h3>Camelot raw Data</h3>
-        {% if object.sopn.camelotparsedsopn.raw_data %}
-            <pre>{{ object.sopn.camelotparsedsopn.as_pandas.to_dict|pprint }}</pre>
-        {% else %}
-            N/A
-        {% endif %}
-
-        <h3>Camelot table Data</h3>
-        {% if object.sopn.camelotparsedsopn.data_as_html %}
-            {{ object.sopn.camelotparsedsopn.data_as_html|safe }}
-        {% else %}
-            N/A
-        {% endif %}
-        <br/>
-
 
         {% if textract_parsed and textract_parsed.as_textractor_document %}
             <h3>AWS extracted table{{ textract_parsed.as_textractor_document.tables|pluralize }}</h3>

diff --git a/ynr/apps/official_documents/models.py b/ynr/apps/official_documents/models.py
@@ -260,7 +260,6 @@ def parse(self):
 
         """
 
-        from sopn_parsing.helpers.extract_tables import extract_ballot_table
         from sopn_parsing.helpers.textract_helpers import (
             NotUsingAWSException,
             TextractSOPNHelper,
@@ -276,12 +275,6 @@ def parse(self):
             # There's a cron job that should pick up the result and carry on parsing later.
             textract_helper.start_detection()
 
-        if getattr(
-            settings, "CAMELOT_ENABLED", False
-        ) and self.uploaded_file.name.endswith(".pdf"):
-            # Camelot
-            extract_ballot_table(self.ballot)
-
 
 class BallotSOPNHistory(BaseBallotSOPN):
     ballot = models.ForeignKey(

diff --git a/ynr/apps/official_documents/tests/test_upload.py b/ynr/apps/official_documents/tests/test_upload.py
@@ -114,20 +114,9 @@ def test_upload_authorized(self):
         with open(self.example_image_filename, "rb") as f:
             form["uploaded_file"] = Upload("pilot.jpg", f.read())
 
-        # TODO: Add back in
-        # with patch(
-        #     "official_documents.views.extract_pages_for_ballot"
-        # ) as extract_pages, patch(
-        #     "official_documents.views.extract_ballot_table"
-        # ) as extract_tables, patch(
-        #     "official_documents.views.parse_raw_data_for_ballot"
-        # ) as parse_tables:
+
         response = form.submit()
         self.assertEqual(response.status_code, 302)
-        # TODO: Add back in
-        # extract_pages.assert_called_once()
-        # extract_tables.assert_called_once()
-        # parse_tables.assert_called_once()
 
         ballot_sopns = BallotSOPN.objects.all()
         self.assertEqual(ballot_sopns.count(), 1)
@@ -181,20 +170,8 @@ def test_docx_upload_form_validation(self):
         with open(self.example_docx_filename, "rb") as f:
             form["uploaded_file"] = Upload("pilot.docx", f.read())
 
-        # TODO: add back in
-        # with patch(
-        #     "official_documents.views.extract_pages_for_ballot"
-        # ) as extract_pages, patch(
-        #     "official_documents.views.extract_ballot_table"
-        # ) as extract_tables, patch(
-        #     "official_documents.views.parse_raw_data_for_ballot"
-        # ) as parse_tables:
         response = form.submit()
         self.assertEqual(response.status_code, 302)
-        # TODO Add back in
-        # extract_pages.assert_called_once()
-        # extract_tables.assert_called_once()
-        # parse_tables.assert_called_once()
         self.assertEqual(BallotSOPN.objects.count(), 1)
         self.assertEqual(response.location, self.ballot.get_sopn_url())
 

diff --git a/ynr/apps/sopn_parsing/helpers/extract_tables.py b/ynr/apps/sopn_parsing/helpers/extract_tables.py
diff --git a/ynr/apps/sopn_parsing/helpers/parse_tables.py b/ynr/apps/sopn_parsing/helpers/parse_tables.py
@@ -476,20 +476,12 @@ def parse_dataframe(ballot: Ballot, df: DataFrame):
 
 def parse_raw_data(ballot: Ballot, reparse=False):
     """
-    Given a Ballot, go and get the Camelot and the AWS Textract dataframes
+    Given a Ballot, go and get the AWS Textract dataframes
     and process them
     """
 
-    camelot_model = getattr(ballot.sopn, "camelotparsedsopn", None)
-    camelot_data = {}
     textract_model = getattr(ballot.sopn, "awstextractparsedsopn", None)
     textract_data = {}
-    if (
-        camelot_model
-        and camelot_model.raw_data_type == "pandas"
-        and (reparse or not camelot_model.parsed_data)
-    ):
-        camelot_data = parse_dataframe(ballot, camelot_model.as_pandas)
     if (
         textract_model
         and textract_model.raw_data
@@ -500,7 +492,7 @@ def parse_raw_data(ballot: Ballot, reparse=False):
             textract_model.parse_raw_data()
         textract_data = parse_dataframe(ballot, textract_model.as_pandas)
 
-    if camelot_data or textract_data:
+    if textract_data:
         # Check there isn't a rawpeople object from another (better) source
         rawpeople_qs = RawPeople.objects.filter(ballot=ballot).exclude(
             source_type=RawPeople.SOURCE_PARSED_PDF
@@ -510,7 +502,7 @@ def parse_raw_data(ballot: Ballot, reparse=False):
                 RawPeople.objects.update_or_create(
                     ballot=ballot,
                     defaults={
-                        "data": camelot_data or "",
+                        "data": "",
                         "textract_data": textract_data or "",
                         "source": "Parsed from {}".format(
                             ballot.sopn.source_url
@@ -525,17 +517,10 @@ def parse_raw_data(ballot: Ballot, reparse=False):
                 return
         # We've done the parsing, so let's still save the result
         storage = DefaultStorage()
-        storage.save(
-            f"raw_people/camelot_{ballot.ballot_paper_id}.json",
-            ContentFile(json.dumps(camelot_data, indent=4).encode("utf8")),
-        )
         storage.save(
             f"raw_people/textract_{ballot.ballot_paper_id}.json",
             ContentFile(json.dumps(textract_data, indent=4).encode("utf8")),
         )
-        if camelot_model:
-            ballot.sopn.camelotparsedsopn.status = "parsed"
-            ballot.sopn.camelotparsedsopn.save()
         if textract_model:
             ballot.sopn.awstextractparsedsopn.status = "parsed"
             ballot.sopn.awstextractparsedsopn.save()
diff --git a/ynr/apps/sopn_parsing/management/commands/sopn_parsing_extract_tables.py b/ynr/apps/sopn_parsing/management/commands/sopn_parsing_extract_tables.py