Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions ckanext/switzerland/harvester/base_sbb_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,22 @@ def get_config_validation_schema(self):
"storage_adapter": str,
"bucket": str,
voluptuous.Required("date_pattern", default=""): str,
voluptuous.Required(
"resource_sort_order", default="desc"
): voluptuous.In(["asc", "desc"]),
}
)

def load_config(self, config_str):
    """Parse the harvester JSON config string and validate it.

    If ``resource_regex`` is present, any ``resource_sort_order`` entry is
    dropped before validation so the schema default (``desc``) applies —
    matched resources are always sorted descending in that case.
    """
    schema = self.get_config_validation_schema()
    parsed = json.loads(config_str)
    if "resource_regex" in parsed and "resource_sort_order" in parsed:
        # resource_regex takes precedence over any explicit sort order.
        del parsed["resource_sort_order"]
        log.info(
            "resource_regex is set in config: ignoring resource_sort_order and "
            "using default value (desc)"
        )
    return schema(parsed)

# tested
Expand Down Expand Up @@ -988,23 +998,34 @@ def _get_ordered_resources(self, package):
unmatched_resources = []

# get filename regex for permalink from harvester config or fallback to a
# catch-all
# catch-all that matches all filenames (.*)
identifier_regex = self.config["resource_regex"]
for resource in package["resources"]:
if re.match(identifier_regex, resource["identifier"], re.IGNORECASE):
ordered_resources.append(resource)
else:
# We only add to unmatched_resources if the resource_regex exists and a
# filename doesn't match it
unmatched_resources.append(resource)

if self.config["resource_sort_order"] == "asc":
reverse = False
else:
reverse = True
log.debug(
f"resource_sort_order is {self.config['resource_sort_order']}"
f" and reverse is {reverse}"
)

if self.config["date_pattern"]:
ordered_resources.sort(
key=lambda r: re.search(
self.config["date_pattern"], r["identifier"]
).group(),
reverse=True,
reverse=reverse,
)
else:
ordered_resources.sort(key=lambda r: r["identifier"], reverse=True)
ordered_resources.sort(key=lambda r: r["identifier"], reverse=reverse)

return ordered_resources, unmatched_resources

Expand All @@ -1019,9 +1040,11 @@ def finalize(self, harvest_object, harvest_object_data): # noqa: C901
# Deleting old resources, generate permalink, order resources:
# We do this by matching a regex, defined in the `resource_regex` key of the
# harvester json config, against the identifier (filename) of the resources of
# the dataset. The ones that matched are thrown in a list and sorted by name,
# descending. This makes the newest file appear first when the filenames have
# the correct format (YYYY-MM-DD-*).
# the dataset. The ones that matched are thrown in a list and sorted by name.
# When resource_regex is omitted from the config JSON, resource_sort_order
# selects ascending vs descending; when resource_regex is included in the
# config, resource_sort_order is ignored and matched resources are always sorted
# descending (newest name first for YYYYMMDD-* style names).
# In case filenames have different structure, e.g., *_YYYY-MM-DD.csv,
# `date_pattern` should be specified in the harvester configuration, which is
# used to list the newest files on the top of the list.
Expand Down
3 changes: 3 additions & 0 deletions ckanext/switzerland/tests/base_ftp_harvester_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def run_harvester(
self,
force_all=False,
resource_regex=None,
resource_sort_order=None,
max_resources=None,
dataset=data.dataset_name,
timetable_regex=None,
Expand All @@ -47,6 +48,8 @@ def run_harvester(
config["force_all"] = True
if resource_regex:
config["resource_regex"] = resource_regex
if resource_sort_order is not None:
config["resource_sort_order"] = resource_sort_order
if max_resources:
config["max_resources"] = max_resources
if timetable_regex:
Expand Down
72 changes: 72 additions & 0 deletions ckanext/switzerland/tests/test_sbb_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,78 @@ def test_order_permalink_regex(self):
),
)

def test_resource_sort_order_default_is_desc(self):
    """Omitting resource_sort_order from the config sorts resources descending."""
    fs = self.get_filesystem(filename="20160901.csv")
    MockFTPStorageAdapter.filesystem = fs
    newer_file = os.path.join(data.environment, data.folder, "20160902.csv")
    fs.writetext(newer_file, data.dataset_content_2)

    self.run_harvester(ftp_server="testserver")

    resources = self.get_package().resources
    self.assertEqual(resources[0].extras["identifier"], "20160902.csv")
    self.assertEqual(resources[1].extras["identifier"], "20160901.csv")

def test_resource_sort_order_desc_explicit(self):
    """An explicit resource_sort_order of 'desc' puts the newest name first."""
    fs = self.get_filesystem(filename="20160901.csv")
    MockFTPStorageAdapter.filesystem = fs
    newer_file = os.path.join(data.environment, data.folder, "20160902.csv")
    fs.writetext(newer_file, data.dataset_content_2)

    self.run_harvester(ftp_server="testserver", resource_sort_order="desc")

    resources = self.get_package().resources
    self.assertEqual(resources[0].extras["identifier"], "20160902.csv")
    self.assertEqual(resources[1].extras["identifier"], "20160901.csv")

def test_resource_sort_order_asc(self):
    """resource_sort_order 'asc' sorts identifiers ascending; the permalink
    points at the first (oldest-named) resource."""
    fs = self.get_filesystem(filename="20160901.csv")
    MockFTPStorageAdapter.filesystem = fs
    newer_file = os.path.join(data.environment, data.folder, "20160902.csv")
    fs.writetext(newer_file, data.dataset_content_2)

    self.run_harvester(ftp_server="testserver", resource_sort_order="asc")

    package = self.get_package()
    self.assertEqual(package.resources[0].extras["identifier"], "20160901.csv")
    self.assertEqual(package.resources[1].extras["identifier"], "20160902.csv")
    expected_permalink = (
        "http://odp.test/dataset/{}/resource/{}/download/20160901.csv".format(
            package.id, package.resources[0].id
        )
    )
    self.assertEqual(package.extras["permalink"], expected_permalink)

def test_resource_sort_order_ignored_when_resource_regex_in_config(self):
    """If resource_regex is present in the harvester config, resource_sort_order is
    ignored: matched files keep descending identifier order even when asc is set.
    Unmatched files still precede the matched block (unchanged).
    """
    fs = self.get_filesystem(filename="20160901.csv")
    MockFTPStorageAdapter.filesystem = fs
    fs.writetext(
        os.path.join(data.environment, data.folder, "20160902.csv"),
        data.dataset_content_2,
    )
    # Two files that will NOT match the 8-digit regex below.
    for unmatched_name in ("1111Resource.csv", "9999Resource.csv"):
        fs.writetext(
            os.path.join(data.environment, data.folder, unmatched_name),
            data.dataset_content_3,
        )

    self.run_harvester(
        ftp_server="testserver",
        resource_regex=r"\d{8}.csv",
        resource_sort_order="asc",
    )

    package = self.get_package()
    identifiers = [r.extras["identifier"] for r in package.resources]
    self.assertEqual(
        identifiers,
        [
            "1111Resource.csv",
            "9999Resource.csv",
            "20160902.csv",
            "20160901.csv",
        ],
    )

# cleanup tests
def test_max_resources(self):
filesystem = self.get_filesystem(filename="20160901.csv")
Expand Down
Loading