33 changes: 24 additions & 9 deletions easyDataverse/base.py
@@ -25,6 +25,7 @@ class DataverseBase(BaseModel):
     )
 
     _changed: Set = PrivateAttr(default_factory=set)
+    _new: bool = PrivateAttr(default=True)
 
     # ! Overloads
     def __setattr__(self, name: str, value: Any) -> None:
@@ -184,10 +185,10 @@ def to_dataverse_json(self, indent: int = 2) -> str:
     def extract_changed(self) -> List[Dict]:
         """Extracts the changed fields from the object"""
 
-        self._add_changed_multiples()
-
         changed_fields = []
 
+        self._add_changed_multiples()
+
         for name in self._changed:
             field = self.model_fields[name]
@@ -212,17 +213,16 @@ def _add_changed_multiples(self):
             if not self._is_multiple(field):
                 continue
 
-            value = getattr(self, name)
-            has_changes = any(value._changed for value in value)
+            has_changed = any(
+                compound._changed or compound._new
+                for compound in getattr(self, name)
+            )
 
-            if has_changes:
+            if has_changed:
                 self._changed.add(name)
 
     def _process_multiple_compound(self, compounds) -> List[Dict]:
-        """Whenever a single compound has changed, return all compounds."""
-
-        if not any(len(compound._changed) for compound in compounds):
-            return []
+        """Processes multiple compounds"""
 
         return [compound.dataverse_dict() for compound in compounds]

@@ -254,6 +254,21 @@ def _wrap_changed(self, field: FieldInfo, value: Any):
"value": value,
}

def _set_new_prop(self, value: bool):
"""Sets the new property of the object"""

self._new = value

for attr, field in self.model_fields.items():
if not field.json_schema_extra["typeClass"] == "compound":
continue

if field.json_schema_extra["multiple"]:
for compound in getattr(self, attr):
compound._set_new_prop(value)
else:
getattr(self, attr)._set_new_prop(value)

@staticmethod
def is_empty(value):
"""Checks whether a given value is None or empty"""
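For orientation, here is a minimal, self-contained sketch of the change-tracking pattern this file implements, assuming Pydantic v2; TrackedModel is an illustrative stand-in for DataverseBase, not the library's actual class. New objects start with _new=True, and edits made after construction land in _changed:

from typing import Any, Set

from pydantic import BaseModel, PrivateAttr


class TrackedModel(BaseModel):
    title: str = ""

    # Mirrors DataverseBase: which fields changed, and whether the object
    # was created locally (_new=True) or loaded from a server.
    _changed: Set = PrivateAttr(default_factory=set)
    _new: bool = PrivateAttr(default=True)

    def __setattr__(self, name: str, value: Any) -> None:
        # Only public model fields count as tracked changes.
        if name in type(self).model_fields:
            self._changed.add(name)
        super().__setattr__(name, value)


obj = TrackedModel(title="original")
assert obj._changed == set()  # construction is not a change

obj._new = False              # simulate an object loaded from Dataverse
obj.title = "edited"
assert obj._changed == {"title"}  # tracked for the next update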
9 changes: 8 additions & 1 deletion easyDataverse/dataset.py
@@ -225,10 +225,16 @@ def upload(

         return self.p_id
 
-    def update(self):
+    def update(self, replace: bool = True):
         """Updates a dataset if a p_id has been given.
 
         Use this function to update a dataset that has already been uploaded to Dataverse.
+
+        Args:
+            replace (bool, optional): Whether to replace existing metadata field values (True) or add to them (False). Defaults to True.
+
+        Raises:
+            HTTPError: If the dataset could not be updated.
         """
 
         if not self.p_id:
@@ -238,6 +244,7 @@
             to_change=self._extract_changes(),
             p_id=self.p_id,  # type: ignore
             files=self.files,
+            replace=replace,
             DATAVERSE_URL=str(self.DATAVERSE_URL),  # type: ignore
             API_TOKEN=str(self.API_TOKEN),
         )
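A quick usage sketch of the new flag; the server URL, API token, and PID are placeholders, and the top-level Dataverse import is assumed:

from easyDataverse import Dataverse

dataverse = Dataverse(
    server_url="https://demo.dataverse.org",  # placeholder instance
    api_token="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",  # placeholder token
)

dataset = dataverse.load_dataset("doi:10.70122/FK2/EXAMPLE")  # placeholder PID
dataset.citation.title = "Title has changed"

# replace=True (the default) overwrites current field values;
# replace=False asks Dataverse to add the new values instead.
dataset.update(replace=False)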
4 changes: 4 additions & 0 deletions easyDataverse/dataverse.py
@@ -339,6 +339,10 @@ def load_dataset(
             n_parallel_downloads=n_parallel_downloads,
         )
 
+        # Set "new" prop to False
+        for metadatablock in dataset.metadatablocks.values():
+            metadatablock._set_new_prop(False)
+
         return dataset
 
     def _fetch_dataset(
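Continuing the sketch above, the intended effect of marking loaded blocks as not new is roughly the following (other_id and add_other_id as used in the tests below; this assumes the loaded dataset already carries otherId entries):

dataset = dataverse.load_dataset(pid)

# Compounds fetched from the server are flagged as not new...
assert all(not other_id._new for other_id in dataset.citation.other_id)

# ...while a locally added compound keeps _new=True, so
# _add_changed_multiples includes the whole otherId field
# in the next update.
dataset.citation.add_other_id(agency="Some Agency", value="some-id")
assert dataset.citation.other_id[-1]._new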
14 changes: 10 additions & 4 deletions easyDataverse/uploader.py
@@ -116,6 +116,7 @@ def update_dataset(
     p_id: str,
     to_change: Dict,
     files: List[File],
+    replace: bool,
     DATAVERSE_URL: Optional[str] = None,
     API_TOKEN: Optional[str] = None,
 ) -> bool:
@@ -125,6 +126,7 @@
         p_id (str): Persistent ID of the dataset.
         to_change (Dict): Dictionary of fields to change.
         files (List[File]): List of files that should be uploaded. Can also include directory names.
+        replace (bool): Whether to replace existing metadata values or add to them.
         DATAVERSE_URL (Optional[str], optional): The URL of the Dataverse instance. Defaults to None.
         API_TOKEN (Optional[str], optional): The API token for authentication. Defaults to None.
@@ -137,6 +139,7 @@
     _update_metadata(
         p_id=p_id,
         to_change=to_change,
+        replace=replace,
         base_url=DATAVERSE_URL,  # type: ignore
         api_token=API_TOKEN,  # type: ignore
     )
@@ -155,6 +158,7 @@ def _update_metadata(
     to_change: Dict,
     base_url: str,
     api_token: str,
+    replace: bool,
 ):
     """Updates the metadata of a dataset.
 
@@ -167,9 +171,11 @@
     Raises:
         requests.HTTPError: If the request fails.
     """
-    EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=true"
-    headers = {"X-Dataverse-key": api_token}
-
-    response = requests.put(EDIT_ENDPOINT, headers=headers, json=to_change)
+    headers = {"X-Dataverse-key": api_token}
+    endpoint = f"/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace={str(replace).lower()}"
+    url = urljoin(base_url, endpoint)
+    response = requests.put(url, headers=headers, json=to_change)
 
     response.raise_for_status()
     if response.status_code != 200:
         raise requests.HTTPError(f"Failed to update metadata: {response.text}")
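One property of this construction worth noting: because the endpoint path starts with a slash, urljoin keeps only the scheme and host of base_url, so a Dataverse instance served under a subpath would lose that prefix. A quick sketch with placeholder values:

from urllib.parse import urljoin

p_id = "doi:10.70122/FK2/EXAMPLE"  # placeholder PID
replace = False

endpoint = (
    "/api/datasets/:persistentId/editMetadata"
    f"?persistentId={p_id}&replace={str(replace).lower()}"
)

print(urljoin("https://demo.dataverse.org", endpoint))
# https://demo.dataverse.org/api/datasets/:persistentId/editMetadata?persistentId=doi:10.70122/FK2/EXAMPLE&replace=false

print(urljoin("https://example.org/dataverse", endpoint))
# https://example.org/api/datasets/... (the /dataverse prefix is dropped)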
180 changes: 179 additions & 1 deletion tests/integration/test_dataset_update.py
@@ -13,7 +13,6 @@ def test_dataset_update(
         credentials,
         minimal_upload,
     ):
-
         # Arrange
         base_url, api_token = credentials
         url = f"{base_url}/api/dataverses/root/datasets"
@@ -38,6 +37,11 @@
         # Fetch the dataset and update the title
         dataset = dataverse.load_dataset(pid)
         dataset.citation.title = "Title has changed"
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
         dataset.update()
 
         # Re-fetch the dataset
@@ -59,10 +63,184 @@
             )
         )
 
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
         # Assert
         assert (
             title_field["value"] == "Title has changed"
         ), "The updated dataset title does not match the expected title."
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."

+    @pytest.mark.integration
+    def test_dataset_update_wo_replace(
+        self,
+        credentials,
+        minimal_upload,
+    ):
+        # Arrange
+        base_url, api_token = credentials
+        url = f"{base_url}/api/dataverses/root/datasets"
+        response = requests.post(
+            url=url,
+            json=minimal_upload,
+            headers={
+                "X-Dataverse-key": api_token,
+                "Content-Type": "application/json",
+            },
+        )
+
+        response.raise_for_status()
+        pid = response.json()["data"]["persistentId"]
+
+        # Act
+        dataverse = Dataverse(
+            server_url=base_url,
+            api_token=api_token,
+        )
+
+        # Fetch the dataset and add other ids
+        dataset = dataverse.load_dataset(pid)
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
+        dataset.update(replace=False)
+
+        # Re-fetch the dataset
+        url = (
+            f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}"
+        )
+
+        response = requests.get(
+            url,
+            headers={"X-Dataverse-key": api_token},
+        )
+
+        response.raise_for_status()
+        updated_dataset = response.json()
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
+        # Assert
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."


+    @pytest.mark.integration
+    def test_update_edit(
+        self,
+        credentials,
+        minimal_upload,
+    ):
+        # Arrange
+        base_url, api_token = credentials
+        url = f"{base_url}/api/dataverses/root/datasets"
+        response = requests.post(
+            url=url,
+            json=minimal_upload,
+            headers={
+                "X-Dataverse-key": api_token,
+                "Content-Type": "application/json",
+            },
+        )
+
+        response.raise_for_status()
+        pid = response.json()["data"]["persistentId"]
+
+        # Act
+        dataverse = Dataverse(
+            server_url=base_url,
+            api_token=api_token,
+        )
+
+        # Fetch the dataset and add other ids
+        dataset = dataverse.load_dataset(pid)
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
+        dataset.update(replace=False)
+
+        # Fetch another time and edit the first entry
+        dataset = dataverse.load_dataset(pid)
+        dataset.citation.other_id[0].agency = "Software Heritage1 updated"
+
+        dataset.update(replace=False)
+
+        # Re-fetch the dataset
+        url = (
+            f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}"
+        )
+
+        response = requests.get(
+            url,
+            headers={"X-Dataverse-key": api_token},
+        )
+
+        response.raise_for_status()
+        updated_dataset = response.json()
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
+        # Assert
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1 updated"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."
 
 
     @staticmethod
     def sort_citation(dataset: Dict):
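For reference, the editMetadata payload implied by these assertions looks roughly like the following. This is a hedged reconstruction of the Dataverse field JSON, not a dump of what easyDataverse actually sends:

# Hypothetical body for PUT /api/datasets/:persistentId/editMetadata,
# inferred from the shape the tests read back.
to_change = {
    "fields": [
        {
            "typeName": "otherId",
            "typeClass": "compound",
            "multiple": True,
            "value": [
                {
                    "otherIdAgency": {"value": "Software Heritage1"},
                    "otherIdValue": {"value": "softwareid1"},
                },
                {
                    "otherIdAgency": {"value": "Software Heritage2"},
                    "otherIdValue": {"value": "softwareid2"},
                },
            ],
        }
    ]
}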