33 changes: 24 additions & 9 deletions easyDataverse/base.py
@@ -25,6 +25,7 @@ class DataverseBase(BaseModel):
     )
 
     _changed: Set = PrivateAttr(default_factory=set)
+    _new: bool = PrivateAttr(default=True)
 
     # ! Overloads
     def __setattr__(self, name: str, value: Any) -> None:
@@ -184,10 +185,10 @@ def to_dataverse_json(self, indent: int = 2) -> str:
     def extract_changed(self) -> List[Dict]:
         """Extracts the changed fields from the object"""
 
-        self._add_changed_multiples()
-
         changed_fields = []
 
+        self._add_changed_multiples()
+
         for name in self._changed:
             field = self.model_fields[name]
@@ -212,17 +213,16 @@ def _add_changed_multiples(self):
             if not self._is_multiple(field):
                 continue
 
-            value = getattr(self, name)
-            has_changes = any(value._changed for value in value)
+            has_changed = any(
+                compound._changed or compound._new
+                for compound in getattr(self, name)
+            )
 
-            if has_changes:
+            if has_changed:
                 self._changed.add(name)
 
     def _process_multiple_compound(self, compounds) -> List[Dict]:
-        """Whenever a single compound has changed, return all compounds."""
-
-        if not any(len(compound._changed) for compound in compounds):
-            return []
+        """Processes multiple compounds"""
 
         return [compound.dataverse_dict() for compound in compounds]

@@ -254,6 +254,21 @@ def _wrap_changed(self, field: FieldInfo, value: Any):
"value": value,
}

def _set_new_prop(self, value: bool):
"""Sets the new property of the object"""

self._new = value

for attr, field in self.model_fields.items():
if not field.json_schema_extra["typeClass"] == "compound":
continue

if field.json_schema_extra["multiple"]:
for compound in getattr(self, attr):
compound._set_new_prop(value)
else:
getattr(self, attr)._set_new_prop(value)

@staticmethod
def is_empty(value):
"""Checks whether a given value is None or empty"""
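For orientation, here is a minimal, self-contained sketch of the change-tracking pattern this file implements, assuming Pydantic v2; TrackedModel is an illustrative stand-in for DataverseBase, not the library's actual class. New objects start with _new=True, and edits made after construction land in _changed:

from typing import Any, Set

from pydantic import BaseModel, PrivateAttr


class TrackedModel(BaseModel):
    title: str = ""

    # Mirrors DataverseBase: which fields changed, and whether the object
    # was created locally (_new=True) or loaded from a server.
    _changed: Set = PrivateAttr(default_factory=set)
    _new: bool = PrivateAttr(default=True)

    def __setattr__(self, name: str, value: Any) -> None:
        # Only public model fields count as tracked changes.
        if name in type(self).model_fields:
            self._changed.add(name)
        super().__setattr__(name, value)


obj = TrackedModel(title="original")
assert obj._changed == set()  # construction is not a change

obj._new = False              # simulate an object loaded from Dataverse
obj.title = "edited"
assert obj._changed == {"title"}  # tracked for the next update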
9 changes: 8 additions & 1 deletion easyDataverse/dataset.py
@@ -225,10 +225,16 @@ def upload(

         return self.p_id
 
-    def update(self):
+    def update(self, replace: bool = True):
         """Updates a dataset if a p_id has been given.
 
         Use this function to update a dataset that has already been uploaded to Dataverse.
+
+        Args:
+            replace (bool, optional): Whether to replace existing metadata field values (True) or add to them (False). Defaults to True.
+
+        Raises:
+            HTTPError: If the dataset could not be updated.
         """
 
         if not self.p_id:
@@ -238,6 +244,7 @@
             to_change=self._extract_changes(),
             p_id=self.p_id,  # type: ignore
             files=self.files,
+            replace=replace,
             DATAVERSE_URL=str(self.DATAVERSE_URL),  # type: ignore
             API_TOKEN=str(self.API_TOKEN),
         )
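A quick usage sketch of the new flag; the server URL, API token, and PID are placeholders, and the top-level Dataverse import is assumed:

from easyDataverse import Dataverse

dataverse = Dataverse(
    server_url="https://demo.dataverse.org",  # placeholder instance
    api_token="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",  # placeholder token
)

dataset = dataverse.load_dataset("doi:10.70122/FK2/EXAMPLE")  # placeholder PID
dataset.citation.title = "Title has changed"

# replace=True (the default) overwrites current field values;
# replace=False asks Dataverse to add the new values instead.
dataset.update(replace=False)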
4 changes: 4 additions & 0 deletions easyDataverse/dataverse.py
@@ -339,6 +339,10 @@ def load_dataset(
             n_parallel_downloads=n_parallel_downloads,
         )
 
+        # Set "new" prop to False
+        for metadatablock in dataset.metadatablocks.values():
+            metadatablock._set_new_prop(False)
+
         return dataset
 
     def _fetch_dataset(
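Continuing the sketch above, the intended effect of marking loaded blocks as not new is roughly the following (other_id and add_other_id as used in the tests below; this assumes the loaded dataset already carries otherId entries):

dataset = dataverse.load_dataset(pid)

# Compounds fetched from the server are flagged as not new...
assert all(not other_id._new for other_id in dataset.citation.other_id)

# ...while a locally added compound keeps _new=True, so
# _add_changed_multiples includes the whole otherId field
# in the next update.
dataset.citation.add_other_id(agency="Some Agency", value="some-id")
assert dataset.citation.other_id[-1]._new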
14 changes: 10 additions & 4 deletions easyDataverse/uploader.py
@@ -116,6 +116,7 @@ def update_dataset(
     p_id: str,
     to_change: Dict,
     files: List[File],
+    replace: bool,
     DATAVERSE_URL: Optional[str] = None,
     API_TOKEN: Optional[str] = None,
 ) -> bool:
@@ -125,6 +126,7 @@
         p_id (str): Persistent ID of the dataset.
         to_change (Dict): Dictionary of fields to change.
         files (List[File]): List of files that should be uploaded. Can also include directory names.
+        replace (bool): Whether to replace existing metadata values or add to them.
         DATAVERSE_URL (Optional[str], optional): The URL of the Dataverse instance. Defaults to None.
         API_TOKEN (Optional[str], optional): The API token for authentication. Defaults to None.
@@ -137,6 +139,7 @@
     _update_metadata(
         p_id=p_id,
         to_change=to_change,
+        replace=replace,
         base_url=DATAVERSE_URL,  # type: ignore
         api_token=API_TOKEN,  # type: ignore
     )
@@ -155,6 +158,7 @@ def _update_metadata(
     to_change: Dict,
     base_url: str,
     api_token: str,
+    replace: bool,
 ):
     """Updates the metadata of a dataset.
 
@@ -167,9 +171,11 @@
     Raises:
         requests.HTTPError: If the request fails.
     """
-    EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=true"
-    headers = {"X-Dataverse-key": api_token}
-
-    response = requests.put(EDIT_ENDPOINT, headers=headers, json=to_change)
+    headers = {"X-Dataverse-key": api_token}
+    endpoint = f"/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace={str(replace).lower()}"
+    url = urljoin(base_url, endpoint)
+    response = requests.put(url, headers=headers, json=to_change)
 
     response.raise_for_status()
     if response.status_code != 200:
         raise requests.HTTPError(f"Failed to update metadata: {response.text}")
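One property of this construction worth noting: because the endpoint path starts with a slash, urljoin keeps only the scheme and host of base_url, so a Dataverse instance served under a subpath would lose that prefix. A quick sketch with placeholder values:

from urllib.parse import urljoin

p_id = "doi:10.70122/FK2/EXAMPLE"  # placeholder PID
replace = False

endpoint = (
    "/api/datasets/:persistentId/editMetadata"
    f"?persistentId={p_id}&replace={str(replace).lower()}"
)

print(urljoin("https://demo.dataverse.org", endpoint))
# https://demo.dataverse.org/api/datasets/:persistentId/editMetadata?persistentId=doi:10.70122/FK2/EXAMPLE&replace=false

print(urljoin("https://example.org/dataverse", endpoint))
# https://example.org/api/datasets/... (the /dataverse prefix is dropped)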
180 changes: 179 additions & 1 deletion tests/integration/test_dataset_update.py
@@ -13,7 +13,6 @@ def test_dataset_update(
         credentials,
         minimal_upload,
     ):
-
         # Arrange
         base_url, api_token = credentials
         url = f"{base_url}/api/dataverses/root/datasets"
@@ -38,6 +37,11 @@
         # Fetch the dataset and update the title
         dataset = dataverse.load_dataset(pid)
         dataset.citation.title = "Title has changed"
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
         dataset.update()
 
         # Re-fetch the dataset
@@ -59,10 +63,184 @@
             )
         )
 
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
         # Assert
         assert (
             title_field["value"] == "Title has changed"
         ), "The updated dataset title does not match the expected title."
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."

+    @pytest.mark.integration
+    def test_dataset_update_wo_replace(
+        self,
+        credentials,
+        minimal_upload,
+    ):
+        # Arrange
+        base_url, api_token = credentials
+        url = f"{base_url}/api/dataverses/root/datasets"
+        response = requests.post(
+            url=url,
+            json=minimal_upload,
+            headers={
+                "X-Dataverse-key": api_token,
+                "Content-Type": "application/json",
+            },
+        )
+
+        response.raise_for_status()
+        pid = response.json()["data"]["persistentId"]
+
+        # Act
+        dataverse = Dataverse(
+            server_url=base_url,
+            api_token=api_token,
+        )
+
+        # Fetch the dataset and add other ids
+        dataset = dataverse.load_dataset(pid)
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
+        dataset.update(replace=False)
+
+        # Re-fetch the dataset
+        url = (
+            f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}"
+        )
+
+        response = requests.get(
+            url,
+            headers={"X-Dataverse-key": api_token},
+        )
+
+        response.raise_for_status()
+        updated_dataset = response.json()
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
+        # Assert
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."


+    @pytest.mark.integration
+    def test_update_edit(
+        self,
+        credentials,
+        minimal_upload,
+    ):
+        # Arrange
+        base_url, api_token = credentials
+        url = f"{base_url}/api/dataverses/root/datasets"
+        response = requests.post(
+            url=url,
+            json=minimal_upload,
+            headers={
+                "X-Dataverse-key": api_token,
+                "Content-Type": "application/json",
+            },
+        )
+
+        response.raise_for_status()
+        pid = response.json()["data"]["persistentId"]
+
+        # Act
+        dataverse = Dataverse(
+            server_url=base_url,
+            api_token=api_token,
+        )
+
+        # Fetch the dataset and add other ids
+        dataset = dataverse.load_dataset(pid)
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
+        dataset.update(replace=False)
+
+        # Fetch another time and edit the first entry
+        dataset = dataverse.load_dataset(pid)
+        dataset.citation.other_id[0].agency = "Software Heritage1 updated"
+
+        dataset.update(replace=False)
+
+        # Re-fetch the dataset
+        url = (
+            f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}"
+        )
+
+        response = requests.get(
+            url,
+            headers={"X-Dataverse-key": api_token},
+        )
+
+        response.raise_for_status()
+        updated_dataset = response.json()
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
+        # Assert
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1 updated"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."
 
 
     @staticmethod
     def sort_citation(dataset: Dict):
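For reference, the editMetadata payload implied by these assertions looks roughly like the following. This is a hedged reconstruction of the Dataverse field JSON, not a dump of what easyDataverse actually sends:

# Hypothetical body for PUT /api/datasets/:persistentId/editMetadata,
# inferred from the shape the tests read back.
to_change = {
    "fields": [
        {
            "typeName": "otherId",
            "typeClass": "compound",
            "multiple": True,
            "value": [
                {
                    "otherIdAgency": {"value": "Software Heritage1"},
                    "otherIdValue": {"value": "softwareid1"},
                },
                {
                    "otherIdAgency": {"value": "Software Heritage2"},
                    "otherIdValue": {"value": "softwareid2"},
                },
            ],
        }
    ]
}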