From 8c5feff9a2e27d3c31c4315e2a2e960d17e624fc Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 2 Sep 2025 11:50:48 +0200 Subject: [PATCH 1/7] Use class-level model_fields in DataverseBase Replaces instance-level access to model_fields with class-level access via self.__class__.model_fields in DataverseBase methods. This ensures correct field resolution and avoids potential issues with inheritance or instance-specific modifications. --- easyDataverse/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/easyDataverse/base.py b/easyDataverse/base.py index 0c67ce9..108eac1 100644 --- a/easyDataverse/base.py +++ b/easyDataverse/base.py @@ -28,7 +28,7 @@ class DataverseBase(BaseModel): # ! Overloads def __setattr__(self, name: str, value: Any) -> None: - if name in self.model_fields: + if name in self.__class__.model_fields: self._changed.add(name) return super().__setattr__(name, value) @@ -189,7 +189,7 @@ def extract_changed(self) -> List[Dict]: changed_fields = [] for name in self._changed: - field = self.model_fields[name] + field = self.__class__.model_fields[name] if self._is_compound(field) and self._is_multiple(field): value = self._process_multiple_compound(getattr(self, name)) @@ -206,7 +206,7 @@ def extract_changed(self) -> List[Dict]: def _add_changed_multiples(self): """Checks whether a compound has multiple changed fields""" - for name, field in self.model_fields.items(): + for name, field in self.__class__.model_fields.items(): if not self._is_compound(field): continue if not self._is_multiple(field): From 9ee8c877f8763f79ca18c57f56046af13abfb8f5 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 2 Sep 2025 11:51:03 +0200 Subject: [PATCH 2/7] Add support for updating dataset license Introduces logic to update a dataset's license, supporting both predefined and custom licenses via JSON-LD metadata. 
Adds integration tests for custom and predefined license updates, and a fixture for minimal custom license upload. --- easyDataverse/dataset.py | 1 + easyDataverse/uploader.py | 129 +++++++++++++++++- .../minimal_upload_custom_license.json | 84 ++++++++++++ tests/integration/test_dataset_update.py | 117 ++++++++++++++++ 4 files changed, 330 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/minimal_upload_custom_license.json diff --git a/easyDataverse/dataset.py b/easyDataverse/dataset.py index 4698e23..7521400 100644 --- a/easyDataverse/dataset.py +++ b/easyDataverse/dataset.py @@ -336,6 +336,7 @@ def update(self): update_dataset( to_change=self._extract_changes(), + license=self.license, p_id=self.p_id, # type: ignore files=self.files, DATAVERSE_URL=str(self.DATAVERSE_URL), # type: ignore diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py index 4a98f57..8070599 100644 --- a/easyDataverse/uploader.py +++ b/easyDataverse/uploader.py @@ -1,14 +1,17 @@ from urllib.parse import urljoin import httpx +import rich from rich.panel import Panel from rich.console import Console -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from dvuploader import File, DVUploader from pyDataverse.api import NativeApi, DataAccessApi from pyDataverse.models import Dataset +from easyDataverse.license import CustomLicense, License + def upload_to_dataverse( json_data: str, @@ -118,6 +121,7 @@ def update_dataset( files: List[File], DATAVERSE_URL: Optional[str] = None, API_TOKEN: Optional[str] = None, + license: Optional[Union[CustomLicense, License]] = None, ) -> bool: """Uploads and updates the metadata of a draft dataset. 
@@ -141,6 +145,14 @@ def update_dataset( api_token=API_TOKEN, # type: ignore ) + if license is not None: + _update_license( + p_id=p_id, + license=license, + base_url=DATAVERSE_URL, # type: ignore + api_token=API_TOKEN, # type: ignore + ) + _uploadFiles( files=files, p_id=p_id, @@ -173,3 +185,118 @@ def _update_metadata( response = httpx.put(EDIT_ENDPOINT, headers=headers, json=to_change) response.raise_for_status() + + +def _update_license( + p_id: str, + license: Union[CustomLicense, License], + base_url: str, + api_token: str, +): + """Updates the license of a dataset. + + Args: + p_id (str): Persistent ID of the dataset. + license (Union[CustomLicense, License]): License object to update. + base_url (str): URL of the dataverse instance. + api_token (str): API token of the user. + + Raises: + AssertionError: If license is not a License or CustomLicense instance. + Exception: If the JSON-LD metadata update fails. + """ + assert isinstance(license, License) or isinstance(license, CustomLicense), ( + "License must be a License or CustomLicense" + ) + + headers = { + "X-Dataverse-key": api_token, + "Accept": "application/ld+json", + "Content-Type": "application/ld+json", + } + + # First, fetch the JSON-LD metadata + data = _fetch_json_ld_metadata( + p_id=p_id, + base_url=base_url, + headers=headers, + ) + + if isinstance(license, CustomLicense): + for field in License.json_ld_field_names(): + data.pop(field, None) + data.update(license.to_json_ld()) + else: + for field in CustomLicense.json_ld_field_names(): + data.pop(field, None) + data.update(license.to_json_ld()) + + # Then, update the JSON-LD metadata on the server + _update_json_ld_metadata( + p_id=p_id, + data=data, + base_url=base_url, + headers=headers, + ) + + +def _fetch_json_ld_metadata( + p_id: str, + base_url: str, + headers: Dict[str, str], +): + """Fetches JSON-LD metadata for a dataset. + + Args: + p_id (str): Persistent ID of the dataset. + base_url (str): URL of the dataverse instance. 
+ headers (Dict[str, str]): HTTP headers including API token. + + Returns: + Dict: The JSON-LD metadata for the dataset. + + Raises: + httpx.HTTPError: If the request fails. + AssertionError: If the response doesn't contain expected data structure. + """ + response = httpx.get( + f"{base_url.rstrip('/')}/api/datasets/:persistentId/metadata?persistentId={p_id}", + headers=headers, + ) + response.raise_for_status() + content = response.json() + assert "data" in content + + return content["data"] + + +def _update_json_ld_metadata( + p_id: str, + data: Dict, + base_url: str, + headers: Dict[str, str], +): + """Updates JSON-LD metadata for a dataset. + + Args: + p_id (str): Persistent ID of the dataset. + data (Dict): The JSON-LD metadata to update. + base_url (str): URL of the dataverse instance. + headers (Dict[str, str]): HTTP headers including API token. + + Returns: + Dict: The response from the server. + + Raises: + Exception: If the update fails (status code != 200). + """ + response = httpx.put( + f"{base_url.rstrip('/')}/api/datasets/:persistentId/metadata?persistentId={p_id}&replace=true", + headers=headers, + json=data, + ) + + if response.status_code != 200: + raise Exception(f"Failed to update JSON-LD metadata: {response.text}") + + return response.json() diff --git a/tests/fixtures/minimal_upload_custom_license.json b/tests/fixtures/minimal_upload_custom_license.json new file mode 100644 index 0000000..282d223 --- /dev/null +++ b/tests/fixtures/minimal_upload_custom_license.json @@ -0,0 +1,84 @@ +{ + "datasetVersion": { + "termsOfUse": "This dataset is provided for research and educational purposes only. Commercial use is prohibited without explicit permission from the dataset owner.", + "confidentialityDeclaration": "This dataset contains no confidential or personally identifiable information. All data has been anonymized and aggregated for research purposes.", + "citationRequirements": "When using this dataset, please cite: Doe, John (2024). My dataset. 
[Dataset]. Available at: http://localhost:8080", + "conditions": "Users must acknowledge the source of the data and agree not to redistribute the dataset without permission. Any publications using this data should include proper attribution.", + "citation": "John Doe, 2025, My dataset, https://doi.org/10.5072/FK2/VJCLOP, Root, V1", + "metadataBlocks": { + "citation": { + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "John Doe" + } + } + ] + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "datasetContact", + "value": [ + { + "datasetContactName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "datasetContactName", + "value": "John Doe" + }, + "datasetContactEmail": { + "multiple": false, + "typeClass": "primitive", + "typeName": "datasetContactEmail", + "value": "john@doe.com" + } + } + ] + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "dsDescription", + "value": [ + { + "dsDescriptionValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue", + "value": "This is a description of the dataset" + }, + "dsDescriptionDate": { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionDate", + "value": "2024" + } + } + ] + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": ["Other"] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My dataset" + } + ] + } + } + } +} diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index 4bd6952..515af49 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -2,8 +2,10 @@ import pytest import httpx +import rich from easyDataverse import Dataverse +from easyDataverse.license 
import CustomLicense, License class TestDatasetUpdate: @@ -63,6 +65,121 @@ def test_dataset_update( "The updated dataset title does not match the expected title." ) + @pytest.mark.integration + def test_custom_license_update( + self, + credentials, + minimal_upload_custom_license, + ): + # Arrange + base_url, api_token = credentials + url = f"{base_url}/api/dataverses/root/datasets" + response = httpx.post( + url=url, + json=minimal_upload_custom_license, + headers={ + "X-Dataverse-key": api_token, + "Content-Type": "application/json", + }, + ) + + response.raise_for_status() + pid = response.json()["data"]["persistentId"] + + # Act + dataverse = Dataverse( + server_url=base_url, + api_token=api_token, + ) + + # Fetch the dataset and update the title + dataset = dataverse.load_dataset(pid) + dataset.license = CustomLicense( + termsOfUse="CHANGED", + confidentialityDeclaration="CHANGED", + specialPermissions="CHANGED", + restrictions="CHANGED", + citationRequirements="CHANGED", + conditions="CHANGED", + depositorRequirements="CHANGED", + disclaimer="CHANGED", + ) + + dataset.update() + + # Re-fetch the dataset + refetched_dataset = dataverse.load_dataset(pid) + + # Assert + assert isinstance(refetched_dataset.license, CustomLicense) + assert refetched_dataset.license.terms_of_use == "CHANGED" + assert refetched_dataset.license.confidentiality_declaration == "CHANGED" + assert refetched_dataset.license.special_permissions == "CHANGED" + assert refetched_dataset.license.restrictions == "CHANGED" + assert refetched_dataset.license.citation_requirements == "CHANGED" + assert refetched_dataset.license.conditions == "CHANGED" + assert refetched_dataset.license.depositor_requirements == "CHANGED" + assert refetched_dataset.license.disclaimer == "CHANGED" + + assert dataset.dataverse_dict() == refetched_dataset.dataverse_dict(), ( + "Dataset contents are not the same" + ) + + @pytest.mark.integration + def test_custom_license_update_with_predefined_license( + self, + 
credentials, + minimal_upload, + ): + # Arrange + base_url, api_token = credentials + url = f"{base_url}/api/dataverses/root/datasets" + response = httpx.post( + url=url, + json=minimal_upload, + headers={ + "X-Dataverse-key": api_token, + "Content-Type": "application/json", + }, + ) + + response.raise_for_status() + pid = response.json()["data"]["persistentId"] + + # Act + dataverse = Dataverse( + server_url=base_url, + api_token=api_token, + ) + + # Fetch the dataset and update the title + dataset = dataverse.load_dataset(pid) + assert isinstance(dataset.license, License), ( + "Dataset license is not a predefined license" + ) + + # Update the license to a different predefined license + expected_license = next( + license + for license in dataverse.licenses.values() + if license.name != dataset.license.name + ) + dataset.license = expected_license + + dataset.update() + + # Re-fetch the dataset + refetched_dataset = dataverse.load_dataset(pid) + + # Assert + assert refetched_dataset.license == expected_license, ( + "Dataset license is not the expected license" + ) + + assert dataset.dataverse_dict() == refetched_dataset.dataverse_dict(), ( + "Dataset contents are not the same" + ) + @staticmethod def sort_citation(dataset: Dict): citation = dataset["datasetVersion"]["metadataBlocks"]["citation"] From f78e80d9537e24316ee2530857e01fbbe3fefc50 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 2 Sep 2025 11:51:10 +0200 Subject: [PATCH 3/7] Add JSON-LD conversion methods to license models Introduces to_json_ld and json_ld_field_names methods to License and CustomLicense classes, enabling conversion of license data to JSON-LD format and retrieval of relevant field names. This facilitates interoperability with systems using JSON-LD for metadata representation. 
--- easyDataverse/license.py | 52 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/easyDataverse/license.py b/easyDataverse/license.py index 946b644..860aa6b 100644 --- a/easyDataverse/license.py +++ b/easyDataverse/license.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import List, Optional from urllib import parse from pydantic import BaseModel, ConfigDict, Field import httpx @@ -70,6 +70,28 @@ def fetch_by_name(cls, name: str, server_url: str) -> "License": except StopIteration: raise Exception(f"License '{name}' not found at '{server_url}'") + def to_json_ld(self): + """ + Convert the license to JSON-LD format. + + Returns: + dict: A dictionary containing the license information in JSON-LD format, + with the license URI mapped to the schema:license property. + """ + return { + "schema:license": self.uri, + } + + @staticmethod + def json_ld_field_names() -> List[str]: + """ + Get the JSON-LD field names for the license. + + Returns: + List[str]: A list of JSON-LD field names for the license. + """ + return ["schema:license"] + class CustomLicense(BaseModel): """ @@ -130,3 +152,31 @@ class CustomLicense(BaseModel): description="Disclaimer for the dataset.", alias="disclaimer", ) + + def to_json_ld(self): + """Convert the custom license to JSON-LD format. + + Returns: + dict: A dictionary with keys prefixed with 'dvcore:' containing + the license fields in JSON-LD format, excluding None values. + """ + return { + f"dvcore:{k}": v + for k, v in self.model_dump( + mode="json", + exclude_none=True, + by_alias=True, + ).items() + } + + @staticmethod + def json_ld_field_names() -> List[str]: + """ + Get the JSON-LD field names for the custom license. + + Returns: + List[str]: A list of JSON-LD field names for the custom license. 
+ """ + return [ + f"dvcore:{field.alias}" for field in CustomLicense.model_fields.values() + ] From a6f9b2a4e4a2e1b1aef099a6dc89e44e2df66411 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 2 Sep 2025 11:51:34 +0200 Subject: [PATCH 4/7] Remove unused rich import statements Eliminated unnecessary imports of the 'rich' module from uploader.py and test_dataset_update.py to clean up the codebase. --- easyDataverse/uploader.py | 1 - tests/integration/test_dataset_update.py | 1 - 2 files changed, 2 deletions(-) diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py index 8070599..4403348 100644 --- a/easyDataverse/uploader.py +++ b/easyDataverse/uploader.py @@ -1,6 +1,5 @@ from urllib.parse import urljoin import httpx -import rich from rich.panel import Panel from rich.console import Console diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index 515af49..de8d0fd 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -2,7 +2,6 @@ import pytest import httpx -import rich from easyDataverse import Dataverse from easyDataverse.license import CustomLicense, License From 844bb7da1c4e1a8df01be3ac0d341abc7dbe55f4 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 2 Sep 2025 14:54:22 +0200 Subject: [PATCH 5/7] Improve type checking and error handling in uploader Refactored license type assertion to use tuple for isinstance. Changed generic Exception to httpx.HTTPError for failed JSON-LD metadata updates to provide more specific error handling. 
--- easyDataverse/uploader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py index 4403348..6834737 100644 --- a/easyDataverse/uploader.py +++ b/easyDataverse/uploader.py @@ -204,7 +204,7 @@ def _update_license( AssertionError: If license is not a License or CustomLicense instance. Exception: If the JSON-LD metadata update fails. """ - assert isinstance(license, License) or isinstance(license, CustomLicense), ( + assert isinstance(license, (License, CustomLicense)), ( "License must be a License or CustomLicense" ) @@ -296,6 +296,6 @@ def _update_json_ld_metadata( ) if response.status_code != 200: - raise Exception(f"Failed to update JSON-LD metadata: {response.text}") + raise httpx.HTTPError(f"Failed to update JSON-LD metadata: {response.text}") return response.json() From 56114161bff48a33e1a6b662175912ca5b50c8ab Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 2 Sep 2025 14:54:29 +0200 Subject: [PATCH 6/7] Add fixture for custom license upload Introduces the minimal_upload_custom_license pytest fixture to load data from minimal_upload_custom_license.json for tests requiring custom license uploads. --- tests/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 74d7e42..b9738f7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,3 +34,11 @@ def minimal_upload_other_license(): Returns the contents of the 'minimal_upload.json' file as a dictionary. """ return json.load(open("tests/fixtures/minimal_upload_other_license.json")) + + +@pytest.fixture() +def minimal_upload_custom_license(): + """ + Returns the contents of the 'minimal_upload_custom_license.json' file as a dictionary. 
+ """ + return json.load(open("tests/fixtures/minimal_upload_custom_license.json")) From 50e8a50377c5148d66a861bf58b33424054bac0d Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 2 Sep 2025 14:55:25 +0200 Subject: [PATCH 7/7] Update comments to reflect license changes in tests Changed comments in test_dataset_update.py to correctly indicate that the tests update the dataset license instead of the title, improving code clarity. --- tests/integration/test_dataset_update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index de8d0fd..42e8af8 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -91,7 +91,7 @@ def test_custom_license_update( api_token=api_token, ) - # Fetch the dataset and update the title + # Fetch the dataset and update the license dataset = dataverse.load_dataset(pid) dataset.license = CustomLicense( termsOfUse="CHANGED", @@ -151,7 +151,7 @@ def test_custom_license_update_with_predefined_license( api_token=api_token, ) - # Fetch the dataset and update the title + # Fetch the dataset and update the license dataset = dataverse.load_dataset(pid) assert isinstance(dataset.license, License), ( "Dataset license is not a predefined license"