diff --git a/easyDataverse/dataset.py b/easyDataverse/dataset.py
index 4698e23..7521400 100644
--- a/easyDataverse/dataset.py
+++ b/easyDataverse/dataset.py
@@ -336,6 +336,7 @@ def update(self):
         update_dataset(
             to_change=self._extract_changes(),
+            license=self.license,
             p_id=self.p_id,  # type: ignore
             files=self.files,
             DATAVERSE_URL=str(self.DATAVERSE_URL),  # type: ignore
diff --git a/easyDataverse/license.py b/easyDataverse/license.py
index 946b644..860aa6b 100644
--- a/easyDataverse/license.py
+++ b/easyDataverse/license.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import List, Optional
 from urllib import parse
 from pydantic import BaseModel, ConfigDict, Field
 import httpx
@@ -70,6 +70,28 @@ def fetch_by_name(cls, name: str, server_url: str) -> "License":
         except StopIteration:
             raise Exception(f"License '{name}' not found at '{server_url}'")
 
+    def to_json_ld(self):
+        """
+        Convert the license to JSON-LD format.
+
+        Returns:
+            dict: A dictionary containing the license information in JSON-LD format,
+                with the license URI mapped to the schema:license property.
+        """
+        return {
+            "schema:license": self.uri,
+        }
+
+    @staticmethod
+    def json_ld_field_names() -> List[str]:
+        """
+        Get the JSON-LD field names for the license.
+
+        Returns:
+            List[str]: A list of JSON-LD field names for the license.
+        """
+        return ["schema:license"]
+
 
 class CustomLicense(BaseModel):
     """
@@ -130,3 +152,31 @@ class CustomLicense(BaseModel):
         description="Disclaimer for the dataset.",
         alias="disclaimer",
     )
+
+    def to_json_ld(self):
+        """Convert the custom license to JSON-LD format.
+
+        Returns:
+            dict: A dictionary with keys prefixed with 'dvcore:' containing
+                the license fields in JSON-LD format, excluding None values.
+        """
+        return {
+            f"dvcore:{k}": v
+            for k, v in self.model_dump(
+                mode="json",
+                exclude_none=True,
+                by_alias=True,
+            ).items()
+        }
+
+    @staticmethod
+    def json_ld_field_names() -> List[str]:
+        """
+        Get the JSON-LD field names for the custom license.
+
+        Returns:
+            List[str]: A list of JSON-LD field names for the custom license.
+        """
+        return [
+            f"dvcore:{field.alias}" for field in CustomLicense.model_fields.values()
+        ]
diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py
index 4a98f57..6834737 100644
--- a/easyDataverse/uploader.py
+++ b/easyDataverse/uploader.py
@@ -3,12 +3,14 @@
 
 from rich.panel import Panel
 from rich.console import Console
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 from dvuploader import File, DVUploader
 from pyDataverse.api import NativeApi, DataAccessApi
 from pyDataverse.models import Dataset
 
+from easyDataverse.license import CustomLicense, License
+
 
 def upload_to_dataverse(
     json_data: str,
@@ -118,6 +120,7 @@ def update_dataset(
     files: List[File],
     DATAVERSE_URL: Optional[str] = None,
     API_TOKEN: Optional[str] = None,
+    license: Optional[Union[CustomLicense, License]] = None,
 ) -> bool:
     """Uploads and updates the metadata of a draft dataset.
 
@@ -141,6 +144,14 @@ def update_dataset(
         api_token=API_TOKEN,  # type: ignore
     )
 
+    if license is not None:
+        _update_license(
+            p_id=p_id,
+            license=license,
+            base_url=DATAVERSE_URL,  # type: ignore
+            api_token=API_TOKEN,  # type: ignore
+        )
+
     _uploadFiles(
         files=files,
         p_id=p_id,
@@ -173,3 +184,118 @@ def _update_metadata(
 
     response = httpx.put(EDIT_ENDPOINT, headers=headers, json=to_change)
     response.raise_for_status()
+
+
+def _update_license(
+    p_id: str,
+    license: Union[CustomLicense, License],
+    base_url: str,
+    api_token: str,
+):
+    """Updates the license of a dataset.
+
+    Args:
+        p_id (str): Persistent ID of the dataset.
+        license (Union[CustomLicense, License]): License object to update.
+        base_url (str): URL of the dataverse instance.
+        api_token (str): API token of the user.
+
+    Raises:
+        AssertionError: If license is not a License or CustomLicense instance.
+        httpx.HTTPError: If fetching or updating the JSON-LD metadata fails.
+    """
+    assert isinstance(license, (License, CustomLicense)), (
+        "License must be a License or CustomLicense"
+    )
+
+    headers = {
+        "X-Dataverse-key": api_token,
+        "Accept": "application/ld+json",
+        "Content-Type": "application/ld+json",
+    }
+
+    # First, fetch the JSON-LD metadata
+    data = _fetch_json_ld_metadata(
+        p_id=p_id,
+        base_url=base_url,
+        headers=headers,
+    )
+
+    if isinstance(license, CustomLicense):
+        for field in License.json_ld_field_names():
+            data.pop(field, None)
+        data.update(license.to_json_ld())
+    else:
+        for field in CustomLicense.json_ld_field_names():
+            data.pop(field, None)
+        data.update(license.to_json_ld())
+
+    # Then, update the JSON-LD metadata on the server
+    _update_json_ld_metadata(
+        p_id=p_id,
+        data=data,
+        base_url=base_url,
+        headers=headers,
+    )
+
+
+def _fetch_json_ld_metadata(
+    p_id: str,
+    base_url: str,
+    headers: Dict[str, str],
+):
+    """Fetches JSON-LD metadata for a dataset.
+
+    Args:
+        p_id (str): Persistent ID of the dataset.
+        base_url (str): URL of the dataverse instance.
+        headers (Dict[str, str]): HTTP headers including API token.
+
+    Returns:
+        Dict: The JSON-LD metadata for the dataset.
+
+    Raises:
+        httpx.HTTPError: If the request fails.
+        AssertionError: If the response doesn't contain expected data structure.
+    """
+    response = httpx.get(
+        f"{base_url.rstrip('/')}/api/datasets/:persistentId/metadata?persistentId={p_id}",
+        headers=headers,
+    )
+    response.raise_for_status()
+    content = response.json()
+    assert "data" in content
+
+    return content["data"]
+
+
+def _update_json_ld_metadata(
+    p_id: str,
+    data: Dict,
+    base_url: str,
+    headers: Dict[str, str],
+):
+    """Updates JSON-LD metadata for a dataset.
+
+    Args:
+        p_id (str): Persistent ID of the dataset.
+        data (Dict): The JSON-LD metadata to update.
+        base_url (str): URL of the dataverse instance.
+        headers (Dict[str, str]): HTTP headers including API token.
+
+    Returns:
+        Dict: The response from the server.
+
+    Raises:
+        httpx.HTTPError: If the update fails (status code != 200).
+    """
+    response = httpx.put(
+        f"{base_url.rstrip('/')}/api/datasets/:persistentId/metadata?persistentId={p_id}&replace=true",
+        headers=headers,
+        json=data,
+    )
+
+    if response.status_code != 200:
+        raise httpx.HTTPError(f"Failed to update JSON-LD metadata: {response.text}")
+
+    return response.json()
diff --git a/tests/conftest.py b/tests/conftest.py
index 74d7e42..b9738f7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -34,3 +34,11 @@ def minimal_upload_other_license():
     """
     Returns the contents of the 'minimal_upload.json' file as a dictionary.
""" return json.load(open("tests/fixtures/minimal_upload_other_license.json")) + + +@pytest.fixture() +def minimal_upload_custom_license(): + """ + Returns the contents of the 'minimal_upload.json' file as a dictionary. + """ + return json.load(open("tests/fixtures/minimal_upload_custom_license.json")) diff --git a/tests/fixtures/minimal_upload_custom_license.json b/tests/fixtures/minimal_upload_custom_license.json new file mode 100644 index 0000000..282d223 --- /dev/null +++ b/tests/fixtures/minimal_upload_custom_license.json @@ -0,0 +1,84 @@ +{ + "datasetVersion": { + "termsOfUse": "This dataset is provided for research and educational purposes only. Commercial use is prohibited without explicit permission from the dataset owner.", + "confidentialityDeclaration": "This dataset contains no confidential or personally identifiable information. All data has been anonymized and aggregated for research purposes.", + "citationRequirements": "When using this dataset, please cite: Doe, John (2024). My dataset. [Dataset]. Available at: http://localhost:8080", + "conditions": "Users must acknowledge the source of the data and agree not to redistribute the dataset without permission. Any publications using this data should include proper attribution.", + "citation": "John Doe, 2025, My dataset, https://doi.org/10.5072/FK2/VJCLOP, Root, V1", + "metadataBlocks": { + "citation": { + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "John Doe" + } + } + ] + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "datasetContact", + "value": [ + { + "datasetContactName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "datasetContactName", + "value": "John Doe" + }, + "datasetContactEmail": { + "multiple": false, + "typeClass": "primitive", + "typeName": "datasetContactEmail", + "value": "john@doe.com" + } + } + ] + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "dsDescription", + "value": [ + { + "dsDescriptionValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue", + "value": "This is a description of the dataset" + }, + "dsDescriptionDate": { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionDate", + "value": "2024" + } + } + ] + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": ["Other"] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My dataset" + } + ] + } + } + } +} diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index b7ec291..1bce103 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -1,9 +1,10 @@ from typing import Dict -import pytest import httpx +import pytest from easyDataverse import Dataverse +from easyDataverse.license import CustomLicense, License class TestDatasetUpdate: @@ -64,67 +65,119 @@ def test_dataset_update( ) @pytest.mark.integration - def test_dataset_update_with_multiple_fields( + def test_custom_license_update( self, credentials, + minimal_upload_custom_license, ): # Arrange base_url, api_token = credentials + url = f"{base_url}/api/dataverses/root/datasets" + response = httpx.post( + url=url, + json=minimal_upload_custom_license, + headers={ + "X-Dataverse-key": api_token, + "Content-Type": 
"application/json", + }, + ) + + response.raise_for_status() + pid = response.json()["data"]["persistentId"] + + # Act dataverse = Dataverse( server_url=base_url, api_token=api_token, ) - # Create a dataset - dataset = dataverse.create_dataset() - dataset.citation.title = "My dataset" - dataset.citation.subject = ["Other"] - dataset.citation.add_author(name="John Doe") - dataset.citation.add_ds_description( - value="This is a description of the dataset", - date="2024", - ) - dataset.citation.add_dataset_contact( - name="John Doe", - email="john@doe.com", + # Fetch the dataset and update the license + dataset = dataverse.load_dataset(pid) + dataset.license = CustomLicense( + termsOfUse="CHANGED", + confidentialityDeclaration="CHANGED", + specialPermissions="CHANGED", + restrictions="CHANGED", + citationRequirements="CHANGED", + conditions="CHANGED", + depositorRequirements="CHANGED", + disclaimer="CHANGED", ) - pid = dataset.upload("Root") - - # Act - # Re-fetch the dataset and add other ID - dataset = dataverse.load_dataset(pid) - dataset.citation.add_other_id(agency="DOI", value="10.5072/easy-dataverse") dataset.update() - # Re-fetch the dataset to verify the update - url = ( - f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}" + # Re-fetch the dataset + refetched_dataset = dataverse.load_dataset(pid) + + # Assert + assert isinstance(refetched_dataset.license, CustomLicense) + assert refetched_dataset.license.terms_of_use == "CHANGED" + assert refetched_dataset.license.confidentiality_declaration == "CHANGED" + assert refetched_dataset.license.special_permissions == "CHANGED" + assert refetched_dataset.license.restrictions == "CHANGED" + assert refetched_dataset.license.citation_requirements == "CHANGED" + assert refetched_dataset.license.conditions == "CHANGED" + assert refetched_dataset.license.depositor_requirements == "CHANGED" + assert refetched_dataset.license.disclaimer == "CHANGED" + + assert dataset.dataverse_dict() == refetched_dataset.dataverse_dict(), ( + "Dataset contents are not the same" ) - response = httpx.get( - url, - headers={"X-Dataverse-key": api_token}, + @pytest.mark.integration + def test_custom_license_update_with_predefined_license( + self, + credentials, + minimal_upload, + ): + # Arrange + base_url, api_token = credentials + url = f"{base_url}/api/dataverses/root/datasets" + response = httpx.post( + url=url, + json=minimal_upload, + headers={ + "X-Dataverse-key": api_token, + "Content-Type": "application/json", + }, ) response.raise_for_status() - updated_dataset = response.json() - other_id_field = next( - filter( - lambda x: x["typeName"] == "otherId", - updated_dataset["data"]["metadataBlocks"]["citation"]["fields"], - ), - None, + pid = response.json()["data"]["persistentId"] + + # Act + dataverse = Dataverse( + server_url=base_url, + api_token=api_token, + ) + + # Fetch the dataset and update the license + dataset = dataverse.load_dataset(pid) + assert isinstance(dataset.license, License), ( + "Dataset license is not a predefined license" + ) + + # Update the license to a different predefined license + expected_license = next( + license + for license in dataverse.licenses.values() + if license.name != dataset.license.name ) + dataset.license = expected_license + + dataset.update() + + # Re-fetch the dataset + refetched_dataset = dataverse.load_dataset(pid) # Assert - assert other_id_field is not None, "Other ID field should be present" - assert len(other_id_field["value"]) > 0, "Other ID field should have values" - assert any( - 
item["otherIdAgency"]["value"] == "DOI" - and item["otherIdValue"]["value"] == "10.5072/easy-dataverse" - for item in other_id_field["value"] - ), "The DOI other ID should be present in the updated dataset" + assert refetched_dataset.license == expected_license, ( + "Dataset license is not the expected license" + ) + + assert dataset.dataverse_dict() == refetched_dataset.dataverse_dict(), ( + "Dataset contents are not the same" + ) @staticmethod def sort_citation(dataset: Dict):