Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added addon_imps/links/__init__.py
Empty file.
249 changes: 249 additions & 0 deletions addon_imps/links/dataverse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
from __future__ import annotations

import asyncio
import re
from dataclasses import dataclass
from urllib.parse import urlparse

from django.core.exceptions import ValidationError

from addon_toolkit.interfaces import link
from addon_toolkit.interfaces.link import (
ItemResult,
ItemSampleResult,
ItemType,
)


# Item ids are prefixed with their entity kind, e.g. "dataverse/12", "dataset/34",
# "file/56".  `\d+` (not `\d*`) so an id-less string like "dataverse/" is rejected
# instead of matching with an empty id and producing broken API paths.
DATAVERSE_REGEX = re.compile(r"^dataverse/(?P<id>\d+)$")
DATASET_REGEX = re.compile(r"^dataset/(?P<id>\d+)$")
FILE_REGEX = re.compile(r"^file/(?P<id>\d+)$")


@dataclass
class DataverseLinkImp(link.LinkAddonHttpRequestorImp):
"""link on dataverse

see https://guides.dataverse.org/en/latest/api/native-api.html
"""

async def get_external_account_id(self, _: dict[str, str]) -> str:
try:
async with self.network.GET("api/v1/users/:me") as response:
if not response.http_status.is_success:
raise ValidationError(
"Could not get dataverse account id, check your API Token"
)
content = await response.json_content()
return content.get("data", {}).get("id")
except ValueError as exc:
if "relative url may not alter the base url" in str(exc).lower():
raise ValidationError(
"Invalid host URL. Please check your Dataverse base URL."
)
raise

async def build_wb_config(
self,
) -> dict:
match = DATASET_REGEX.match(self.config.connected_root_id)
async with self.network.GET(f"api/datasets/{match['id']}") as response:
content = await response.json_content()
parsed = parse_dataset(content)
return {
"id": match["id"],
"name": parsed.item_name,
"doi": content["data"]["latestVersion"]["datasetPersistentId"],
"host": urlparse(self.config.external_api_url).hostname,
}

async def list_root_items(self, page_cursor: str = "") -> link.ItemSampleResult:
async with self.network.GET(
"api/mydata/retrieve",
query=[
["selected_page", page_cursor],
*[("role_ids", role) for role in range(1, 9)],
("dvobject_types", "Dataverse"),
*[
("published_states", state)
for state in [
"Unpublished",
"Published",
"Draft",
"Deaccessioned",
"In+Review",
]
],
],
) as response:
content = await response.json_content()
if resp_data := content.get("data"):
return parse_mydata(resp_data)
return ItemSampleResult(items=[], total_count=0)

async def get_item_info(self, item_id: str) -> link.ItemResult:
if not item_id:
return ItemResult(item_id="", item_name="", item_type=ItemType.FOLDER)
elif match := DATAVERSE_REGEX.match(item_id):
entity = await self._fetch_dataverse(match["id"])
elif match := DATASET_REGEX.match(item_id):
entity = await self._fetch_dataset(match["id"])
elif match := FILE_REGEX.match(item_id):
entity = await self._fetch_file(match["id"])
else:
raise ValueError(f"Invalid item id: {item_id}")

return entity

async def list_child_items(
self,
item_id: str,
page_cursor: str = "",
item_type: link.ItemType | None = None,
) -> link.ItemSampleResult:
if not item_id:
return await self.list_root_items(page_cursor)
elif item_type != ItemType.FILE and (match := DATAVERSE_REGEX.match(item_id)):
items = await self._fetch_dataverse_items(match["id"])
return link.ItemSampleResult(
items=items,
total_count=len(items),
)
elif item_type != ItemType.FOLDER and (match := DATASET_REGEX.match(item_id)):
items = await self._fetch_dataset_files(match["id"])
return link.ItemSampleResult(
items=items,
total_count=len(items),
)
else:
return ItemSampleResult(items=[], total_count=0)

async def _fetch_dataverse_items(self, dataverse_id) -> list[ItemResult]:
async with self.network.GET(
f"api/dataverses/{dataverse_id}/contents"
) as response:
response_content = await response.json_content()
return await asyncio.gather(
*[
self.get_dataverse_or_dataset_item(item)
for item in response_content["data"]
]
)

async def get_dataverse_or_dataset_item(self, item: dict):
match item["type"]:
case "dataset":
return await self._fetch_dataset(item["id"])
case "dataverse":
return parse_dataverse_as_subitem(item)
raise ValueError(f"Invalid item type: {item['type']}")

async def _fetch_dataverse(self, dataverse_id) -> ItemResult:
async with self.network.GET(f"api/dataverses/{dataverse_id}") as response:
return parse_dataverse(await response.json_content())

async def _fetch_dataset(self, dataset_id: str) -> ItemResult:
async with self.network.GET(f"api/datasets/{dataset_id}") as response:
return parse_dataset(await response.json_content())

async def _fetch_dataset_files(self, dataset_id) -> list[ItemResult]:
async with self.network.GET(f"api/datasets/{dataset_id}") as response:
return parse_dataset_files(await response.json_content())

async def _fetch_file(self, dataverse_id) -> ItemResult:
async with self.network.GET(f"api/files/{dataverse_id}") as response:
return parse_datafile(await response.json_content())


###
# module-local helpers


def parse_dataverse_as_subitem(data: dict):
    """Build a folder ItemResult for a dataverse listed inside a parent's contents."""
    dataverse_id = data["id"]
    return ItemResult(
        item_id=f"dataverse/{dataverse_id}",
        item_name=data["title"],
        item_type=ItemType.FOLDER,
        can_be_root=False,
    )


def parse_datafile(data: dict):
    """Build a file ItemResult from a datafile payload (unwrapping any "data" envelope)."""
    payload = data.get("data") or data
    file_id = payload["dataFile"]["id"]
    return ItemResult(
        item_id=f"file/{file_id}",
        item_name=payload["label"],
        item_type=ItemType.FILE,
    )


def parse_dataverse(data: dict):
    """Build a folder ItemResult from a dataverse payload (unwrapping any "data" envelope)."""
    payload = data.get("data") or data
    dataverse_id = payload["id"]
    return ItemResult(
        item_id=f"dataverse/{dataverse_id}",
        item_name=payload["name"],
        item_type=ItemType.FOLDER,
        can_be_root=False,
    )


def parse_mydata(data: dict):
    """Parse an `api/mydata/retrieve` payload into a sample of dataverse folders.

    Unwraps an optional "data" envelope.  Raises ValueError on missing keys,
    consistent with parse_dataset / parse_dataset_files (previously a raw
    KeyError leaked out of this parser).
    """
    if data.get("data"):
        data = data["data"]
    try:
        return ItemSampleResult(
            items=[
                ItemResult(
                    item_id=f"dataverse/{file['entity_id']}",
                    item_name=file["name"],
                    item_type=ItemType.FOLDER,
                    can_be_root=False,
                    may_contain_root_candidates=True,
                )
                for file in data["items"]
            ],
            total_count=data["total_count"],
            next_sample_cursor=(
                data["pagination"]["nextPageNumber"]
                if data["pagination"]["hasNextPageNumber"]
                else None
            ),
        )
    except KeyError as e:
        raise ValueError(f"Invalid mydata response: {e=}")


def parse_dataset(data: dict) -> ItemResult:
    """Build a folder ItemResult for a dataset, taking its name from the citation title.

    Raises ValueError when the payload lacks the expected structure.
    """
    payload = data.get("data") or data
    try:
        citation_fields = payload["latestVersion"]["metadataBlocks"]["citation"][
            "fields"
        ]
        titles = [field["value"] for field in citation_fields if field["typeName"] == "title"]
        return ItemResult(
            item_id=f"dataset/{payload['id']}",
            item_name=titles[0],
            item_type=ItemType.FOLDER,
            may_contain_root_candidates=False,
        )
    except (KeyError, IndexError) as e:
        raise ValueError(f"Invalid dataset response: {e=}")


def parse_dataset_files(data: dict) -> list[ItemResult]:
    """Build file ItemResults for every file in a dataset's latest version.

    Unwraps an optional "data" envelope.  Raises ValueError on missing keys.
    """
    if data.get("data"):
        data = data["data"]
    try:
        return [
            ItemResult(
                item_id=f"file/{file['dataFile']['id']}",
                item_name=file["label"],
                item_type=ItemType.FILE,
            )
            for file in data["latestVersion"]["files"]
        ]
    except (KeyError, IndexError) as e:
        # space after the colon: message now matches parse_dataset's wording
        raise ValueError(f"Invalid dataset response: {e=}")
Empty file.
40 changes: 39 additions & 1 deletion addon_service/addon_imp/instantiation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
StorageAddonImp,
StorageConfig,
)
from addon_toolkit.interfaces.link import (
LinkAddonClientRequestorImp,
LinkAddonHttpRequestorImp,
LinkAddonImp,
LinkConfig,
)


if TYPE_CHECKING:
Expand All @@ -31,20 +37,23 @@
AuthorizedCitationAccount,
AuthorizedComputingAccount,
AuthorizedStorageAccount,
AuthorizedLinkAccount,
)


async def get_addon_instance(
    imp_cls: type[AddonImp],
    account: AuthorizedAccount,
    config: StorageConfig | CitationConfig | ComputingConfig | LinkConfig,
) -> AddonImp:
    """Dispatch to the constructor helper matching the addon family of `imp_cls`."""
    for base_cls, build in (
        (StorageAddonImp, get_storage_addon_instance),
        (CitationAddonImp, get_citation_addon_instance),
        (ComputingAddonImp, get_computing_addon_instance),
        (LinkAddonImp, get_link_addon_instance),
    ):
        if issubclass(imp_cls, base_cls):
            return await build(imp_cls, account, config)
    raise ValueError(f"unknown addon type {imp_cls}")


Expand Down Expand Up @@ -134,3 +143,32 @@ async def get_computing_addon_instance(


get_computing_addon_instance__blocking = async_to_sync(get_computing_addon_instance)

async def get_link_addon_instance(
    imp_cls: type[LinkAddonImp],
    account: AuthorizedLinkAccount,
    config: LinkConfig,  # was mistyped as StorageConfig
) -> LinkAddonImp:
    """create an instance of a `LinkAddonImp`

    (TODO: decide on a common constructor for all `AddonImp`s, remove this)
    """
    assert issubclass(imp_cls, LinkAddonImp)
    assert (
        imp_cls is not LinkAddonImp
    ), "Addons shouldn't directly extend LinkAddonImp"
    if issubclass(imp_cls, LinkAddonHttpRequestorImp):
        return imp_cls(
            config=config,
            network=GravyvaletHttpRequestor(
                client_session=await get_singleton_client_session(),
                prefix_url=config.external_api_url,
                account=account,
            ),
        )
    if issubclass(imp_cls, LinkAddonClientRequestorImp):
        return imp_cls(credentials=await account.get_credentials__async(), config=config)
    # previously fell through with `imp` unbound -> NameError; fail explicitly instead
    raise ValueError(f"unknown link addon imp type {imp_cls}")

get_link_addon_instance__blocking = async_to_sync(get_link_addon_instance)
3 changes: 2 additions & 1 deletion addon_service/addon_operation_invocation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from addon_toolkit.interfaces.citation import CitationConfig
from addon_toolkit.interfaces.computing import ComputingConfig
from addon_toolkit.interfaces.storage import StorageConfig
from addon_toolkit.interfaces.link import LinkConfig


class AddonOperationInvocation(AddonsServiceBaseModel):
Expand Down Expand Up @@ -71,7 +72,7 @@ def imp_cls(self) -> type[AddonImp]:
return self.thru_account.imp_cls

@property
def config(self) -> StorageConfig | CitationConfig | ComputingConfig:
def config(self) -> StorageConfig | CitationConfig | ComputingConfig | LinkConfig:
if self.thru_addon:
return get_config_for_addon(self.thru_addon)
return get_config_for_account(self.thru_account)
Expand Down
1 change: 1 addition & 0 deletions addon_service/addon_operation_invocation/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class Meta:
"base_account__external_service",
"base_account__authorizedstorageaccount",
"base_account__authorizedcitationaccount",
"base_account__authorizedlinkaccount",
"base_account__account_owner",
),
related_link_view_name=view_names.related_view(RESOURCE_TYPE),
Expand Down
22 changes: 15 additions & 7 deletions addon_service/addon_operation_invocation/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@
)
from addon_toolkit import AddonOperationType

from ..authorized_account.citation.serializers import (
AuthorizedCitationAccountSerializer,
)
from ..authorized_account.computing.serializers import (
AuthorizedComputingAccountSerializer,
)
from ..authorized_account.models import AuthorizedAccount
from ..authorized_account.citation.serializers import AuthorizedCitationAccountSerializer
from ..authorized_account.computing.serializers import AuthorizedComputingAccountSerializer
from ..authorized_account.link.serializers import AuthorizedLinkAccountSerializer
from ..authorized_account.storage.serializers import AuthorizedStorageAccountSerializer

from ..configured_addon.models import ConfiguredAddon
from ..configured_addon.citation.serializers import ConfiguredCitationAddonSerializer
from ..configured_addon.computing.serializers import ConfiguredComputingAddonSerializer
from ..configured_addon.models import ConfiguredAddon
from ..configured_addon.link.serializers import ConfiguredLinkAddonSerializer
from ..configured_addon.storage.serializers import ConfiguredStorageAddonSerializer

from .models import AddonOperationInvocation
from .serializers import AddonOperationInvocationSerializer

Expand Down Expand Up @@ -63,6 +63,10 @@ def retrieve_related(self, request, *args, **kwargs):
serializer = AuthorizedComputingAccountSerializer(
instance, context={"request": request}
)
elif hasattr(instance, "authorizedlinkaccount"):
serializer = AuthorizedLinkAccountSerializer(
instance, context={"request": request}
)
else:
raise ValueError("unknown authorized account type")
elif isinstance(instance, ConfiguredAddon):
Expand All @@ -78,6 +82,10 @@ def retrieve_related(self, request, *args, **kwargs):
serializer = ConfiguredComputingAddonSerializer(
instance, context={"request": request}
)
elif hasattr(instance, "configuredlinkaddon"):
serializer = ConfiguredLinkAddonSerializer(
instance, context={"request": request}
)
else:
raise ValueError("unknown configured addon type")
else:
Expand Down
Loading
Loading