gateway/sds_gateway/api_methods/migrations/0016_convert_dataset_authors_to_object_format.py
@@ -0,0 +1,165 @@
"""Migration to convert dataset authors from string format to object format."""

import json
import logging

from django.db import migrations

logger = logging.getLogger(__name__)


def convert_authors_to_object_format(apps, schema_editor):
"""Convert dataset authors from string format to object format."""
Dataset = apps.get_model("api_methods", "Dataset")

# Get all datasets with authors
datasets = Dataset.objects.filter(authors__isnull=False).exclude(authors="")
total_count = datasets.count()

if total_count == 0:
logger.info("No datasets with authors found to convert")
return

logger.info("Converting %d datasets with authors to new format", total_count)

updated_count = 0
skipped_count = 0
error_count = 0

for dataset in datasets:
try:
# Parse current authors
if isinstance(dataset.authors, str):
try:
current_authors = json.loads(dataset.authors)
except (json.JSONDecodeError, TypeError):
# Skip datasets with invalid JSON
logger.warning("Dataset %s has invalid authors JSON, skipping", dataset.uuid)
skipped_count += 1
continue
else:
current_authors = dataset.authors

if not current_authors:
skipped_count += 1
continue

# Check if already in new format (has dict with 'name' key)
if isinstance(current_authors, list) and current_authors:
if isinstance(current_authors[0], dict) and "name" in current_authors[0]:
logger.debug("Dataset %s already in new format, skipping", dataset.uuid)
skipped_count += 1
continue

# Convert string authors to object format
if isinstance(current_authors[0], str):
new_authors = []
for author in current_authors:
if isinstance(author, str):
new_authors.append({"name": author, "orcid_id": ""})
else:
# Handle unexpected format
logger.warning(
"Dataset %s has unexpected author format: %s",
dataset.uuid, author
)
new_authors.append({"name": str(author), "orcid_id": ""})

# Update the dataset
dataset.authors = json.dumps(new_authors)
dataset.save(update_fields=["authors"])
updated_count += 1
logger.debug("Converted dataset %s authors", dataset.uuid)
else:
# Authors are already objects, skip
skipped_count += 1
else:
# Empty or invalid authors list
skipped_count += 1

except Exception as e:
error_count += 1
logger.error("Error converting dataset %s: %s", dataset.uuid, e)

logger.info(
"Author conversion complete: %d updated, %d skipped, %d errors",
updated_count, skipped_count, error_count
)


def reverse_authors_to_string_format(apps, schema_editor):
"""Reverse migration: convert authors back to string format."""
Dataset = apps.get_model("api_methods", "Dataset")

# Get all datasets with authors
datasets = Dataset.objects.filter(authors__isnull=False).exclude(authors="")
total_count = datasets.count()

if total_count == 0:
logger.info("No datasets with authors found to reverse")
return

logger.info("Reversing %d datasets with authors to string format", total_count)

updated_count = 0
skipped_count = 0
error_count = 0

for dataset in datasets:
try:
# Parse current authors
if isinstance(dataset.authors, str):
try:
current_authors = json.loads(dataset.authors)
except (json.JSONDecodeError, TypeError):
skipped_count += 1
continue
else:
current_authors = dataset.authors

if not current_authors:
skipped_count += 1
continue

# Check if in object format (has dict with 'name' key)
if isinstance(current_authors, list) and current_authors:
if isinstance(current_authors[0], dict) and "name" in current_authors[0]:
# Convert object authors back to string format
string_authors = [author["name"] for author in current_authors]

# Update the dataset
dataset.authors = json.dumps(string_authors)
dataset.save(update_fields=["authors"])
updated_count += 1
logger.debug("Reversed dataset %s authors", dataset.uuid)
else:
# Already in string format, skip
skipped_count += 1
else:
# Empty or invalid authors list
skipped_count += 1

except Exception as e:
error_count += 1
logger.error("Error reversing dataset %s: %s", dataset.uuid, e)

logger.info(
"Author reversal complete: %d updated, %d skipped, %d errors",
updated_count, skipped_count, error_count
)


class Migration(migrations.Migration):
"""Migration to convert dataset authors to object format."""

dependencies = [
("api_methods", "0015_rename_postprocesseddata_deprecatedpostprocesseddata_and_more"),
]

operations = [
migrations.RunPython(
convert_authors_to_object_format,
reverse_authors_to_string_format,
hints={"target_db": "default"},
),
]
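For context, here is a minimal sketch of the data shapes this migration converts between; the author names below are hypothetical examples, not values from the codebase:

import json

# Old format: a JSON-encoded list of plain name strings.
old_authors = json.dumps(["Ada Lovelace", "Grace Hopper"])

# New format written by the forward migration: a JSON-encoded list of
# objects, each with a name and an (initially empty) ORCID iD.
new_authors = json.dumps([
    {"name": "Ada Lovelace", "orcid_id": ""},
    {"name": "Grace Hopper", "orcid_id": ""},
])

# The reverse migration keeps only the names, restoring the old shape.
restored = [author["name"] for author in json.loads(new_authors)]
assert restored == json.loads(old_authors)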
gateway/sds_gateway/api_methods/migrations/max_migration.txt
@@ -1 +1 @@
0016_alter_usersharepermission_permission_level
0016_convert_dataset_authors_to_object_format
65 changes: 18 additions & 47 deletions gateway/sds_gateway/api_methods/models.py
@@ -14,7 +14,6 @@
from django.db import models
from django.db.models import ProtectedError
from django.db.models import QuerySet
from django.db.models.signals import post_delete
from django.db.models.signals import post_save
from django.db.models.signals import pre_delete
from django.dispatch import receiver
@@ -665,26 +664,30 @@ def from_db(cls, db, field_names, values):
setattr(instance, field, json.loads(getattr(instance, field)))
return instance

def update_authors_field(self):
"""Update the authors field based on current permissions."""
authors_data = UserSharePermission.get_dataset_authors(self.uuid)
author_names = [author["name"] for author in authors_data]

# Update the authors field
self.authors = author_names
self.save(update_fields=["authors"])

def get_authors_display(self):
"""Get the authors as a list for display purposes."""
if not self.authors:
return []

if isinstance(self.authors, str):
try:
return json.loads(self.authors)
except (json.JSONDecodeError, TypeError):
return [self.authors]
# from_db should have already converted JSON string to list
if not isinstance(self.authors, list):
log.warning(
"Dataset %s: authors field is not a list (type: %s)",
self.uuid,
type(self.authors).__name__,
)
return []

# Check if authors are in old string format and need conversion
if self.authors and isinstance(self.authors[0], str):
log.warning(
"Dataset %s: authors still in old string format, needs migration",
self.uuid,
)
# Convert old format for backward compatibility
return [{"name": author, "orcid_id": ""} for author in self.authors]

# Authors should already be in new object format
return self.authors
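For reference, a standalone sketch that mirrors the fallback behaviour get_authors_display now implements; the helper name and sample values below are illustrative, not part of the codebase:

def to_display_authors(authors):
    # Non-list or empty values yield an empty list, legacy string lists are
    # wrapped into objects with an empty ORCID iD, and object lists pass
    # through unchanged (the real method also logs a warning for non-list
    # values and for the legacy string format).
    if not authors or not isinstance(authors, list):
        return []
    if isinstance(authors[0], str):
        return [{"name": name, "orcid_id": ""} for name in authors]
    return authors

assert to_display_authors(None) == []
assert to_display_authors(["Ada Lovelace"]) == [{"name": "Ada Lovelace", "orcid_id": ""}]
assert to_display_authors([{"name": "Grace Hopper", "orcid_id": ""}]) == [
    {"name": "Grace Hopper", "orcid_id": ""}
]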


@@ -1415,35 +1418,3 @@ def handle_sharegroup_soft_delete(sender, instance: ShareGroup, **kwargs) -> None:
# Update the enabled status based on remaining groups
permission.update_enabled_status()
permission.save()


@receiver(post_save, sender=UserSharePermission)
def handle_usersharepermission_change(
sender, instance: UserSharePermission, **kwargs
) -> None:
"""
Handle changes to UserSharePermission by updating dataset authors field.
"""
if instance.item_type == ItemType.DATASET and instance.is_enabled:
# Update the authors field for the dataset
dataset = Dataset.objects.filter(
uuid=instance.item_uuid, is_deleted=False
).first()
if dataset:
dataset.update_authors_field()


@receiver(post_delete, sender=UserSharePermission)
def handle_usersharepermission_delete(
sender, instance: UserSharePermission, **kwargs
) -> None:
"""
Handle deletion of UserSharePermission by updating dataset authors field.
"""
if instance.item_type == ItemType.DATASET:
# Update the authors field for the dataset
dataset = Dataset.objects.filter(
uuid=instance.item_uuid, is_deleted=False
).first()
if dataset:
dataset.update_authors_field()
gateway/sds_gateway/users/migrations/0011_user_orcid_id_alter_user_is_approved.py
@@ -0,0 +1,24 @@
# Generated by Django 4.2.14 on 2025-09-04 12:52

from django.db import migrations, models
from ..validators import validate_orcid_id


class Migration(migrations.Migration):

dependencies = [
('users', '0010_userapikey_description_and_name'),
]

operations = [
migrations.AddField(
model_name='user',
name='orcid_id',
field=models.CharField(
blank=True,
max_length=19,
validators=[validate_orcid_id],
verbose_name='ORCID ID'
),
),
]
2 changes: 1 addition & 1 deletion gateway/sds_gateway/users/migrations/max_migration.txt
@@ -1 +1 @@
0010_userapikey_description_and_name
0011_user_orcid_id_alter_user_is_approved
6 changes: 5 additions & 1 deletion gateway/sds_gateway/users/models.py
@@ -13,6 +13,7 @@

from .managers import APIKeyUserManager
from .managers import UserManager
from .validators import validate_orcid_id


class User(AbstractUser):
@@ -28,6 +29,9 @@ class User(AbstractUser):
username: str | None = None
name = models.CharField(_("Name of User"), blank=True, max_length=255)
email = models.EmailField(_("Email address"), unique=True)
orcid_id = models.CharField(
_("ORCID ID"), blank=True, max_length=19, validators=[validate_orcid_id]
)
is_approved = models.BooleanField(
_("Approved"),
default=settings.SDS_NEW_USERS_APPROVED_ON_CREATION,
@@ -37,7 +41,7 @@
)

USERNAME_FIELD = "email"
REQUIRED_FIELDS = []
REQUIRED_FIELDS: list[str] = []

objects = UserManager()

17 changes: 17 additions & 0 deletions gateway/sds_gateway/users/validators.py
@@ -0,0 +1,17 @@
"""Custom validators for the users app."""

import re

from django.core.exceptions import ValidationError
from django.utils.translation import gettext_lazy as _


def validate_orcid_id(value):
"""Validate the ORCID ID format."""
if not value:
return value

    # The last character of an ORCID iD is a checksum that may be a digit or "X".
    if not re.match(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$", value):
        raise ValidationError(
            _("ORCID ID must be in the format 0000-0000-0000-0000 (the final character may be X).")
        )

return value
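A minimal usage sketch for the validator, assuming Django settings are configured and the gateway project is importable (e.g. in a shell or test session); the sample identifiers are illustrative:

from django.core.exceptions import ValidationError

from sds_gateway.users.validators import validate_orcid_id

validate_orcid_id("")  # blank values pass through (the model field sets blank=True)
validate_orcid_id("0000-0002-1825-0097")  # well-formed ORCID iD passes

try:
    validate_orcid_id("not-an-orcid")
except ValidationError:
    print("malformed ORCID iD rejected")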