gateway/sds_gateway/api_methods/migrations/0016_convert_dataset_authors_to_object_format.py
@@ -0,0 +1,165 @@
"""Migration to convert dataset authors from string format to object format."""

import json
import logging

from django.db import migrations

logger = logging.getLogger(__name__)


def convert_authors_to_object_format(apps, schema_editor):
"""Convert dataset authors from string format to object format."""
Dataset = apps.get_model("api_methods", "Dataset")

# Get all datasets with authors
datasets = Dataset.objects.filter(authors__isnull=False).exclude(authors="")
total_count = datasets.count()

if total_count == 0:
logger.info("No datasets with authors found to convert")
return

logger.info("Converting %d datasets with authors to new format", total_count)

updated_count = 0
skipped_count = 0
error_count = 0

for dataset in datasets:
try:
# Parse current authors
if isinstance(dataset.authors, str):
try:
current_authors = json.loads(dataset.authors)
except (json.JSONDecodeError, TypeError):
# Skip datasets with invalid JSON
logger.warning("Dataset %s has invalid authors JSON, skipping", dataset.uuid)
skipped_count += 1
continue
else:
current_authors = dataset.authors

if not current_authors:
skipped_count += 1
continue

# Check if already in new format (has dict with 'name' key)
if isinstance(current_authors, list) and current_authors:
if isinstance(current_authors[0], dict) and "name" in current_authors[0]:
logger.debug("Dataset %s already in new format, skipping", dataset.uuid)
skipped_count += 1
continue

# Convert string authors to object format
if isinstance(current_authors[0], str):
new_authors = []
for author in current_authors:
if isinstance(author, str):
new_authors.append({"name": author, "orcid_id": ""})
else:
# Handle unexpected format
logger.warning(
"Dataset %s has unexpected author format: %s",
dataset.uuid, author
)
new_authors.append({"name": str(author), "orcid_id": ""})

# Update the dataset
dataset.authors = json.dumps(new_authors)
dataset.save(update_fields=["authors"])
updated_count += 1
logger.debug("Converted dataset %s authors", dataset.uuid)
else:
# Authors are already objects, skip
skipped_count += 1
else:
# Empty or invalid authors list
skipped_count += 1

except Exception as e:
error_count += 1
logger.error("Error converting dataset %s: %s", dataset.uuid, e)

logger.info(
"Author conversion complete: %d updated, %d skipped, %d errors",
updated_count, skipped_count, error_count
)


def reverse_authors_to_string_format(apps, schema_editor):
"""Reverse migration: convert authors back to string format."""
Dataset = apps.get_model("api_methods", "Dataset")

# Get all datasets with authors
datasets = Dataset.objects.filter(authors__isnull=False).exclude(authors="")
total_count = datasets.count()

if total_count == 0:
logger.info("No datasets with authors found to reverse")
return

logger.info("Reversing %d datasets with authors to string format", total_count)

updated_count = 0
skipped_count = 0
error_count = 0

for dataset in datasets:
try:
# Parse current authors
if isinstance(dataset.authors, str):
try:
current_authors = json.loads(dataset.authors)
except (json.JSONDecodeError, TypeError):
skipped_count += 1
continue
else:
current_authors = dataset.authors

if not current_authors:
skipped_count += 1
continue

# Check if in object format (has dict with 'name' key)
if isinstance(current_authors, list) and current_authors:
if isinstance(current_authors[0], dict) and "name" in current_authors[0]:
# Convert object authors back to string format
string_authors = [author["name"] for author in current_authors]

# Update the dataset
dataset.authors = json.dumps(string_authors)
dataset.save(update_fields=["authors"])
updated_count += 1
logger.debug("Reversed dataset %s authors", dataset.uuid)
else:
# Already in string format, skip
skipped_count += 1
else:
# Empty or invalid authors list
skipped_count += 1

except Exception as e:
error_count += 1
logger.error("Error reversing dataset %s: %s", dataset.uuid, e)

logger.info(
"Author reversal complete: %d updated, %d skipped, %d errors",
updated_count, skipped_count, error_count
)


class Migration(migrations.Migration):
"""Migration to convert dataset authors to object format."""

dependencies = [
("api_methods", "0015_rename_postprocesseddata_deprecatedpostprocesseddata_and_more"),
]

operations = [
migrations.RunPython(
convert_authors_to_object_format,
reverse_authors_to_string_format,
hints={"target_db": "default"},
),
]
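For context, here is a minimal sketch of the data shapes this migration converts between; the author names below are hypothetical examples, not values from the codebase:

import json

# Old format: a JSON-encoded list of plain name strings.
old_authors = json.dumps(["Ada Lovelace", "Grace Hopper"])

# New format written by the forward migration: a JSON-encoded list of
# objects, each with a name and an (initially empty) ORCID iD.
new_authors = json.dumps([
    {"name": "Ada Lovelace", "orcid_id": ""},
    {"name": "Grace Hopper", "orcid_id": ""},
])

# The reverse migration keeps only the names, restoring the old shape.
restored = [author["name"] for author in json.loads(new_authors)]
assert restored == json.loads(old_authors)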
gateway/sds_gateway/api_methods/migrations/max_migration.txt
@@ -1 +1 @@
0016_alter_usersharepermission_permission_level
0016_convert_dataset_authors_to_object_format
65 changes: 18 additions & 47 deletions gateway/sds_gateway/api_methods/models.py
@@ -14,7 +14,6 @@
from django.db import models
from django.db.models import ProtectedError
from django.db.models import QuerySet
from django.db.models.signals import post_delete
from django.db.models.signals import post_save
from django.db.models.signals import pre_delete
from django.dispatch import receiver
@@ -665,26 +664,30 @@ def from_db(cls, db, field_names, values):
setattr(instance, field, json.loads(getattr(instance, field)))
return instance

def update_authors_field(self):
"""Update the authors field based on current permissions."""
authors_data = UserSharePermission.get_dataset_authors(self.uuid)
author_names = [author["name"] for author in authors_data]

# Update the authors field
self.authors = author_names
self.save(update_fields=["authors"])

def get_authors_display(self):
"""Get the authors as a list for display purposes."""
if not self.authors:
return []

if isinstance(self.authors, str):
try:
return json.loads(self.authors)
except (json.JSONDecodeError, TypeError):
return [self.authors]
# from_db should have already converted JSON string to list
if not isinstance(self.authors, list):
log.warning(
"Dataset %s: authors field is not a list (type: %s)",
self.uuid,
type(self.authors).__name__,
)
return []

# Check if authors are in old string format and need conversion
if self.authors and isinstance(self.authors[0], str):
log.warning(
"Dataset %s: authors still in old string format, needs migration",
self.uuid,
)
# Convert old format for backward compatibility
return [{"name": author, "orcid_id": ""} for author in self.authors]

# Authors should already be in new object format
return self.authors
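For reference, a standalone sketch that mirrors the fallback behaviour get_authors_display now implements; the helper name and sample values below are illustrative, not part of the codebase:

def to_display_authors(authors):
    # Non-list or empty values yield an empty list, legacy string lists are
    # wrapped into objects with an empty ORCID iD, and object lists pass
    # through unchanged (the real method also logs a warning for non-list
    # values and for the legacy string format).
    if not authors or not isinstance(authors, list):
        return []
    if isinstance(authors[0], str):
        return [{"name": name, "orcid_id": ""} for name in authors]
    return authors

assert to_display_authors(None) == []
assert to_display_authors(["Ada Lovelace"]) == [{"name": "Ada Lovelace", "orcid_id": ""}]
assert to_display_authors([{"name": "Grace Hopper", "orcid_id": ""}]) == [
    {"name": "Grace Hopper", "orcid_id": ""}
]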


@@ -1415,35 +1418,3 @@ def handle_sharegroup_soft_delete(sender, instance: ShareGroup, **kwargs) -> None:
# Update the enabled status based on remaining groups
permission.update_enabled_status()
permission.save()


@receiver(post_save, sender=UserSharePermission)
def handle_usersharepermission_change(
sender, instance: UserSharePermission, **kwargs
) -> None:
"""
Handle changes to UserSharePermission by updating dataset authors field.
"""
if instance.item_type == ItemType.DATASET and instance.is_enabled:
# Update the authors field for the dataset
dataset = Dataset.objects.filter(
uuid=instance.item_uuid, is_deleted=False
).first()
if dataset:
dataset.update_authors_field()


@receiver(post_delete, sender=UserSharePermission)
def handle_usersharepermission_delete(
sender, instance: UserSharePermission, **kwargs
) -> None:
"""
Handle deletion of UserSharePermission by updating dataset authors field.
"""
if instance.item_type == ItemType.DATASET:
# Update the authors field for the dataset
dataset = Dataset.objects.filter(
uuid=instance.item_uuid, is_deleted=False
).first()
if dataset:
dataset.update_authors_field()
gateway/sds_gateway/users/migrations/0011_user_orcid_id_alter_user_is_approved.py
@@ -0,0 +1,24 @@
# Generated by Django 4.2.14 on 2025-09-04 12:52

from django.db import migrations, models
from ..validators import validate_orcid_id


class Migration(migrations.Migration):

dependencies = [
('users', '0010_userapikey_description_and_name'),
]

operations = [
migrations.AddField(
model_name='user',
name='orcid_id',
field=models.CharField(
blank=True,
max_length=19,
validators=[validate_orcid_id],
verbose_name='ORCID ID'
),
),
]
2 changes: 1 addition & 1 deletion gateway/sds_gateway/users/migrations/max_migration.txt
@@ -1 +1 @@
0010_userapikey_description_and_name
0011_user_orcid_id_alter_user_is_approved
6 changes: 5 additions & 1 deletion gateway/sds_gateway/users/models.py
@@ -13,6 +13,7 @@

from .managers import APIKeyUserManager
from .managers import UserManager
from .validators import validate_orcid_id


class User(AbstractUser):
@@ -28,6 +29,9 @@ class User(AbstractUser):
username: str | None = None
name = models.CharField(_("Name of User"), blank=True, max_length=255)
email = models.EmailField(_("Email address"), unique=True)
orcid_id = models.CharField(
_("ORCID ID"), blank=True, max_length=19, validators=[validate_orcid_id]
)
is_approved = models.BooleanField(
_("Approved"),
default=settings.SDS_NEW_USERS_APPROVED_ON_CREATION,
@@ -37,7 +41,7 @@
)

USERNAME_FIELD = "email"
REQUIRED_FIELDS = []
REQUIRED_FIELDS: list[str] = []

objects = UserManager()

17 changes: 17 additions & 0 deletions gateway/sds_gateway/users/validators.py
@@ -0,0 +1,17 @@
"""Custom validators for the users app."""

import re

from django.core.exceptions import ValidationError
from django.utils.translation import gettext_lazy as _


def validate_orcid_id(value):
"""Validate the ORCID ID format."""
if not value:
return value

    # The last character of an ORCID iD is a checksum that may be a digit or "X".
    if not re.match(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$", value):
        raise ValidationError(
            _("ORCID ID must be in the format 0000-0000-0000-0000 (the final character may be X).")
        )

return value
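A minimal usage sketch for the validator, assuming Django settings are configured and the gateway project is importable (e.g. in a shell or test session); the sample identifiers are illustrative:

from django.core.exceptions import ValidationError

from sds_gateway.users.validators import validate_orcid_id

validate_orcid_id("")  # blank values pass through (the model field sets blank=True)
validate_orcid_id("0000-0002-1825-0097")  # well-formed ORCID iD passes

try:
    validate_orcid_id("not-an-orcid")
except ValidationError:
    print("malformed ORCID iD rejected")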