# Patch metadata retained from the original diff header:
#   diff --git a/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py
#              b/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py
#   new file mode 100644, index 00000000000..655e4b6c039
"""Management command that backfills unclaimed records for preprint versions."""
import logging

from django.core.management.base import BaseCommand
from django.apps import apps
from django.db.models import Q

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Command-line entry point for the unclaimed-record backfill."""

    help = 'Update unclaimed records for preprint versions'

    def add_arguments(self, parser):
        """Register the ``--dry-run`` flag."""
        parser.add_argument(
            '--dry-run',
            action='store_true',
            dest='dry_run',
            help='Run the command without saving changes',
        )

    def handle(self, *args, **options):
        """Run the backfill, passing through the ``--dry-run`` option."""
        dry_run = options.get('dry_run', False)
        update_unclaimed_records_for_preprint_versions(dry_run=dry_run)


def _version_from_record_key(record_key, delimiter):
    """Return the version number encoded in an unclaimed-record key.

    A key without a version suffix yields 0, so it sorts ahead of every
    versioned key instead of raising ``IndexError`` and aborting the
    contributor. A non-numeric suffix still raises ``ValueError``.
    """
    parts = record_key.split(delimiter)
    return int(parts[1]) if len(parts) > 1 else 0


def _save_unclaimed_record(user, preprint, referrer, given_name, email, preprint_id):
    """Add the unclaimed record for ``preprint_id`` to ``user`` and save the user."""
    logger.info(f'Adding unclaimed record for {preprint_id} for user {user._id}')
    user.unclaimed_records[preprint_id] = user.add_unclaimed_record(
        claim_origin=preprint,
        referrer=referrer,
        given_name=given_name,
        email=email,
        provided_pid=preprint_id,
    )
    user.save()
    logger.info(f'Successfully saved unclaimed record for {preprint_id}')


def _backfill_from_existing_record(user, guid, record_keys, latest_version_number, dry_run):
    """Copy the earliest existing record of ``guid`` to every version lacking one.

    Returns the number of records added (or that would be added in dry-run).
    """
    Preprint = apps.get_model('osf.Preprint')
    OSFUser = apps.get_model('osf.OSFUser')
    delimiter = Preprint.GUID_VERSION_DELIMITER

    # The lowest-versioned record is the template for name/email/referrer.
    template_key = min(
        record_keys,
        key=lambda key: _version_from_record_key(key, delimiter),
    )
    record_info = user.unclaimed_records[template_key]

    added = 0
    for current_version in range(1, int(latest_version_number) + 1):
        preprint_id = f'{guid}{delimiter}{current_version}'
        if preprint_id in user.unclaimed_records:
            continue
        if dry_run:
            logger.info(f'[DRY RUN] Would add unclaimed record for {preprint_id} for user {user._id}')
            added += 1
            continue
        try:
            preprint_obj = Preprint.load(preprint_id)
            referrer = OSFUser.load(record_info['referrer_id'])

            if not preprint_obj:
                logger.error(f'Could not load preprint {preprint_id}, skipping')
                continue

            if not referrer:
                logger.error(f'Could not load referrer {record_info["referrer_id"]}, skipping')
                continue

            _save_unclaimed_record(
                user,
                preprint_obj,
                referrer,
                record_info.get('name', None),
                record_info.get('email', None),
                preprint_id,
            )
            added += 1
        except Exception as e:
            # Best-effort per version: log with traceback and keep going.
            logger.exception(f'Error adding unclaimed record for {preprint_id}: {e}')
    return added


def _backfill_without_existing_record(user, guid, dry_run):
    """Create records for every version of ``guid`` when the user has none for it.

    The record is built from the user's own ``fullname``/``username`` with the
    version's creator as referrer. Returns the number of records added (or
    that would be added in dry-run).
    """
    GuidVersionsThrough = apps.get_model('osf.GuidVersionsThrough')

    added = 0
    try:
        all_versions = [
            version_through.referent
            for version_through in GuidVersionsThrough.objects.filter(guid___id=guid)
        ]
        logger.info(f'Found {len(all_versions)} versions for preprint with guid {guid}')

        for current_preprint in all_versions:
            preprint_id = current_preprint._id
            if preprint_id in user.unclaimed_records:
                continue
            if dry_run:
                logger.info(f'[DRY RUN] Would add unclaimed record for {preprint_id} for user {user._id}')
                added += 1
                continue
            try:
                _save_unclaimed_record(
                    user,
                    current_preprint,
                    current_preprint.creator,
                    user.fullname,
                    user.username,
                    preprint_id,
                )
                added += 1
            except Exception as e:
                logger.exception(f'Error adding unclaimed record for {preprint_id}: {e}')
    except Exception as e:
        logger.exception(f'Error processing versions for guid {guid}: {e}')
    return added


def update_unclaimed_records_for_preprint_versions(dry_run=False):
    """Ensure unregistered contributors have an unclaimed record per preprint version.

    One preprint per versioned guid is selected when any of its contributors
    is unregistered or has a non-null ``date_disabled``. For each of its
    contributors with ``is_registered=False``:

    * if the user already has records whose key contains the guid, the
      lowest-versioned one is copied to every version from 1 to the latest
      that lacks a record;
    * otherwise a record is created for every version of the guid.

    Failures are logged with a traceback and processing continues with the
    next version, contributor or preprint.

    :param dry_run: when true, only log and count what would be added;
        nothing is saved.
    """
    Preprint = apps.get_model('osf.Preprint')
    Guid = apps.get_model('osf.Guid')
    GuidVersionsThrough = apps.get_model('osf.GuidVersionsThrough')

    # NOTE(review): this filter also matches preprints whose contributors are
    # disabled users, but only `is_registered=False` contributors are processed
    # below - confirm whether disabled users should be handled as well.
    preprint_filters = (
        Q(preprintcontributor__user__is_registered=False) |
        Q(preprintcontributor__user__date_disabled__isnull=False)
    )

    mode = 'DRY RUN' if dry_run else 'UPDATING'
    logger.info(f'Starting {mode} for unclaimed records for preprint versions')

    preprints = Preprint.objects.filter(preprint_filters).distinct('versioned_guids__guid')
    preprints_count = preprints.count()

    logger.info(f'Found {preprints_count} preprints with unregistered contributors')

    processed_count = 0
    skipped_count = 0
    updated_count = 0

    logger.info('-' * 50)
    logger.info(f'{mode} MODE')
    logger.info('-' * 50)

    for preprint in preprints.prefetch_related('_contributors'):
        processed_count += 1
        try:
            guid, _ = Guid.split_guid(preprint._id)
            logger.info(f'[{processed_count}/{preprints_count}] Processing preprint {preprint._id}')

            latest_version_through = GuidVersionsThrough.objects.filter(guid___id=guid).last()
            if not latest_version_through:
                logger.error(f'No version found for guid {guid}, skipping')
                skipped_count += 1
                continue

            latest_version_number = latest_version_through.version
            unregistered_contributors = preprint.contributor_set.filter(user__is_registered=False)
            logger.info(f'Found {unregistered_contributors.count()} unregistered contributors for preprint {preprint._id}')

            for contributor in unregistered_contributors:
                try:
                    user = contributor.user
                    record_keys = [key for key in user.unclaimed_records if guid in key]
                    if record_keys:
                        updated_count += _backfill_from_existing_record(
                            user, guid, record_keys, latest_version_number, dry_run,
                        )
                    else:
                        updated_count += _backfill_without_existing_record(user, guid, dry_run)
                except Exception as e:
                    logger.exception(f'Error processing contributor {contributor.id}: {e}')

        except Exception as e:
            logger.exception(f'Unexpected error processing preprint {preprint.id}: {e}')
            skipped_count += 1

    if dry_run:
        logger.info(f'Processed: {processed_count}, Would update: {updated_count}, Skipped: {skipped_count}')
    else:
        logger.info(f'Processed: {processed_count}, Updated: {updated_count}, Skipped: {skipped_count}')


# Patch metadata retained from the original diff (next file in the patch):
#   diff --git a/osf/models/preprint.py b/osf/models/preprint.py
#   index bb9bec0190e..917d99b638b 100644
@@ -463,6 +463,21 @@ def create_version(cls, create_from_guid, auth): sentry.log_exception(e) sentry.log_message(f'Contributor was not added to new preprint version due to error: ' f'[preprint={preprint._id}, user={contributor.user._id}]') + + # Add new version record for unregistered contributors + for contributor in preprint.contributor_set.filter(Q(user__is_registered=False) | Q(user__date_disabled__isnull=False)): + try: + contributor.user.add_unclaimed_record( + claim_origin=preprint, + referrer=auth.user, + email=contributor.user.email, + given_name=contributor.user.fullname, + ) + except ValidationError as e: + sentry.log_exception(e) + sentry.log_message(f'Unregistered contributor was not added to new preprint version due to error: ' + f'[preprint={preprint._id}, user={contributor.user._id}]') + # Add affiliated institutions for institution in latest_version.affiliated_institutions.all(): preprint.add_affiliated_institution(institution, auth.user, ignore_user_affiliation=True) diff --git a/osf/models/user.py b/osf/models/user.py index 5a1183f9547..06405d691ab 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -68,6 +68,7 @@ MAX_QUICKFILES_MERGE_RENAME_ATTEMPTS = 1000 + def get_default_mailing_lists(): return {'Open Science Framework Help': True}