Skip to content

[ENG-7263] Fix/eng 7263 #11090

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
152 changes: 152 additions & 0 deletions osf/management/commands/fix_unclaimed_records_for_preprint_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import logging

from django.core.management.base import BaseCommand
from django.apps import apps
from django.db.models import Q

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command wrapper around update_unclaimed_records_for_preprint_versions."""

    help = 'Update unclaimed records for preprint versions'

    def add_arguments(self, parser):
        """Register the optional ``--dry-run`` boolean flag on the parser."""
        dry_run_flag = {
            'action': 'store_true',
            'dest': 'dry_run',
            'help': 'Run the command without saving changes',
        }
        parser.add_argument('--dry-run', **dry_run_flag)

    def handle(self, *args, **options):
        """Run the update, forwarding the ``dry_run`` option (False when absent)."""
        update_unclaimed_records_for_preprint_versions(
            dry_run=options.get('dry_run', False),
        )


def _save_unclaimed_record(user, preprint_id, claim_origin, referrer, given_name, email):
    """Add an unclaimed record keyed by ``preprint_id`` to ``user`` and save the user.

    Exceptions raised by ``add_unclaimed_record`` or ``save`` are not handled
    here; the caller decides how to report them.
    """
    logger.info(f'Adding unclaimed record for {preprint_id} for user {user._id}')
    user.unclaimed_records[preprint_id] = user.add_unclaimed_record(
        claim_origin=claim_origin,
        referrer=referrer,
        given_name=given_name,
        email=email,
        provided_pid=preprint_id,
    )
    user.save()
    logger.info(f'Successfully saved unclaimed record for {preprint_id}')


def update_unclaimed_records_for_preprint_versions(dry_run=False):
    """Backfill missing unclaimed records for every version of affected preprints.

    For each matching preprint, every contributor whose user has
    ``is_registered=False`` is checked. If the user already has at least one
    unclaimed record whose key contains the preprint's guid, the record with
    the lowest version number is used as the template (name, email, referrer)
    for versions ``1..latest`` that lack a record. Otherwise a record is
    created for each version's referent using the preprint creator as referrer
    and the user's own ``fullname`` / ``username``.

    Args:
        dry_run: When true, only log what would be added; nothing is saved.

    Returns:
        None. Progress and a final summary are written to the module logger.
    """
    Preprint = apps.get_model('osf.Preprint')
    Guid = apps.get_model('osf.Guid')
    OSFUser = apps.get_model('osf.OSFUser')
    GuidVersionsThrough = apps.get_model('osf.GuidVersionsThrough')

    # NOTE(review): this also matches preprints whose contributors are disabled
    # (date_disabled set), but only contributors with is_registered=False are
    # processed below -- confirm that asymmetry is intended.
    preprint_filters = (
        Q(preprintcontributor__user__is_registered=False) |
        Q(preprintcontributor__user__date_disabled__isnull=False)
    )
    # Built once and reused for both the count and the iteration so the two
    # can never drift apart.
    preprints = Preprint.objects.filter(preprint_filters).distinct('versioned_guids__guid')

    mode = 'DRY RUN' if dry_run else 'UPDATING'
    logger.info(f'Starting {mode} for unclaimed records for preprint versions')

    preprints_count = preprints.count()
    logger.info(f'Found {preprints_count} preprints with unregistered contributors')

    processed_count = 0
    skipped_count = 0
    updated_count = 0

    logger.info('-' * 50)
    logger.info(f'{mode} MODE')
    logger.info('-' * 50)

    delimiter = Preprint.GUID_VERSION_DELIMITER

    for preprint in preprints.prefetch_related('_contributors'):
        processed_count += 1
        try:
            guid, _version = Guid.split_guid(preprint._id)
            logger.info(f'[{processed_count}/{preprints_count}] Processing preprint {preprint._id}')

            latest_version_through = GuidVersionsThrough.objects.filter(guid___id=guid).last()
            if not latest_version_through:
                logger.error(f'No version found for guid {guid}, skipping')
                skipped_count += 1
                continue

            latest_version_number = latest_version_through.version
            unregistered_contributors = preprint.contributor_set.filter(user__is_registered=False)
            logger.info(f'Found {unregistered_contributors.count()} unregistered contributors for preprint {preprint._id}')

            for contributor in unregistered_contributors:
                try:
                    user = contributor.user
                    # NOTE(review): substring match. A matching key without the
                    # version delimiter makes the version lookup below raise,
                    # which is caught by this contributor's handler and skips
                    # the contributor.
                    guid_keys = [key for key in user.unclaimed_records if guid in key]

                    if guid_keys:
                        # Use the lowest-numbered existing version as the template.
                        earliest_key = min(guid_keys, key=lambda key: int(key.split(delimiter)[1]))
                        record_info = user.unclaimed_records[earliest_key]

                        for current_version in range(1, int(latest_version_number) + 1):
                            preprint_id = f'{guid}{delimiter}{current_version}'
                            if preprint_id in user.unclaimed_records:
                                continue
                            if dry_run:
                                logger.info(f'[DRY RUN] Would add unclaimed record for {preprint_id} for user {user._id}')
                                updated_count += 1
                                continue
                            try:
                                preprint_obj = Preprint.load(preprint_id)
                                referrer = OSFUser.load(record_info['referrer_id'])

                                if not preprint_obj:
                                    logger.error(f'Could not load preprint {preprint_id}, skipping')
                                    continue

                                if not referrer:
                                    logger.error(f'Could not load referrer {record_info["referrer_id"]}, skipping')
                                    continue

                                _save_unclaimed_record(
                                    user,
                                    preprint_id,
                                    claim_origin=preprint_obj,
                                    referrer=referrer,
                                    given_name=record_info.get('name', None),
                                    email=record_info.get('email', None),
                                )
                                updated_count += 1
                            except Exception as e:
                                # logger.exception keeps the traceback, which
                                # a plain str(e) message would drop.
                                logger.exception(f'Error adding unclaimed record for {preprint_id}: {e}')
                    else:
                        # No existing record for this guid: create one per version
                        # from the preprint's creator and the user's own details.
                        try:
                            all_versions = [
                                version_through.referent
                                for version_through in GuidVersionsThrough.objects.filter(guid___id=guid)
                            ]
                            logger.info(f'Found {len(all_versions)} versions for preprint with guid {guid}')

                            for current_preprint in all_versions:
                                preprint_id = current_preprint._id
                                if preprint_id in user.unclaimed_records:
                                    continue
                                if dry_run:
                                    logger.info(f'[DRY RUN] Would add unclaimed record for {preprint_id} for user {user._id}')
                                    updated_count += 1
                                    continue
                                try:
                                    _save_unclaimed_record(
                                        user,
                                        preprint_id,
                                        claim_origin=current_preprint,
                                        referrer=current_preprint.creator,
                                        given_name=user.fullname,
                                        email=user.username,
                                    )
                                    updated_count += 1
                                except Exception as e:
                                    logger.exception(f'Error adding unclaimed record for {preprint_id}: {e}')
                        except Exception as e:
                            logger.exception(f'Error processing versions for guid {guid}: {e}')
                except Exception as e:
                    logger.exception(f'Error processing contributor {contributor.id}: {e}')

        except Exception as e:
            # Logs the database id rather than _id, as _id is read inside the
            # guarded block above and may itself be what failed.
            logger.exception(f'Unexpected error processing preprint {preprint.id}: {e}')
            skipped_count += 1

    if dry_run:
        logger.info(f'Processed: {processed_count}, Would update: {updated_count}, Skipped: {skipped_count}')
    else:
        logger.info(f'Processed: {processed_count}, Updated: {updated_count}, Skipped: {skipped_count}')
15 changes: 15 additions & 0 deletions osf/models/preprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,21 @@ def create_version(cls, create_from_guid, auth):
sentry.log_exception(e)
sentry.log_message(f'Contributor was not added to new preprint version due to error: '
f'[preprint={preprint._id}, user={contributor.user._id}]')

# Add new version record for unregistered contributors
for contributor in preprint.contributor_set.filter(Q(user__is_registered=False) | Q(user__date_disabled__isnull=False)):
try:
contributor.user.add_unclaimed_record(
claim_origin=preprint,
referrer=auth.user,
email=contributor.user.email,
given_name=contributor.user.fullname,
)
except ValidationError as e:
sentry.log_exception(e)
sentry.log_message(f'Unregistered contributor was not added to new preprint version due to error: '
f'[preprint={preprint._id}, user={contributor.user._id}]')

# Add affiliated institutions
for institution in latest_version.affiliated_institutions.all():
preprint.add_affiliated_institution(institution, auth.user, ignore_user_affiliation=True)
Expand Down
1 change: 1 addition & 0 deletions osf/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@

MAX_QUICKFILES_MERGE_RENAME_ATTEMPTS = 1000


def get_default_mailing_lists():
    """Return a new dict mapping 'Open Science Framework Help' to True."""
    defaults = {}
    defaults['Open Science Framework Help'] = True
    return defaults

Expand Down
Loading