diff --git a/pybossa/api/__init__.py b/pybossa/api/__init__.py index ae034b7cb..407d3d8d4 100644 --- a/pybossa/api/__init__.py +++ b/pybossa/api/__init__.py @@ -557,6 +557,10 @@ def update_user_preferences(user_name): user_preferences = None if user: + # Capture old profile for logging before update + profile = user.info.get('metadata', {}).get('profile', '') + profile_old = json.loads(profile) if profile else {} + # Add a metadata section if not found. if 'metadata' not in user.info: user.info['metadata'] = {} @@ -569,10 +573,15 @@ def update_user_preferences(user_name): # Save user preferences. user_repo.update(user) + # Log profile update with old and new values + current_app.logger.info( + "User profile updated via api/preferences. User %s (id=%s, email=%s) " + "Old profile: %s, New profile: %s", + user.name, user.id, user.email_addr, profile_old, payload + ) # Clear user in cache. cached_users.delete_user_pref_metadata(user) - # Return updated metadata and user preferences. user_preferences = user.info.get('metadata', {}) diff --git a/pybossa/cache/task_browse_helpers.py b/pybossa/cache/task_browse_helpers.py index 967172963..3738d6c7b 100644 --- a/pybossa/cache/task_browse_helpers.py +++ b/pybossa/cache/task_browse_helpers.py @@ -385,6 +385,11 @@ def _get_field_filters(filters): def user_meet_task_requirement(task_id, user_filter, user_profile): + current_app.logger.info( + "Checking user meet worker filter requirement for task %s, worker_filter: %s, user_profile: %s", + task_id, user_filter, user_profile + ) + for field, filters in user_filter.items(): if not user_profile.get(field): # if user profile does not have attribute, user does not qualify for the task @@ -416,6 +421,7 @@ def user_meet_task_requirement(task_id, user_filter, user_profile): except Exception as e: current_app.logger.exception(f"Validating worker filter failed for task {task_id} on field {field}, comparison failed") return False + current_app.logger.info("User met worker filter requirement for Task %s, worker_filter: %s, user_profile: %s", task_id, user_filter, user_profile) return True diff --git a/pybossa/sched.py b/pybossa/sched.py index 211f6650b..0070d19d4 100644 --- a/pybossa/sched.py +++ b/pybossa/sched.py @@ -146,6 +146,10 @@ def template_get_locked_task(project_id, user_id=None, user_ip=None, if task_id: task = session.query(Task).get(task_id) if task: + current_app.logger.info( + "locked_scheduler. Project %s, User %s - task_id %s requested directly.", + project_id, user_id, task_id + ) return [task] if offset > 2: @@ -227,6 +231,10 @@ def template_get_locked_task(project_id, user_id=None, user_ip=None, # validate user qualification and calculate task preference score user_profile = json.loads(user_profile) if user_profile else {} + current_app.logger.info( + "locked_scheduler. Project %s, User %s - filter_user_prefs=%s, saved_task_position=%s, task_id_map=%s, user_profile=%s", + project_id, user_id, filter_user_prefs, saved_task_position, bool(task_id_map), user_profile + ) task_rank_info = [] for task_id, taskcount, n_answers, calibration, w_filter, w_pref, timeout in rows: score = 0 @@ -237,6 +245,11 @@ def template_get_locked_task(project_id, user_id=None, user_ip=None, score = -ttl # Saved tasks sink to the bottom, but with earliest saved task first elif ttl > 0 and saved_task_position == SavedTaskPositionEnum.FIRST: score = sys.maxsize - ttl # Earliest saved task first + current_app.logger.info( + "locked_scheduler. Task %s added via SAVED_TASK path (bypasses worker_filter). " + "User %s, project %s, w_filter=%s", + task_id, user_id, project_id, w_filter + ) task_rank_info.append((task_id, taskcount, n_answers, calibration, score, None, timeout)) elif filter_user_prefs: # Only include when filter requirement is met w_pref = w_pref or {} @@ -244,8 +257,22 @@ def template_get_locked_task(project_id, user_id=None, user_ip=None, meet_requirement = cached_task_browse_helpers.user_meet_task_requirement(task_id, w_filter, user_profile) if meet_requirement: score = cached_task_browse_helpers.get_task_preference_score(w_pref, user_profile) + current_app.logger.info( + "locked_scheduler. Task %s PASSED filter_user_prefs check. User %s, project %s, score=%s", + task_id, user_id, project_id, score + ) task_rank_info.append((task_id, taskcount, n_answers, calibration, score, None, timeout)) + else: + current_app.logger.info( + "locked_scheduler. Task %s EXCLUDED by filter_user_prefs. User %s, project %s, w_filter=%s", + task_id, user_id, project_id, w_filter + ) else: # Default/locker schedulers + current_app.logger.info( + "locked_scheduler. Task %s added via DEFAULT path (no worker_filter check). " + "User %s, project %s, filter_user_prefs=%s, w_filter=%s", + task_id, user_id, project_id, filter_user_prefs, w_filter + ) task_rank_info.append((task_id, taskcount, n_answers, calibration, score, None, timeout)) rows = sorted(task_rank_info, key=lambda tup: tup[4], reverse=True) @@ -445,19 +472,39 @@ def get_locked_task(project_id, user_id=None, limit=1, rand_within_priority=Fals @locked_scheduler -def get_user_pref_task(project_id, user_id=None, limit=1, rand_within_priority=False, - task_type='gold_last', filter_user_prefs=True, task_category_filters="", saved_task_position=None): - """ Select a new task based on user preference set under user profile. +def _get_user_pref_task_impl(project_id, user_id=None, limit=1, rand_within_priority=False, + task_type='gold_last', filter_user_prefs=True, task_category_filters="", + saved_task_position=None, user_ip=None, external_uid=None, offset=0, + orderby='priority_0', desc=True, task_id=None): + """Internal implementation of get_user_pref_task decorated with @locked_scheduler.""" + return locked_task_sql(project_id, user_id=user_id, limit=limit, + rand_within_priority=rand_within_priority, task_type=task_type, + filter_user_prefs=True, task_category_filters=task_category_filters) + + +def get_user_pref_task(project_id, user_id=None, user_ip=None, external_uid=None, + limit=1, offset=0, orderby='priority_0', desc=True, + rand_within_priority=False, task_type='gold_last', + task_category_filters="", saved_task_position=None, task_id=None, **kwargs): + """Select a new task based on user preference set under user profile. For each incomplete task, check if the number of users working on the task is smaller than the number of answers still needed. In that case, acquire a lock on the task that matches user preference(if any) with users profile and return the task to the user. If offset is nonzero, skip that amount of available tasks before returning to the user. + + This wrapper ensures filter_user_prefs=True is always used, regardless of + whether the function is called via new_task() or directly. """ - return locked_task_sql(project_id, user_id=user_id, limit=limit, - rand_within_priority=rand_within_priority, task_type=task_type, - filter_user_prefs=True, task_category_filters=task_category_filters) + return _get_user_pref_task_impl( + project_id, user_id=user_id, user_ip=user_ip, external_uid=external_uid, + limit=limit, offset=offset, orderby=orderby, desc=desc, + rand_within_priority=rand_within_priority, task_type=task_type, + filter_user_prefs=True, # Always True for user preference scheduler + task_category_filters=task_category_filters, + saved_task_position=saved_task_position, task_id=task_id + ) def fetch_lock_for_user(project_id, task_id, user_id): diff --git a/pybossa/view/account.py b/pybossa/view/account.py index f5183d685..f167cdd43 100644 --- a/pybossa/view/account.py +++ b/pybossa/view/account.py @@ -1159,12 +1159,30 @@ def add_metadata(name): country_name_to_country_code=app_settings.upref_mdata.country_name_to_country_code, country_code_to_country_name=app_settings.upref_mdata.country_code_to_country_name) + # Capture old profile for logging before update + old_metadata = user.info.get('metadata', {}) + old_profile = old_metadata.get('profile', '') + old_profile_parsed = json.loads(old_profile) if old_profile and isinstance(old_profile, str) else old_profile if old_profile else {} + user_pref, metadata = get_user_pref_and_metadata(name, form) user.info['metadata'] = metadata ensure_user_data_access_assignment_from_form(user.info, form) user.user_pref = user_pref + + # Get new profile for logging + new_profile = metadata.get('profile', '') + new_profile_parsed = json.loads(new_profile) if new_profile and isinstance(new_profile, str) else new_profile if new_profile else {} + + # Log profile update with old and new values + current_app.logger.info( + "User profile updated via account view. User %s (id=%s, email=%s) " + "Old profile: %s, New profile: %s", + user.name, user.id, user.email_addr, old_profile_parsed, new_profile_parsed + ) + user_repo.update(user) cached_users.delete_user_pref_metadata(user) + flash("Input saved successfully", "info") return redirect(url_for('account.profile', name=name)) diff --git a/setup.py b/setup.py index e0196b830..f62cfe5cc 100644 --- a/setup.py +++ b/setup.py @@ -129,7 +129,7 @@ "raven==6.10.0", "rax-default-network-flags-python-novaclient-ext==0.4.0", "rax-scheduled-images-python-novaclient-ext==0.3.1", - "readability-lxml==0.8.1", + "readability-lxml==0.8.4.1", "redis==3.5.3", "rednose==1.3.0", "requests==2.31.0", diff --git a/test/test_user_pref_sched.py b/test/test_user_pref_sched.py index 726ecd4f4..9b6906c6f 100644 --- a/test/test_user_pref_sched.py +++ b/test/test_user_pref_sched.py @@ -770,3 +770,203 @@ def test_upref_sched_gold_task(self): resp = json.loads(res.data) assert resp['id'] == tasks[0].id, \ 'task presented should not be gold task' + + @with_context + def test_task_routing_fte_only_filter(self): + ''' + Test that users with fte_only=1 in profile can get tasks with fte_only=[1, "=="] filter, + while users with fte_only!=1 cannot get such tasks + ''' + # Create user with fte_only=0.1 - should NOT get the task + user_info_non_fte = dict(metadata={ + "profile": json.dumps({ + "bbg_index": 0.1, + "mkt_cap": 0.1, + "private": 0.5, + "nace": 0.1, + "bics": 1.0, + "bclass": 0.1, + "tier": 0.7, + "attention": 0.8, + "fte_only": 0.1 + }) + }) + non_fte_user = UserFactory.create(id=500, name="john", fullname="john doe", info=user_info_non_fte) + user_repo.save(non_fte_user) + + # Create user with fte_only=1 - SHOULD get the task + user_info_fte = dict(metadata={ + "profile": json.dumps({ + "bbg_index": 0.1, + "mkt_cap": 0.1, + "private": 0.5, + "nace": 0.1, + "bics": 1.0, + "bclass": 0.1, + "tier": 0.7, + "attention": 0.8, + "fte_only": 1.0 + }) + }) + fte_user = UserFactory.create(id=501, name="jane", fullname="jane doe", info=user_info_fte) + user_repo.save(fte_user) + + # Clear user cache to ensure profile is loaded correctly + from pybossa.cache import users as cached_users + cached_users.delete_user_pref_metadata(non_fte_user) + cached_users.delete_user_pref_metadata(fte_user) + + # Create project with task_queue scheduler + project = ProjectFactory.create(owner=non_fte_user) + project.info['sched'] = Schedulers.task_queue + project_repo.save(project) + + # Create tasks - explicitly create task 2 without worker_filter + # Task 0: Requires fte_only=1 with all other filters + task0 = TaskFactory.create(project=project, n_answers=10) + task0.worker_filter = { + "bics": [1, "=="], + "nace": [0, ">="], + "tier": [0, ">="], + "bclass": [0, ">="], + "mkt_cap": [0, ">="], + "private": [0.5, ">="], + "fte_only": [1, "=="], + "bbg_index": [0, ">="] + } + task_repo.save(task0) + + # Task 1: Another task requiring fte_only=1 + task1 = TaskFactory.create(project=project, n_answers=10) + task1.worker_filter = { + "fte_only": [1, "=="], + "bics": [1, "=="] + } + task_repo.save(task1) + + # Task 2: No worker_filter - should be available to both users + task2 = TaskFactory.create(project=project, n_answers=10) + # Don't set worker_filter at all - leave it as default (None) + + # Test 1: User with fte_only=0.1 should NOT get tasks with fte_only=1 requirement + available_tasks_non_fte = n_available_tasks_for_user(project, non_fte_user.id) + assert available_tasks_non_fte == 1, \ + f'User with fte_only=0.1 should only get 1 task (no filter), but got {available_tasks_non_fte}' + + # Test 2: User with fte_only=1.0 should get all tasks + available_tasks_fte = n_available_tasks_for_user(project, fte_user.id) + assert available_tasks_fte == 3, \ + f'User with fte_only=1.0 should get all 3 tasks, but got {available_tasks_fte}' + + # Test 3: Verify task assignment via API for non-fte user + task_assigned_non_fte = get_user_pref_task(project.id, non_fte_user.id) + if task_assigned_non_fte: + assigned_task = task_assigned_non_fte[0] + # The assigned task should have no worker_filter or empty worker_filter + assert not assigned_task.worker_filter or assigned_task.worker_filter == {}, \ + f'User with fte_only=0.1 should only get task without worker_filter, got task {assigned_task.id} with filter {assigned_task.worker_filter}' + + # Test 4: Verify task assignment via API for fte user can get filtered tasks + task_assigned_fte = get_user_pref_task(project.id, fte_user.id) + assert task_assigned_fte, 'User with fte_only=1.0 should get a task' + # The assigned task should be one of the three available tasks + assert task_assigned_fte[0].id in [task0.id, task1.id, task2.id], \ + 'User with fte_only=1.0 should get one of the available tasks' + + @with_context + def test_task_routing_fte_only_strict_equality(self): + ''' + Test that fte_only equality filter strictly requires exact match + ''' + # Create user with fte_only=0.9 (close to 1 but not equal) + user_info = dict(metadata={ + "profile": json.dumps({ + "bbg_index": 0.5, + "mkt_cap": 0.5, + "private": 0.5, + "nace": 0.5, + "bics": 1.0, + "bclass": 0.5, + "tier": 0.5, + "fte_only": 0.9 + }) + }) + user = UserFactory.create(id=502, info=user_info) + user_repo.save(user) + + project = ProjectFactory.create(owner=user) + project.info['sched'] = Schedulers.task_queue + project_repo.save(project) + + # Create task requiring exact fte_only=1 + task = TaskFactory.create(project=project, n_answers=10) + task.worker_filter = { + "fte_only": [1, "=="], + "bics": [1, "=="] + } + task_repo.save(task) + + # User with fte_only=0.9 should NOT get task requiring fte_only=1 + available_tasks = n_available_tasks_for_user(project, user.id) + assert available_tasks == 0, \ + 'User with fte_only=0.9 should NOT get task requiring fte_only==1' + + @with_context + def test_task_routing_missing_profile_field(self): + ''' + Test that users without fte_only field in profile cannot get tasks requiring fte_only + ''' + # Create user without fte_only in profile + user_info = dict(metadata={ + "profile": json.dumps({ + "bbg_index": 0.5, + "bics": 1.0, + "tier": 0.5 + }) + }) + user = UserFactory.create(id=503, info=user_info) + user_repo.save(user) + + project = ProjectFactory.create(owner=user) + project.info['sched'] = Schedulers.task_queue + project_repo.save(project) + + # Create task requiring fte_only + task = TaskFactory.create(project=project, n_answers=10) + task.worker_filter = { + "fte_only": [1, "=="] + } + task_repo.save(task) + + # User without fte_only field should NOT get the task + available_tasks = n_available_tasks_for_user(project, user.id) + assert available_tasks == 0, \ + 'User without fte_only field in profile should NOT get task requiring fte_only' + + @with_context + def test_get_user_pref_task_with_task_id(self): + ''' + Test that passing a specific task_id to the scheduler returns that task + This covers the task_id parameter code path in locked_scheduler + ''' + owner = UserFactory.create(id=504) + owner.user_pref = {'languages': ['en']} + user_repo.save(owner) + + project = ProjectFactory.create(owner=owner) + project.info['sched'] = Schedulers.user_pref + project_repo.save(project) + + # Create multiple tasks + tasks = TaskFactory.create_batch(3, project=project, n_answers=10) + for task in tasks: + task.user_pref = {'languages': ['en']} + task_repo.save(task) + + # Request a specific task by task_id + target_task_id = tasks[1].id + result = get_user_pref_task(project.id, owner.id, task_id=target_task_id) + + assert result is not None, 'Should return a task when task_id is provided' + assert len(result) == 1, 'Should return exactly one task' + assert result[0].id == target_task_id, f'Should return the requested task {target_task_id}, but got {result[0].id}'