Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion pybossa/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,10 @@ def update_user_preferences(user_name):

user_preferences = None
if user:
# Capture old profile for logging before update
profile = user.info.get('metadata', {}).get('profile', '')
profile_old = json.loads(profile) if profile else {}

# Add a metadata section if not found.
if 'metadata' not in user.info:
user.info['metadata'] = {}
Expand All @@ -569,10 +573,15 @@ def update_user_preferences(user_name):

# Save user preferences.
user_repo.update(user)
# Log profile update with old and new values
current_app.logger.info(
"User profile updated via api/preferences. User %s (id=%s, email=%s) "
"Old profile: %s, New profile: %s",
user.name, user.id, user.email_addr, profile_old, payload
)

# Clear user in cache.
cached_users.delete_user_pref_metadata(user)

# Return updated metadata and user preferences.
user_preferences = user.info.get('metadata', {})

Expand Down
6 changes: 6 additions & 0 deletions pybossa/cache/task_browse_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,11 @@ def _get_field_filters(filters):


def user_meet_task_requirement(task_id, user_filter, user_profile):
current_app.logger.info(
"Checking user meet worker filter requirement for task %s, worker_filter: %s, user_profile: %s",
task_id, user_filter, user_profile
)

for field, filters in user_filter.items():
if not user_profile.get(field):
# if user profile does not have attribute, user does not qualify for the task
Expand Down Expand Up @@ -416,6 +421,7 @@ def user_meet_task_requirement(task_id, user_filter, user_profile):
except Exception as e:
current_app.logger.exception(f"Validating worker filter failed for task {task_id} on field {field}, comparison failed")
return False
current_app.logger.info("User met worker filter requirement for Task %s, worker_filter: %s, user_profile: %s", task_id, user_filter, user_profile)
return True


Expand Down
59 changes: 53 additions & 6 deletions pybossa/sched.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ def template_get_locked_task(project_id, user_id=None, user_ip=None,
if task_id:
task = session.query(Task).get(task_id)
if task:
current_app.logger.info(
"locked_scheduler. Project %s, User %s - task_id %s requested directly.",
project_id, user_id, task_id
)
return [task]

if offset > 2:
Expand Down Expand Up @@ -227,6 +231,10 @@ def template_get_locked_task(project_id, user_id=None, user_ip=None,

# validate user qualification and calculate task preference score
user_profile = json.loads(user_profile) if user_profile else {}
current_app.logger.info(
"locked_scheduler. Project %s, User %s - filter_user_prefs=%s, saved_task_position=%s, task_id_map=%s, user_profile=%s",
project_id, user_id, filter_user_prefs, saved_task_position, bool(task_id_map), user_profile
)
task_rank_info = []
for task_id, taskcount, n_answers, calibration, w_filter, w_pref, timeout in rows:
score = 0
Expand All @@ -237,15 +245,34 @@ def template_get_locked_task(project_id, user_id=None, user_ip=None,
score = -ttl # Saved tasks sink to the bottom, but with earliest saved task first
elif ttl > 0 and saved_task_position == SavedTaskPositionEnum.FIRST:
score = sys.maxsize - ttl # Earliest saved task first
current_app.logger.info(
"locked_scheduler. Task %s added via SAVED_TASK path (bypasses worker_filter). "
"User %s, project %s, w_filter=%s",
task_id, user_id, project_id, w_filter
)
task_rank_info.append((task_id, taskcount, n_answers, calibration, score, None, timeout))
elif filter_user_prefs: # Only include when filter requirement is met
w_pref = w_pref or {}
w_filter = w_filter or {}
meet_requirement = cached_task_browse_helpers.user_meet_task_requirement(task_id, w_filter, user_profile)
if meet_requirement:
score = cached_task_browse_helpers.get_task_preference_score(w_pref, user_profile)
current_app.logger.info(
"locked_scheduler. Task %s PASSED filter_user_prefs check. User %s, project %s, score=%s",
task_id, user_id, project_id, score
)
task_rank_info.append((task_id, taskcount, n_answers, calibration, score, None, timeout))
else:
current_app.logger.info(
"locked_scheduler. Task %s EXCLUDED by filter_user_prefs. User %s, project %s, w_filter=%s",
task_id, user_id, project_id, w_filter
)
else: # Default/locker schedulers
current_app.logger.info(
"locked_scheduler. Task %s added via DEFAULT path (no worker_filter check). "
"User %s, project %s, filter_user_prefs=%s, w_filter=%s",
task_id, user_id, project_id, filter_user_prefs, w_filter
)
task_rank_info.append((task_id, taskcount, n_answers, calibration, score, None, timeout))
rows = sorted(task_rank_info, key=lambda tup: tup[4], reverse=True)

Expand Down Expand Up @@ -445,19 +472,39 @@ def get_locked_task(project_id, user_id=None, limit=1, rand_within_priority=Fals


@locked_scheduler
def get_user_pref_task(project_id, user_id=None, limit=1, rand_within_priority=False,
task_type='gold_last', filter_user_prefs=True, task_category_filters="", saved_task_position=None):
""" Select a new task based on user preference set under user profile.
def _get_user_pref_task_impl(project_id, user_id=None, limit=1, rand_within_priority=False,
task_type='gold_last', filter_user_prefs=True, task_category_filters="",
saved_task_position=None, user_ip=None, external_uid=None, offset=0,
orderby='priority_0', desc=True, task_id=None):
"""Internal implementation of get_user_pref_task decorated with @locked_scheduler."""
return locked_task_sql(project_id, user_id=user_id, limit=limit,
rand_within_priority=rand_within_priority, task_type=task_type,
filter_user_prefs=True, task_category_filters=task_category_filters)


def get_user_pref_task(project_id, user_id=None, user_ip=None, external_uid=None,
limit=1, offset=0, orderby='priority_0', desc=True,
rand_within_priority=False, task_type='gold_last',
task_category_filters="", saved_task_position=None, task_id=None, **kwargs):
"""Select a new task based on user preference set under user profile.

For each incomplete task, check if the number of users working on the task
is smaller than the number of answers still needed. In that case, acquire
a lock on the task that matches user preference(if any) with users profile
and return the task to the user. If offset is nonzero, skip that amount of
available tasks before returning to the user.

This wrapper ensures filter_user_prefs=True is always used, regardless of
whether the function is called via new_task() or directly.
"""
return locked_task_sql(project_id, user_id=user_id, limit=limit,
rand_within_priority=rand_within_priority, task_type=task_type,
filter_user_prefs=True, task_category_filters=task_category_filters)
return _get_user_pref_task_impl(
project_id, user_id=user_id, user_ip=user_ip, external_uid=external_uid,
limit=limit, offset=offset, orderby=orderby, desc=desc,
rand_within_priority=rand_within_priority, task_type=task_type,
filter_user_prefs=True, # Always True for user preference scheduler
task_category_filters=task_category_filters,
saved_task_position=saved_task_position, task_id=task_id
)


def fetch_lock_for_user(project_id, task_id, user_id):
Expand Down
18 changes: 18 additions & 0 deletions pybossa/view/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,12 +1159,30 @@ def add_metadata(name):
country_name_to_country_code=app_settings.upref_mdata.country_name_to_country_code,
country_code_to_country_name=app_settings.upref_mdata.country_code_to_country_name)

# Capture old profile for logging before update
old_metadata = user.info.get('metadata', {})
old_profile = old_metadata.get('profile', '')
old_profile_parsed = json.loads(old_profile) if old_profile and isinstance(old_profile, str) else old_profile if old_profile else {}

user_pref, metadata = get_user_pref_and_metadata(name, form)
user.info['metadata'] = metadata
ensure_user_data_access_assignment_from_form(user.info, form)
user.user_pref = user_pref

# Get new profile for logging
new_profile = metadata.get('profile', '')
new_profile_parsed = json.loads(new_profile) if new_profile and isinstance(new_profile, str) else new_profile if new_profile else {}

# Log profile update with old and new values
current_app.logger.info(
"User profile updated via account view. User %s (id=%s, email=%s) "
"Old profile: %s, New profile: %s",
user.name, user.id, user.email_addr, old_profile_parsed, new_profile_parsed
)

user_repo.update(user)
cached_users.delete_user_pref_metadata(user)

flash("Input saved successfully", "info")
return redirect(url_for('account.profile', name=name))

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
"raven==6.10.0",
"rax-default-network-flags-python-novaclient-ext==0.4.0",
"rax-scheduled-images-python-novaclient-ext==0.3.1",
"readability-lxml==0.8.1",
"readability-lxml==0.8.4.1",
"redis==3.5.3",
"rednose==1.3.0",
"requests==2.31.0",
Expand Down
200 changes: 200 additions & 0 deletions test/test_user_pref_sched.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,3 +770,203 @@ def test_upref_sched_gold_task(self):
resp = json.loads(res.data)
assert resp['id'] == tasks[0].id, \
'task presented should not be gold task'

@with_context
def test_task_routing_fte_only_filter(self):
'''
Test that users with fte_only=1 in profile can get tasks with fte_only=[1, "=="] filter,
while users with fte_only!=1 cannot get such tasks
'''
# Create user with fte_only=0.1 - should NOT get the task
user_info_non_fte = dict(metadata={
"profile": json.dumps({
"bbg_index": 0.1,
"mkt_cap": 0.1,
"private": 0.5,
"nace": 0.1,
"bics": 1.0,
"bclass": 0.1,
"tier": 0.7,
"attention": 0.8,
"fte_only": 0.1
})
})
non_fte_user = UserFactory.create(id=500, name="john", fullname="john doe", info=user_info_non_fte)
user_repo.save(non_fte_user)

# Create user with fte_only=1 - SHOULD get the task
user_info_fte = dict(metadata={
"profile": json.dumps({
"bbg_index": 0.1,
"mkt_cap": 0.1,
"private": 0.5,
"nace": 0.1,
"bics": 1.0,
"bclass": 0.1,
"tier": 0.7,
"attention": 0.8,
"fte_only": 1.0
})
})
fte_user = UserFactory.create(id=501, name="jane", fullname="jane doe", info=user_info_fte)
user_repo.save(fte_user)

# Clear user cache to ensure profile is loaded correctly
from pybossa.cache import users as cached_users
cached_users.delete_user_pref_metadata(non_fte_user)
cached_users.delete_user_pref_metadata(fte_user)

# Create project with task_queue scheduler
project = ProjectFactory.create(owner=non_fte_user)
project.info['sched'] = Schedulers.task_queue
project_repo.save(project)

# Create tasks - explicitly create task 2 without worker_filter
# Task 0: Requires fte_only=1 with all other filters
task0 = TaskFactory.create(project=project, n_answers=10)
task0.worker_filter = {
"bics": [1, "=="],
"nace": [0, ">="],
"tier": [0, ">="],
"bclass": [0, ">="],
"mkt_cap": [0, ">="],
"private": [0.5, ">="],
"fte_only": [1, "=="],
"bbg_index": [0, ">="]
}
task_repo.save(task0)

# Task 1: Another task requiring fte_only=1
task1 = TaskFactory.create(project=project, n_answers=10)
task1.worker_filter = {
"fte_only": [1, "=="],
"bics": [1, "=="]
}
task_repo.save(task1)

# Task 2: No worker_filter - should be available to both users
task2 = TaskFactory.create(project=project, n_answers=10)
# Don't set worker_filter at all - leave it as default (None)

# Test 1: User with fte_only=0.1 should NOT get tasks with fte_only=1 requirement
available_tasks_non_fte = n_available_tasks_for_user(project, non_fte_user.id)
assert available_tasks_non_fte == 1, \
f'User with fte_only=0.1 should only get 1 task (no filter), but got {available_tasks_non_fte}'

# Test 2: User with fte_only=1.0 should get all tasks
available_tasks_fte = n_available_tasks_for_user(project, fte_user.id)
assert available_tasks_fte == 3, \
f'User with fte_only=1.0 should get all 3 tasks, but got {available_tasks_fte}'

# Test 3: Verify task assignment via API for non-fte user
task_assigned_non_fte = get_user_pref_task(project.id, non_fte_user.id)
if task_assigned_non_fte:
assigned_task = task_assigned_non_fte[0]
# The assigned task should have no worker_filter or empty worker_filter
assert not assigned_task.worker_filter or assigned_task.worker_filter == {}, \
f'User with fte_only=0.1 should only get task without worker_filter, got task {assigned_task.id} with filter {assigned_task.worker_filter}'

# Test 4: Verify task assignment via API for fte user can get filtered tasks
task_assigned_fte = get_user_pref_task(project.id, fte_user.id)
assert task_assigned_fte, 'User with fte_only=1.0 should get a task'
# The assigned task should be one of the three available tasks
assert task_assigned_fte[0].id in [task0.id, task1.id, task2.id], \
'User with fte_only=1.0 should get one of the available tasks'

@with_context
def test_task_routing_fte_only_strict_equality(self):
'''
Test that fte_only equality filter strictly requires exact match
'''
# Create user with fte_only=0.9 (close to 1 but not equal)
user_info = dict(metadata={
"profile": json.dumps({
"bbg_index": 0.5,
"mkt_cap": 0.5,
"private": 0.5,
"nace": 0.5,
"bics": 1.0,
"bclass": 0.5,
"tier": 0.5,
"fte_only": 0.9
})
})
user = UserFactory.create(id=502, info=user_info)
user_repo.save(user)

project = ProjectFactory.create(owner=user)
project.info['sched'] = Schedulers.task_queue
project_repo.save(project)

# Create task requiring exact fte_only=1
task = TaskFactory.create(project=project, n_answers=10)
task.worker_filter = {
"fte_only": [1, "=="],
"bics": [1, "=="]
}
task_repo.save(task)

# User with fte_only=0.9 should NOT get task requiring fte_only=1
available_tasks = n_available_tasks_for_user(project, user.id)
assert available_tasks == 0, \
'User with fte_only=0.9 should NOT get task requiring fte_only==1'

@with_context
def test_task_routing_missing_profile_field(self):
'''
Test that users without fte_only field in profile cannot get tasks requiring fte_only
'''
# Create user without fte_only in profile
user_info = dict(metadata={
"profile": json.dumps({
"bbg_index": 0.5,
"bics": 1.0,
"tier": 0.5
})
})
user = UserFactory.create(id=503, info=user_info)
user_repo.save(user)

project = ProjectFactory.create(owner=user)
project.info['sched'] = Schedulers.task_queue
project_repo.save(project)

# Create task requiring fte_only
task = TaskFactory.create(project=project, n_answers=10)
task.worker_filter = {
"fte_only": [1, "=="]
}
task_repo.save(task)

# User without fte_only field should NOT get the task
available_tasks = n_available_tasks_for_user(project, user.id)
assert available_tasks == 0, \
'User without fte_only field in profile should NOT get task requiring fte_only'

@with_context
def test_get_user_pref_task_with_task_id(self):
'''
Test that passing a specific task_id to the scheduler returns that task
This covers the task_id parameter code path in locked_scheduler
'''
owner = UserFactory.create(id=504)
owner.user_pref = {'languages': ['en']}
user_repo.save(owner)

project = ProjectFactory.create(owner=owner)
project.info['sched'] = Schedulers.user_pref
project_repo.save(project)

# Create multiple tasks
tasks = TaskFactory.create_batch(3, project=project, n_answers=10)
for task in tasks:
task.user_pref = {'languages': ['en']}
task_repo.save(task)

# Request a specific task by task_id
target_task_id = tasks[1].id
result = get_user_pref_task(project.id, owner.id, task_id=target_task_id)

assert result is not None, 'Should return a task when task_id is provided'
assert len(result) == 1, 'Should return exactly one task'
assert result[0].id == target_task_id, f'Should return the requested task {target_task_id}, but got {result[0].id}'