diff --git a/pybossa/cache/users.py b/pybossa/cache/users.py
index 758a73c8d..b73ead618 100644
--- a/pybossa/cache/users.py
+++ b/pybossa/cache/users.py
@@ -33,6 +33,7 @@
 from pybossa.data_access import data_access_levels
 from pybossa.util import get_taskrun_date_range_sql_clause_params
 from flask import current_app
+from pybossa.cache import ONE_HOUR
 
 session = db.slave_session
 
@@ -411,6 +412,13 @@ def get_user_by_id(user_id):
     return user
 
 
+@memoize(timeout=ONE_DAY)
+def get_user_by_email(email):
+    assert email is not None
+    user = User.query.filter_by(email_addr=email).first()
+    return user
+
+
 def get_user_profile_metadata(user_id):
     user = get_user_by_id(user_id)
     info = user.info or {} if user else {}
diff --git a/pybossa/cloud_store_api/s3.py b/pybossa/cloud_store_api/s3.py
index 530aaa354..b2380b573 100644
--- a/pybossa/cloud_store_api/s3.py
+++ b/pybossa/cloud_store_api/s3.py
@@ -258,6 +258,10 @@ def upload_email_attachment(content, filename, user_email, project_id=None):
     # generate signature for authorised access to the attachment
     from pybossa.core import signer
+    from pybossa.core import sentinel
+    from pybossa.redis_lock import register_user_exported_report
+    from pybossa.cache.users import get_user_by_email
+
     payload = {"project_id": project_id} if project_id else {}
     payload["user_email"] = user_email
     signature = signer.dumps(payload)
@@ -284,6 +288,9 @@ def upload_email_attachment(content, filename, user_email, project_id=None):
     server_url = app.config.get('SERVER_URL')
     url = f"{server_url}/attachment/{signature}/{timestamp}-{secure_file_name}"
    app.logger.info("upload email attachment url %s", url)
+    user_id = get_user_by_email(user_email).id
+    cache_info = register_user_exported_report(user_id, url, sentinel.master)
+    app.logger.info("Cache updated for exported report %s", cache_info)
     return url
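Taken together, these two hunks let upload_email_attachment resolve the recipient's user id from the email address (via the new memoized lookup) and record the generated attachment URL in Redis. A minimal sketch of what that registration step ends up writing, assuming a plain redis-py connection as a stand-in for sentinel.master and hypothetical user id, filename, and URL values:

    import json
    from time import time

    import redis

    conn = redis.StrictRedis()   # stand-in for sentinel.master used in the hunk above
    user_id = 42                 # hypothetical id returned by get_user_by_email(...).id
    url = "https://server/attachment/abc123/1700000000-report.zip"   # hypothetical signed export URL

    key = f"pybossa:user:exported:reports:{user_id}"   # USER_EXPORTED_REPORTS_KEY format
    value = json.dumps({"filename": "1700000000-report.zip", "path": url})
    conn.hset(key, time(), value)   # one hash field per exported report, keyed by timestamp
    conn.expire(key, 60 * 60)       # whole hash expires after the default one-hour TTL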
diff --git a/pybossa/emailsvc.py b/pybossa/emailsvc.py
index 923489ae4..70ae5dab1 100644
--- a/pybossa/emailsvc.py
+++ b/pybossa/emailsvc.py
@@ -47,7 +47,8 @@ def send(self, message):
             self.request_type: {
                 "recipients": message["recipients"],
                 "subject": message["subject"],
-                "body": message["body"]
+                "body": message["body"],
+                "bcc": message.get("bcc") or []
             }
         }
         response = requests.post(self.url, headers=self.headers, json=payload, verify=self.ssl_cert)
diff --git a/pybossa/jobs.py b/pybossa/jobs.py
index 48cff06b0..95c938b41 100644
--- a/pybossa/jobs.py
+++ b/pybossa/jobs.py
@@ -49,6 +49,7 @@
 from pybossa.core import email_service
 from pybossa.cloud_store_api.s3 import upload_email_attachment
 
+
 MINUTE = 60
 IMPORT_TASKS_TIMEOUT = (20 * MINUTE)
 TASK_DELETE_TIMEOUT = (60 * MINUTE)
@@ -984,7 +985,6 @@ def export_tasks(current_user_email_addr, short_name,
     bucket_name = current_app.config.get('EXPORT_BUCKET')
     max_email_size = current_app.config.get('EXPORT_MAX_EMAIL_SIZE', float('Inf'))
     max_s3_upload_size = current_app.config.get('EXPORT_MAX_UPLOAD_SIZE', float('Inf'))
-
     if len(content) > max_s3_upload_size and bucket_name:
         current_app.logger.info("Task export project id %s: Task export exceeded max size %d, actual size: %d",
                                 project.id, max_s3_upload_size, len(content))
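The emailsvc change makes "bcc" an always-present field in the proxy payload. A small sketch of the resulting payload shape, assuming a hypothetical request type of "email" and a message dict like the one the export job builds (no "bcc" key set):

    message = {"recipients": ["user@example.com"], "subject": "Export ready", "body": "Your report is attached"}

    payload = {
        "email": {                                   # hypothetical self.request_type value
            "recipients": message["recipients"],
            "subject": message["subject"],
            "body": message["body"],
            # a missing or None "bcc" key still serializes as an empty list
            "bcc": message.get("bcc") or [],
        }
    }
    assert payload["email"]["bcc"] == []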
diff --git a/pybossa/redis_lock.py b/pybossa/redis_lock.py
index 3e1d31f33..129f9ae1a 100644
--- a/pybossa/redis_lock.py
+++ b/pybossa/redis_lock.py
@@ -17,12 +17,13 @@
 # along with PYBOSSA.  If not, see <http://www.gnu.org/licenses/>.
 
 import json
-from datetime import timedelta
+from datetime import timedelta, datetime
 from time import time
 from pybossa.contributions_guard import ContributionsGuard
 from pybossa.core import sentinel
 from werkzeug.exceptions import BadRequest
+import os
 
 TASK_USERS_KEY_PREFIX = 'pybossa:project:task_requested:timestamps:{0}'
 USER_TASKS_KEY_PREFIX = 'pybossa:user:task_acquired:timestamps:{0}'
@@ -30,7 +31,7 @@
 ACTIVE_USER_KEY = 'pybossa:active_users_in_project:{}'
 EXPIRE_LOCK_DELAY = 5
 EXPIRE_RESERVE_TASK_LOCK_DELAY = 30*60
-
+USER_EXPORTED_REPORTS_KEY = 'pybossa:user:exported:reports:{}'
 
 def get_active_user_key(project_id):
     return ACTIVE_USER_KEY.format(project_id)
@@ -127,6 +128,35 @@ def get_locked_tasks_project(project_id):
         })
     return tasks
 
+def get_user_exported_reports_key(user_id):
+    # redis key to store exported reports for user_id
+    return USER_EXPORTED_REPORTS_KEY.format(user_id)
+
+def register_user_exported_report(user_id, path, conn, ttl=60*60):
+    # register report path for user_id
+    # reports are stored as hset with key as user_id and field as timestamp:path
+    now = time()
+    key = get_user_exported_reports_key(user_id)
+    filename = os.path.basename(path)
+    value = json.dumps({"filename": filename, "path": path})
+    conn.hset(key, now, value)
+    conn.expire(key, ttl)
+    cache_info = f"Registered exported report for user_id {user_id} at {now} with value {value}"
+    return cache_info
+
+def get_user_exported_reports(user_id, conn):
+    # obtain all reports for user_id
+    # reports are stored as hset with key as user_id and field as timestamp:path
+    # return list of (timestamp, filename, path) tuples
+    key = get_user_exported_reports_key(user_id)
+    reports_data = conn.hgetall(key).items()
+    result = []
+    for k, v in reports_data:
+        decoded_value = json.loads(v.decode())
+        formatted_time = datetime.fromtimestamp(float(k.decode())).strftime('%Y-%m-%d %H:%M:%S:%f')[:-3]
+        result.append((formatted_time, decoded_value['filename'], decoded_value['path']))
+    return result
+
 class LockManager(object):
     """
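A rough usage sketch of the two new helpers, assuming an application context in which sentinel.master resolves to the configured Redis master, and a hypothetical user id and export path:

    from pybossa.core import sentinel
    from pybossa.redis_lock import (register_user_exported_report,
                                    get_user_exported_reports)

    conn = sentinel.master
    register_user_exported_report(user_id=42, path="/exports/2024-report.csv", conn=conn)

    # Each entry comes back as (formatted timestamp, filename, full path).
    for exported_at, filename, path in get_user_exported_reports(42, conn):
        print(exported_at, filename, path)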
diff --git a/pybossa/themes/default b/pybossa/themes/default
index 42195081a..4c4fcc3ef 160000
--- a/pybossa/themes/default
+++ b/pybossa/themes/default
@@ -1 +1 @@
-Subproject commit 42195081a6bb08ce711d5eea2c63ba73cb37d899
+Subproject commit 4c4fcc3efecddc5880bb149b75a36c8b0d87db48
diff --git a/pybossa/view/account.py b/pybossa/view/account.py
index bde3faca8..ce49cbc77 100644
--- a/pybossa/view/account.py
+++ b/pybossa/view/account.py
@@ -1178,6 +1178,9 @@ def get_bookmarks(user_name, short_name, order_by, desc):
     proj_bookmarks = taskbrowse_bookmarks.get(short_name, {})
     return bookmarks_dict_to_array(proj_bookmarks, order_by, desc)
 
+def get_user_reports(user_name):
+    user_reports = cached_users.get_user_reports(user_name)
+    return user_reports
 
 def add_bookmark(user_name, short_name, bookmark_name, bookmark_url, order_by, desc):
diff --git a/pybossa/view/projects.py b/pybossa/view/projects.py
index 2d4bfa0de..6563c7975 100644
--- a/pybossa/view/projects.py
+++ b/pybossa/view/projects.py
@@ -110,6 +110,7 @@
 from sqlalchemy.orm.attributes import flag_modified
 from pybossa.util import admin_or_project_owner, validate_ownership_id
 from pybossa.api.project import ProjectAPI
+from pybossa.redis_lock import get_user_exported_reports
 
 cors_headers = ['Content-Type', 'Authorization']
 
@@ -1849,7 +1850,7 @@ def get_users_completed(task):
     get_users_fullname(page_tasks, lambda task: get_users_completed(task), 'completed_users')
 
     taskbrowse_bookmarks = get_bookmarks(current_user.name, short_name, None, None)
-
+    user_reports = get_user_exported_reports(current_user.id, sentinel.master)
     valid_user_preferences = app_settings.upref_mdata.get_valid_user_preferences() \
         if app_settings.upref_mdata else {}
     language_options = valid_user_preferences.get('languages')
@@ -1882,7 +1883,8 @@ def get_users_completed(task):
                 allow_taskrun_edit=allow_taskrun_edit,
                 regular_user=regular_user,
                 admin_subadmin_coowner=admin_subadmin_coowner,
-                taskbrowse_bookmarks=taskbrowse_bookmarks)
+                taskbrowse_bookmarks=taskbrowse_bookmarks,
+                user_reports=user_reports)
 
     return handle_content_type(data)
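For reference, the view hands these tuples straight through to the task-browse template as user_reports, so the template (updated in the themes submodule bump above) receives data shaped like the hypothetical example below; hgetall gives no ordering guarantee, so newest-first display would need an explicit sort:

    # (formatted timestamp, filename, path) tuples, values hypothetical
    user_reports = [
        ("2021-01-01 00:00:00:111", "report1.csv", "https://server/attachment/sig1/1609459200-report1.csv"),
        ("2021-01-01 00:01:00:222", "report2.json", "https://server/attachment/sig2/1609459260-report2.json"),
    ]

    newest_first = sorted(user_reports, key=lambda r: r[0], reverse=True)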
diff --git a/test/test_cache/test_cache_users.py b/test/test_cache/test_cache_users.py
index a026b1193..7b8b9dc0f 100644
--- a/test/test_cache/test_cache_users.py
+++ b/test/test_cache/test_cache_users.py
@@ -20,6 +20,14 @@
 from pybossa.cache import users as cached_users
 from pybossa.model.user import User
 from pybossa.leaderboard.jobs import leaderboard as update_leaderboard
+from pybossa.redis_lock import (get_user_exported_reports_key,
+                                register_user_exported_report,
+                                get_user_exported_reports)
+from pybossa.core import sentinel
+from time import time
+from datetime import datetime
+from unittest.mock import patch, MagicMock
+import os
 from test.factories import ProjectFactory, TaskFactory, TaskRunFactory, UserFactory
@@ -570,3 +578,328 @@ def test_draft_projects_cached(self):
         ProjectFactory.create(owner=user, published=True)
         draft_projects = cached_users.draft_projects_cached(user.id)
         assert len(draft_projects) == 0
+
+    @with_context
+    def test_get_user_exported_reports_key(self):
+        """Test get_user_exported_reports_key returns correct Redis key format"""
+        user_id = 123
+        expected_key = 'pybossa:user:exported:reports:123'
+
+        key = get_user_exported_reports_key(user_id)
+
+        assert key == expected_key
+
+    @with_context
+    def test_get_user_exported_reports_key_string_user_id(self):
+        """Test get_user_exported_reports_key works with string user_id"""
+        user_id = "456"
+        expected_key = 'pybossa:user:exported:reports:456'
+
+        key = get_user_exported_reports_key(user_id)
+
+        assert key == expected_key
+
+    @with_context
+    def test_register_user_exported_report_default_ttl(self):
+        """Test register_user_exported_report stores report with default TTL"""
+        user_id = 123
+        path = '/path/to/report.csv'
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        # Mock time to get predictable timestamp
+        with patch('pybossa.redis_lock.time') as mock_time:
+            mock_time.return_value = 1609459200.123456  # 2021-01-01 00:00:00.123456
+
+            cache_info = register_user_exported_report(user_id, path, conn)
+
+            # Check that the function returns cache info
+            expected_cache_info = 'Registered exported report for user_id 123 at 1609459200.123456 with value {"filename": "report.csv", "path": "/path/to/report.csv"}'
+            assert cache_info == expected_cache_info
+
+            # Check that the key was created with correct format
+            expected_key = 'pybossa:user:exported:reports:123'
+            assert conn.exists(expected_key)
+
+            # Check that the data was stored correctly in the hash
+            import json
+            stored_value = conn.hget(expected_key, '1609459200.123456')
+            assert stored_value is not None
+            stored_data = json.loads(stored_value)
+            assert stored_data['filename'] == 'report.csv'
+            assert stored_data['path'] == '/path/to/report.csv'
+
+            # Check TTL is approximately correct (default 3600 seconds)
+            ttl = conn.ttl(expected_key)
+            assert 3590 <= ttl <= 3600
+
+    @with_context
+    def test_register_user_exported_report_custom_ttl(self):
+        """Test register_user_exported_report stores report with custom TTL"""
+        user_id = 456
+        path = '/path/to/custom_report.json'
+        conn = sentinel.master
+        custom_ttl = 1800  # 30 minutes
+
+        # Clear any existing data
+        conn.flushall()
+
+        with patch('pybossa.redis_lock.time') as mock_time:
+            mock_time.return_value = 1609459200.789012
+
+            cache_info = register_user_exported_report(user_id, path, conn, ttl=custom_ttl)
+
+            # Check that the function returns cache info
+            expected_cache_info = 'Registered exported report for user_id 456 at 1609459200.789012 with value {"filename": "custom_report.json", "path": "/path/to/custom_report.json"}'
+            assert cache_info == expected_cache_info
+
+            expected_key = 'pybossa:user:exported:reports:456'
+            assert conn.exists(expected_key)
+
+            # Check TTL is approximately correct (custom 1800 seconds)
+            ttl = conn.ttl(expected_key)
+            assert 1790 <= ttl <= 1800
+
+    @with_context
+    def test_register_user_exported_report_multiple_reports(self):
+        """Test register_user_exported_report can store multiple reports for same user"""
+        user_id = 789
+        path1 = '/path/to/report1.csv'
+        path2 = '/path/to/report2.json'
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        with patch('pybossa.redis_lock.time') as mock_time:
+            # First report
+            mock_time.return_value = 1609459200.111111
+            cache_info1 = register_user_exported_report(user_id, path1, conn)
+
+            # Second report with different timestamp
+            mock_time.return_value = 1609459260.222222
+            cache_info2 = register_user_exported_report(user_id, path2, conn)
+
+            # Check both reports are stored in the same key
+            key = 'pybossa:user:exported:reports:789'
+            assert conn.exists(key)
+
+            # Check both timestamps exist as hash fields
+            assert conn.hexists(key, '1609459200.111111')
+            assert conn.hexists(key, '1609459260.222222')
+
+            # Check correct data is stored
+            import json
+            stored_value1 = json.loads(conn.hget(key, '1609459200.111111'))
+            stored_value2 = json.loads(conn.hget(key, '1609459260.222222'))
+
+            assert stored_value1['filename'] == 'report1.csv'
+            assert stored_value1['path'] == path1
+            assert stored_value2['filename'] == 'report2.json'
+            assert stored_value2['path'] == path2
+
+    @with_context
+    def test_get_user_exported_reports_no_reports(self):
+        """Test get_user_exported_reports returns empty list when no reports exist"""
+        user_id = 999
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        reports = get_user_exported_reports(user_id, conn)
+
+        assert reports == []
+
+    @with_context
+    def test_get_user_exported_reports_single_report(self):
+        """Test get_user_exported_reports returns single report correctly"""
+        user_id = 111
+        path = '/path/to/single_report.csv'
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        # Register a report first
+        with patch('pybossa.redis_lock.time') as mock_time:
+            mock_time.return_value = 1609459200.555555
+            register_user_exported_report(user_id, path, conn)
+
+        # Retrieve reports
+        reports = get_user_exported_reports(user_id, conn)
+
+        assert len(reports) == 1
+        assert reports[0] == ('2021-01-01 00:00:00:555', 'single_report.csv', path)
+
+    @with_context
+    def test_get_user_exported_reports_multiple_reports(self):
+        """Test get_user_exported_reports returns multiple reports correctly"""
+        user_id = 222
+        path1 = '/path/to/report1.csv'
+        path2 = '/path/to/report2.json'
+        path3 = '/path/to/report3.xlsx'
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        # Register multiple reports
+        with patch('pybossa.redis_lock.time') as mock_time:
+
+            # First report
+            mock_time.return_value = 1609459200.111111
+            register_user_exported_report(user_id, path1, conn)
+
+            # Second report
+            mock_time.return_value = 1609459260.222222
+            register_user_exported_report(user_id, path2, conn)
+
+            # Third report
+            mock_time.return_value = 1609459320.333333
+            register_user_exported_report(user_id, path3, conn)
+
+        # Retrieve reports
+        reports = get_user_exported_reports(user_id, conn)
+
+        assert len(reports) == 3
+
+        # Convert to set for easier comparison (order may vary)
+        report_set = set(reports)
+        expected_set = {
+            ('2021-01-01 00:00:00:111', 'report1.csv', path1),
+            ('2021-01-01 00:01:00:222', 'report2.json', path2),
+            ('2021-01-01 00:02:00:333', 'report3.xlsx', path3)
+        }
+        assert report_set == expected_set
+
+    @with_context
+    def test_get_user_exported_reports_ignores_malformed_values(self):
+        """Test get_user_exported_reports ignores malformed JSON values"""
+        user_id = 333
+        valid_path = '/path/to/valid_report.csv'
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        # Create a valid report
+        with patch('pybossa.redis_lock.time') as mock_time:
+            mock_time.return_value = 1609459200.777777
+            register_user_exported_report(user_id, valid_path, conn)
+
+        # Manually add a malformed value (invalid JSON)
+        key = get_user_exported_reports_key(user_id)
+        conn.hset(key, '1609459300.888888', 'invalid_json_string')
+
+        # Retrieve reports - should handle the malformed JSON gracefully
+        try:
+            reports = get_user_exported_reports(user_id, conn)
+            # Should only return the valid report, ignoring malformed ones
+            assert len(reports) == 1
+            assert reports[0] == ('2021-01-01 00:00:00:777', 'valid_report.csv', valid_path)
+        except Exception as e:
+            # If the implementation doesn't handle malformed JSON gracefully,
+            # we expect a specific type of error
+            import json
+            assert isinstance(e, json.JSONDecodeError)
+
+    @with_context
+    def test_get_user_exported_reports_handles_complex_paths(self):
+        """Test get_user_exported_reports handles paths with special characters"""
+        user_id = 444
+        complex_path = '/path/with spaces/and:colons/report_file-name.csv'
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        # Register report with complex path
+        with patch('pybossa.redis_lock.time') as mock_time:
+            mock_time.return_value = 1609459200.999999
+            register_user_exported_report(user_id, complex_path, conn)
+
+        # Retrieve reports
+        reports = get_user_exported_reports(user_id, conn)
+
+        assert len(reports) == 1
+        assert reports[0] == ('2021-01-01 00:00:00:999', 'report_file-name.csv', complex_path)
+
+    @with_context
+    def test_get_user_exported_reports_different_users_isolated(self):
+        """Test get_user_exported_reports only returns reports for specific user"""
+        user_id_1 = 555
+        user_id_2 = 666
+        path_1 = '/path/to/user1_report.csv'
+        path_2 = '/path/to/user2_report.json'
+        conn = sentinel.master
+
+        # Clear any existing data
+        conn.flushall()
+
+        # Register reports for different users
+        with patch('pybossa.redis_lock.time') as mock_time:
+            # User 1 report
+            mock_time.return_value = 1609459200.111111
+            register_user_exported_report(user_id_1, path_1, conn)
+
+            # User 2 report
+            mock_time.return_value = 1609459260.222222
+            register_user_exported_report(user_id_2, path_2, conn)
+
+        # Retrieve reports for user 1
+        reports_1 = get_user_exported_reports(user_id_1, conn)
+        assert len(reports_1) == 1
+        assert reports_1[0] == ('2021-01-01 00:00:00:111', 'user1_report.csv', path_1)
+
+        # Retrieve reports for user 2
+        reports_2 = get_user_exported_reports(user_id_2, conn)
+        assert len(reports_2) == 1
+        assert reports_2[0] == ('2021-01-01 00:01:00:222', 'user2_report.json', path_2)
+
+    @with_context
+    def test_register_user_exported_report_with_mock_connection(self):
+        """Test register_user_exported_report with mocked Redis connection"""
+        user_id = 777
+        path = '/path/to/mock_report.csv'
+        mock_conn = MagicMock()
+
+        with patch('pybossa.redis_lock.time') as mock_time:
+            mock_time.return_value = 1609459200.888888
+
+            cache_info = register_user_exported_report(user_id, path, mock_conn, ttl=7200)
+
+            # Verify Redis operations were called correctly
+            expected_key = 'pybossa:user:exported:reports:777'
+            expected_value = '{"filename": "mock_report.csv", "path": "/path/to/mock_report.csv"}'
+            mock_conn.hset.assert_called_once_with(expected_key, 1609459200.888888, expected_value)
+            mock_conn.expire.assert_called_once_with(expected_key, 7200)
+
+            # Verify return value
+            expected_cache_info = 'Registered exported report for user_id 777 at 1609459200.888888 with value {"filename": "mock_report.csv", "path": "/path/to/mock_report.csv"}'
+            assert cache_info == expected_cache_info
+
+    @with_context
+    def test_get_user_exported_reports_with_mock_connection(self):
+        """Test get_user_exported_reports with mocked Redis connection"""
+        user_id = 888
+        mock_conn = MagicMock()
+
+        # Mock the hgetall response
+        mock_conn.hgetall.return_value.items.return_value = [
+            (b'1609459200.123', b'{"filename": "report1.csv", "path": "/path/to/report1.csv"}'),
+            (b'1609459260.456', b'{"filename": "report2.json", "path": "/path/to/report2.json"}'),
+        ]
+
+        reports = get_user_exported_reports(user_id, mock_conn)
+
+        # Verify correct Redis key was used
+        expected_key = 'pybossa:user:exported:reports:888'
+        mock_conn.hgetall.assert_called_once_with(expected_key)
+
+        # Verify correct parsing
+        assert len(reports) == 2
+        assert ('2021-01-01 00:00:00:123', 'report1.csv', '/path/to/report1.csv') in reports
+        assert ('2021-01-01 00:01:00:456', 'report2.json', '/path/to/report2.json') in reports
diff --git a/test/test_emailsvc.py b/test/test_emailsvc.py
index 9ce842b7e..5a992549f 100644
--- a/test/test_emailsvc.py
+++ b/test/test_emailsvc.py
@@ -65,7 +65,7 @@ def test_emailsvc_send_email(self, sendmail):
         with patch.dict(self.flask_app.config, {"PROXY_SERVICE_CONFIG": self.service_config,
                                                 "SSL_CERT_PATH": cert_path}):
             esvc = EmailService(self.flask_app)
-            message = {"recipients": ["abc@def.com"], "subject": "Welcome", "body": "Greetings from xyz"}
+            message = {"recipients": ["abc@def.com"], "subject": "Welcome", "body": "Greetings from xyz", "bcc": []}
             expected_svc_payload = {
                 self.service_config["email_service"]["requests"][0]: message
             }