diff --git a/pybossa/api/api_base.py b/pybossa/api/api_base.py index b6b092e6b..721a9719a 100644 --- a/pybossa/api/api_base.py +++ b/pybossa/api/api_base.py @@ -27,28 +27,30 @@ """ import json -from flask import request, abort, Response, current_app -from flask_login import current_user -from flask.views import MethodView + from flasgger import swag_from -from werkzeug.exceptions import NotFound, Unauthorized, Forbidden, BadRequest -from werkzeug.exceptions import MethodNotAllowed -from pybossa.util import jsonpify, fuzzyboolean, get_avatar_url -from pybossa.util import get_user_id_or_ip -from pybossa.core import ratelimits, uploader +from flask import Response, abort, current_app, request +from flask.views import MethodView +from flask_login import current_user +from werkzeug.exceptions import (BadRequest, Forbidden, MethodNotAllowed, + NotFound, Unauthorized) + from pybossa.auth import ensure_authorized_to -from pybossa.hateoas import Hateoas -from pybossa.ratelimit import ratelimit +from pybossa.cache.announcements import reset as reset_announcements +from pybossa.cache.categories import reset as reset_categories +from pybossa.cache.projects import clean_project +from pybossa.cache.users import delete_user_summary_id +from pybossa.core import (announcement_repo, auditlog_repo, blog_repo, + helping_repo, performance_stats_repo, project_repo, + project_stats_repo, ratelimits, result_repo, + task_repo, uploader, user_repo) from pybossa.error import ErrorStatus -from pybossa.core import project_repo, user_repo, task_repo, result_repo, auditlog_repo -from pybossa.core import announcement_repo, blog_repo, helping_repo, performance_stats_repo -from pybossa.core import project_stats_repo +from pybossa.hateoas import Hateoas from pybossa.model import DomainObject, announcement from pybossa.model.task import Task -from pybossa.cache.projects import clean_project -from pybossa.cache.users import delete_user_summary_id -from pybossa.cache.categories import reset as reset_categories -from pybossa.cache.announcements import reset as reset_announcements +from pybossa.ratelimit import ratelimit +from pybossa.util import (fuzzyboolean, get_avatar_url, get_user_id_or_ip, + jsonpify) repos = {'Task': {'repo': task_repo, 'filter': 'filter_tasks_by', 'get': 'get_task', 'save': 'save', 'update': 'update', @@ -141,6 +143,7 @@ def get(self, oid): try: ensure_authorized_to('read', self.__class__) query = self._db_query(oid) + self._enrich_get_response(oid, query) json_response = self._create_json_response(query, oid) return Response(json_response, mimetype='application/json') except Exception as e: @@ -637,6 +640,12 @@ def _sign_item(self, item): """Apply custom signature""" pass + def _enrich_get_response(self, oid, item): + """Method to be overriden in inheriting classes for enriching the + response for a GET request + """ + pass + def _copy_original(self, item): """change if need to keep some information about the original request""" return item diff --git a/pybossa/api/task.py b/pybossa/api/task.py index fbeb316df..61a712509 100644 --- a/pybossa/api/task.py +++ b/pybossa/api/task.py @@ -22,31 +22,31 @@ * tasks """ -from flask import abort, current_app +import copy +import hashlib +import json +import re + +from flask import abort, current_app, url_for from flask_login import current_user from werkzeug.exceptions import BadRequest, Conflict, NotFound -from pybossa.model.task import Task -from pybossa.model.project import Project -from pybossa.core import result_repo -from pybossa.util import sign_task -from .api_base import APIBase + from pybossa.api.pwd_manager import get_pwd_manager -from pybossa.util import get_user_id_or_ip, validate_required_fields -from pybossa.core import task_repo, project_repo -from pybossa.cache.projects import get_project_data -from pybossa.data_access import when_data_access -import hashlib -from flask import url_for -from pybossa.cloud_store_api.s3 import upload_json_data from pybossa.auth.task import TaskAuth from pybossa.cache import delete_memoized +from pybossa.cache.projects import get_project_data from pybossa.cache.task_browse_helpers import get_searchable_columns -import json -import copy -from pybossa.task_creator_helper import get_task_expiration +from pybossa.cloud_store_api.s3 import upload_json_data +from pybossa.core import project_repo, result_repo, signer, task_repo +from pybossa.data_access import when_data_access from pybossa.model import make_timestamp -from pybossa.task_creator_helper import generate_checksum -from pybossa.cache.projects import get_project_data +from pybossa.model.project import Project +from pybossa.model.task import Task +from pybossa.task_creator_helper import generate_checksum, get_task_expiration +from pybossa.util import get_user_id_or_ip, sign_task, validate_required_fields +from pybossa.view.fileproxy import read_encrypted_file_with_signature + +from .api_base import APIBase class TaskAPI(APIBase): @@ -157,6 +157,68 @@ def _sign_item(self, item): def _select_attributes(self, data): return TaskAuth.apply_access_control(data, user=current_user, project_data=get_project_data(data['project_id'])) + def _parse_private_json_upload_url(self, path): + """ + Parse a private JSON upload URL to extract store, bucket, project_id, and path components. + + Args: + path (str): Path like '/fileproxy/encrypted////' + + Returns: + dict: Dictionary with keys 'store', 'bucket', 'project_id', 'path' + + Raises: + ValueError: If path doesn't match expected format + """ + pattern = r'^/?fileproxy/encrypted/([^/]+)/([^/]+)/(\d+)/(.+)$' + match = re.match(pattern, path) + + if not match: + raise ValueError(f"Path '{path}' doesn't match expected format: /fileproxy/encrypted////") + + store, bucket, project_id_str, file_path = match.groups() + + return { + 'store': store, + 'bucket': bucket, + 'project_id': int(project_id_str), + 'path': file_path + } + + + def _enrich_get_response(self, task_id: str, tasks: list[Task]): + if not current_app.config.get('ENABLE_ENCRYPTION'): + current_app.logger.info("Encryption not enabled, skipping task enrichment") + return + + for task in tasks: + if not task.info or "private_json__upload_url" not in task.info: + continue + + url_parts = self._parse_private_json_upload_url(task.info["private_json__upload_url"]) + + store = url_parts.get('store') + bucket = url_parts.get('bucket') + project_id = url_parts.get('project_id') + path = url_parts.get('path') + key_name = '/{}/{}'.format(project_id, path) + signature = signer.dumps({'task_id': task_id}) + + decrypted_data, _key = read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature) + + try: + if decrypted_data and isinstance(decrypted_data, str): + decrypted_data = json.loads(decrypted_data) + except Exception as e: + current_app.logger.error(f"Error parsing decrypted data as JSON for task id {task_id}: {str(e)}") + decrypted_data = None + + if decrypted_data and isinstance(decrypted_data, dict): + task.info.update(decrypted_data) + + del task.info["private_json__upload_url"] + + def put(self, oid): # reset cache / memoized delete_memoized(get_searchable_columns) diff --git a/pybossa/view/fileproxy.py b/pybossa/view/fileproxy.py index 0fce1d58a..6da0a8607 100644 --- a/pybossa/view/fileproxy.py +++ b/pybossa/view/fileproxy.py @@ -16,26 +16,26 @@ # You should have received a copy of the GNU Affero General Public License # along with PYBOSSA. If not, see . -from urllib.parse import urlparse, parse_qs +import json from functools import wraps -from flask import Blueprint, current_app, Response, request -from flask_login import current_user, login_required +from urllib.parse import parse_qs, urlparse -import six import requests -import json -from werkzeug.exceptions import Forbidden, BadRequest, InternalServerError, NotFound +import six +from boto.exception import S3ResponseError +from flask import Blueprint, Response, current_app, request +from flask_login import current_user, login_required +from werkzeug.exceptions import (BadRequest, Forbidden, InternalServerError, + NotFound) from pybossa.cache.projects import get_project_data -from boto.exception import S3ResponseError from pybossa.contributions_guard import ContributionsGuard -from pybossa.core import task_repo, signer +from pybossa.core import signer, task_repo from pybossa.encryption import AESWithGCM # from pybossa.pybhdfs.client import HDFSKerberos from pybossa.sched import has_lock from pybossa.task_creator_helper import get_encryption_key, read_encrypted_file - blueprint = Blueprint('fileproxy', __name__) TASK_SIGNATURE_MAX_SIZE = 128 @@ -71,6 +71,17 @@ def check_allowed(user_id, task_id, project, is_valid_url): raise Forbidden('FORBIDDEN') +def get_read_encrypted_file_with_signature_response(store, project_id, bucket, key_name, signature): + decrypted, key = read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature) + + response = Response(decrypted, content_type=key.content_type) + if hasattr(key, "content_encoding") and key.content_encoding: + response.headers.add('Content-Encoding', key.content_encoding) + if hasattr(key, "content_disposition") and key.content_disposition: + response.headers.add('Content-Disposition', key.content_disposition) + return response + + def read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature): if not signature: current_app.logger.exception('Project id {} no signature {}'.format(project_id, key_name)) @@ -89,14 +100,8 @@ def read_encrypted_file_with_signature(store, project_id, bucket, key_name, sign task_id = payload['task_id'] check_allowed(current_user.id, task_id, project, lambda v: v == request.path) - decrypted, key = read_encrypted_file(store, project, bucket, key_name) - response = Response(decrypted, content_type=key.content_type) - if hasattr(key, "content_encoding") and key.content_encoding: - response.headers.add('Content-Encoding', key.content_encoding) - if hasattr(key, "content_disposition") and key.content_disposition: - response.headers.add('Content-Disposition', key.content_disposition) - return response + return read_encrypted_file(store, project, bucket, key_name) @blueprint.route('/encrypted///workflow_request///') @@ -107,7 +112,7 @@ def encrypted_workflow_file(store, bucket, workflow_uid, project_id, path): key_name = '/workflow_request/{}/{}/{}'.format(workflow_uid, project_id, path) signature = request.args.get('task-signature') current_app.logger.info('Project id {} decrypt workflow file. {}'.format(project_id, path)) - return read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature) + return get_read_encrypted_file_with_signature_response(store, project_id, bucket, key_name, signature) @blueprint.route('/encrypted////') @@ -119,7 +124,7 @@ def encrypted_file(store, bucket, project_id, path): signature = request.args.get('task-signature') current_app.logger.info('Project id {} decrypt file. {}'.format(project_id, path)) current_app.logger.info("store %s, bucket %s, project_id %s, path %s", store, bucket, str(project_id), path) - return read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature) + return get_read_encrypted_file_with_signature_response(store, project_id, bucket, key_name, signature) def encrypt_task_response_data(task_id, project_id, data): diff --git a/setup.py b/setup.py index e0196b830..f62cfe5cc 100644 --- a/setup.py +++ b/setup.py @@ -129,7 +129,7 @@ "raven==6.10.0", "rax-default-network-flags-python-novaclient-ext==0.4.0", "rax-scheduled-images-python-novaclient-ext==0.3.1", - "readability-lxml==0.8.1", + "readability-lxml==0.8.4.1", "redis==3.5.3", "rednose==1.3.0", "requests==2.31.0", diff --git a/test/test_api/test_task_api.py b/test/test_api/test_task_api.py index 88da3524d..52bed2b60 100644 --- a/test/test_api/test_task_api.py +++ b/test/test_api/test_task_api.py @@ -15,22 +15,20 @@ # # You should have received a copy of the GNU Affero General Public License # along with PYBOSSA. If not, see . +import hashlib import json -from unittest.mock import patch, call +from test import db, with_context +from test.factories import (ExternalUidTaskRunFactory, ProjectFactory, + TaskFactory, TaskRunFactory, UserFactory) +from test.helper.gig_helper import make_admin, make_subadmin +from test.test_api import TestAPI +from unittest.mock import call, patch from nose.tools import assert_equal from pybossa.api.task import TaskAPI -from pybossa.repositories import ProjectRepository -from pybossa.repositories import ResultRepository -from pybossa.repositories import TaskRepository -from test import db, with_context -from test.factories import ExternalUidTaskRunFactory -from test.factories import ProjectFactory, TaskFactory, TaskRunFactory, \ - UserFactory -from test.helper.gig_helper import make_subadmin, make_admin -from test.test_api import TestAPI -import hashlib +from pybossa.repositories import (ProjectRepository, ResultRepository, + TaskRepository) project_repo = ProjectRepository(db) task_repo = TaskRepository(db) @@ -102,6 +100,53 @@ def test_task_query_list_project_ids(self, auth): err_msg = 'This task should not be in the list as the user participated.' assert task_orig.id not in task_ids, err_msg + + @with_context + @patch('pybossa.api.task.TaskAPI._verify_auth') + @patch('pybossa.api.task.read_encrypted_file_with_signature') + def test_task_query_list_project_ids_with_tasks_with_info(self, mock_read_encrypted, auth): + """Get a list of tasks using a list of project_ids.""" + + from flask import current_app + + # Mock the encrypted file read function to return test data + mock_read_encrypted.return_value = {"decrypted_data": "test_content"}, "sample_key" + + with patch.dict(current_app.config, {'ENABLE_ENCRYPTION': True}): + auth.return_value = True + projects = ProjectFactory.create_batch(3) + tasks = [] + for project in projects: + tmp = TaskFactory.create_batch(2, project=project) + for t in tmp: + t.info = { + "private_json__upload_url": "/fileproxy/encrypted/store/bucket/%s/%s" % (project.id, t.id) + } + tasks.append(t) + + user = UserFactory.create() + project_ids = [project.id for project in projects] + url = '/api/task?all=1&project_id=%s&limit=100&api_key=%s' % (project_ids, user.api_key) + res = self.app.get(url) + data = json.loads(res.data) + + assert len(data) == 3 * 2, len(data) + for task in data: + assert task['project_id'] in project_ids + task_project_ids = list(set([task['project_id'] for task in data])) + assert sorted(project_ids) == sorted(task_project_ids) + + # more filters + res = self.app.get(url + '&orderby=created&desc=true') + data = json.loads(res.data) + assert data[0]['id'] == tasks[-1].id + + task_orig = tasks[0] + task_run = TaskRunFactory.create(task=task_orig, user=user) + + project_ids = [project.id for project in projects] + url = '/api/task?project_id=%s&limit=100&participated=true&api_key=%s' % (project_ids, user.api_key) + @with_context @patch('pybossa.api.task.TaskAPI._verify_auth') def test_task_query_participated_user_ip(self, auth):