Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 26 additions & 17 deletions pybossa/api/api_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,30 @@

"""
import json
from flask import request, abort, Response, current_app
from flask_login import current_user
from flask.views import MethodView

from flasgger import swag_from
from werkzeug.exceptions import NotFound, Unauthorized, Forbidden, BadRequest
from werkzeug.exceptions import MethodNotAllowed
from pybossa.util import jsonpify, fuzzyboolean, get_avatar_url
from pybossa.util import get_user_id_or_ip
from pybossa.core import ratelimits, uploader
from flask import Response, abort, current_app, request
from flask.views import MethodView
from flask_login import current_user
from werkzeug.exceptions import (BadRequest, Forbidden, MethodNotAllowed,
NotFound, Unauthorized)

from pybossa.auth import ensure_authorized_to
from pybossa.hateoas import Hateoas
from pybossa.ratelimit import ratelimit
from pybossa.cache.announcements import reset as reset_announcements
from pybossa.cache.categories import reset as reset_categories
from pybossa.cache.projects import clean_project
from pybossa.cache.users import delete_user_summary_id
from pybossa.core import (announcement_repo, auditlog_repo, blog_repo,
helping_repo, performance_stats_repo, project_repo,
project_stats_repo, ratelimits, result_repo,
task_repo, uploader, user_repo)
from pybossa.error import ErrorStatus
from pybossa.core import project_repo, user_repo, task_repo, result_repo, auditlog_repo
from pybossa.core import announcement_repo, blog_repo, helping_repo, performance_stats_repo
from pybossa.core import project_stats_repo
from pybossa.hateoas import Hateoas
from pybossa.model import DomainObject, announcement
from pybossa.model.task import Task
from pybossa.cache.projects import clean_project
from pybossa.cache.users import delete_user_summary_id
from pybossa.cache.categories import reset as reset_categories
from pybossa.cache.announcements import reset as reset_announcements
from pybossa.ratelimit import ratelimit
from pybossa.util import (fuzzyboolean, get_avatar_url, get_user_id_or_ip,
jsonpify)

repos = {'Task': {'repo': task_repo, 'filter': 'filter_tasks_by',
'get': 'get_task', 'save': 'save', 'update': 'update',
Expand Down Expand Up @@ -141,6 +143,7 @@ def get(self, oid):
try:
ensure_authorized_to('read', self.__class__)
query = self._db_query(oid)
self._enrich_get_response(oid, query)
json_response = self._create_json_response(query, oid)
return Response(json_response, mimetype='application/json')
except Exception as e:
Expand Down Expand Up @@ -637,6 +640,12 @@ def _sign_item(self, item):
"""Apply custom signature"""
pass

def _enrich_get_response(self, oid, item):
"""Method to be overriden in inheriting classes for enriching the
response for a GET request
"""
pass

def _copy_original(self, item):
"""change if need to keep some information about the original request"""
return item
Expand Down
98 changes: 80 additions & 18 deletions pybossa/api/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,31 @@
* tasks

"""
from flask import abort, current_app
import copy
import hashlib
import json
import re

from flask import abort, current_app, url_for
from flask_login import current_user
from werkzeug.exceptions import BadRequest, Conflict, NotFound
from pybossa.model.task import Task
from pybossa.model.project import Project
from pybossa.core import result_repo
from pybossa.util import sign_task
from .api_base import APIBase

from pybossa.api.pwd_manager import get_pwd_manager
from pybossa.util import get_user_id_or_ip, validate_required_fields
from pybossa.core import task_repo, project_repo
from pybossa.cache.projects import get_project_data
from pybossa.data_access import when_data_access
import hashlib
from flask import url_for
from pybossa.cloud_store_api.s3 import upload_json_data
from pybossa.auth.task import TaskAuth
from pybossa.cache import delete_memoized
from pybossa.cache.projects import get_project_data
from pybossa.cache.task_browse_helpers import get_searchable_columns
import json
import copy
from pybossa.task_creator_helper import get_task_expiration
from pybossa.cloud_store_api.s3 import upload_json_data
from pybossa.core import project_repo, result_repo, signer, task_repo
from pybossa.data_access import when_data_access
from pybossa.model import make_timestamp
from pybossa.task_creator_helper import generate_checksum
from pybossa.cache.projects import get_project_data
from pybossa.model.project import Project
from pybossa.model.task import Task
from pybossa.task_creator_helper import generate_checksum, get_task_expiration
from pybossa.util import get_user_id_or_ip, sign_task, validate_required_fields
from pybossa.view.fileproxy import read_encrypted_file_with_signature

from .api_base import APIBase


class TaskAPI(APIBase):
Expand Down Expand Up @@ -157,6 +157,68 @@ def _sign_item(self, item):
def _select_attributes(self, data):
    """Return ``data`` after task access control is applied for the current user.

    The project data handed to ``TaskAuth`` is looked up from
    ``data['project_id']``.
    """
    project_data = get_project_data(data['project_id'])
    return TaskAuth.apply_access_control(
        data,
        user=current_user,
        project_data=project_data,
    )

def _parse_private_json_upload_url(self, path):
"""
Parse a private JSON upload URL to extract store, bucket, project_id, and path components.

Args:
path (str): Path like '/fileproxy/encrypted/<store>/<bucket>/<project_id>/<path>'

Returns:
dict: Dictionary with keys 'store', 'bucket', 'project_id', 'path'

Raises:
ValueError: If path doesn't match expected format
"""
pattern = r'^/?fileproxy/encrypted/([^/]+)/([^/]+)/(\d+)/(.+)$'
match = re.match(pattern, path)

if not match:
raise ValueError(f"Path '{path}' doesn't match expected format: /fileproxy/encrypted/<store>/<bucket>/<project_id>/<path>")

store, bucket, project_id_str, file_path = match.groups()

return {
'store': store,
'bucket': bucket,
'project_id': int(project_id_str),
'path': file_path
}


def _enrich_get_response(self, task_id: str, tasks: list[Task]):
    """Merge decrypted private JSON into each task's ``info`` for a GET response.

    Does nothing unless the ``ENABLE_ENCRYPTION`` config flag is set. For every
    task whose ``info`` has a ``private_json__upload_url`` entry, the URL is
    parsed, the referenced file is read through
    ``read_encrypted_file_with_signature``, and, when the content is a JSON
    object, its keys are merged into ``task.info``. The
    ``private_json__upload_url`` entry is removed afterwards, whether or not
    any content was merged.

    Args:
        task_id: Identifier the GET was issued with.
            NOTE(review): appears to be empty for list requests - confirm
            against the route definition.
        tasks: Tasks about to be serialized; their ``info`` is modified in place.
            NOTE(review): confirm that mutating ``info`` in place cannot be
            flushed back to the database by a later commit.

    Raises:
        ValueError: If a ``private_json__upload_url`` value has an unexpected format.
    """
    if not current_app.config.get('ENABLE_ENCRYPTION'):
        current_app.logger.info("Encryption not enabled, skipping task enrichment")
        return

    for task in tasks:
        if not task.info or "private_json__upload_url" not in task.info:
            continue

        url_parts = self._parse_private_json_upload_url(task.info["private_json__upload_url"])
        project_id = url_parts['project_id']
        key_name = '/{}/{}'.format(project_id, url_parts['path'])

        # Sign with the id of the task being processed: the request-level
        # ``task_id`` is the same for every task in the loop and therefore
        # cannot identify each task of a multi-task result.
        signature = signer.dumps({'task_id': task.id})

        decrypted_data, _key = read_encrypted_file_with_signature(
            url_parts['store'], project_id, url_parts['bucket'], key_name, signature)

        if decrypted_data and isinstance(decrypted_data, str):
            try:
                decrypted_data = json.loads(decrypted_data)
            except (ValueError, RecursionError) as e:
                # json.JSONDecodeError is a ValueError; unparsable content is
                # dropped rather than failing the whole response.
                current_app.logger.error(
                    "Error parsing decrypted data as JSON for task id %s: %s", task.id, e)
                decrypted_data = None

        if decrypted_data and isinstance(decrypted_data, dict):
            task.info.update(decrypted_data)

        del task.info["private_json__upload_url"]


def put(self, oid):
# reset cache / memoized
delete_memoized(get_searchable_columns)
Expand Down
41 changes: 23 additions & 18 deletions pybossa/view/fileproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,26 @@
# You should have received a copy of the GNU Affero General Public License
# along with PYBOSSA. If not, see <http://www.gnu.org/licenses/>.

from urllib.parse import urlparse, parse_qs
import json
from functools import wraps
from flask import Blueprint, current_app, Response, request
from flask_login import current_user, login_required
from urllib.parse import parse_qs, urlparse

import six
import requests
import json
from werkzeug.exceptions import Forbidden, BadRequest, InternalServerError, NotFound
import six
from boto.exception import S3ResponseError
from flask import Blueprint, Response, current_app, request
from flask_login import current_user, login_required
from werkzeug.exceptions import (BadRequest, Forbidden, InternalServerError,
NotFound)

from pybossa.cache.projects import get_project_data
from boto.exception import S3ResponseError
from pybossa.contributions_guard import ContributionsGuard
from pybossa.core import task_repo, signer
from pybossa.core import signer, task_repo
from pybossa.encryption import AESWithGCM
# from pybossa.pybhdfs.client import HDFSKerberos
from pybossa.sched import has_lock
from pybossa.task_creator_helper import get_encryption_key, read_encrypted_file


blueprint = Blueprint('fileproxy', __name__)

TASK_SIGNATURE_MAX_SIZE = 128
Expand Down Expand Up @@ -71,6 +71,17 @@ def check_allowed(user_id, task_id, project, is_valid_url):

raise Forbidden('FORBIDDEN')

def get_read_encrypted_file_with_signature_response(store, project_id, bucket, key_name, signature):
    """Read an encrypted file and wrap the decrypted content in a ``Response``.

    Delegates the read to ``read_encrypted_file_with_signature``. The response
    takes its content type from the returned key object, and copies the key's
    content encoding and content disposition into headers when they are set.
    """
    body, key = read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature)

    resp = Response(body, content_type=key.content_type)
    optional_headers = (
        ('Content-Encoding', 'content_encoding'),
        ('Content-Disposition', 'content_disposition'),
    )
    for header_name, attr_name in optional_headers:
        header_value = getattr(key, attr_name, None)
        if header_value:
            resp.headers.add(header_name, header_value)
    return resp


def read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature):
if not signature:
current_app.logger.exception('Project id {} no signature {}'.format(project_id, key_name))
Expand All @@ -89,14 +100,8 @@ def read_encrypted_file_with_signature(store, project_id, bucket, key_name, sign
task_id = payload['task_id']

check_allowed(current_user.id, task_id, project, lambda v: v == request.path)
decrypted, key = read_encrypted_file(store, project, bucket, key_name)

response = Response(decrypted, content_type=key.content_type)
if hasattr(key, "content_encoding") and key.content_encoding:
response.headers.add('Content-Encoding', key.content_encoding)
if hasattr(key, "content_disposition") and key.content_disposition:
response.headers.add('Content-Disposition', key.content_disposition)
return response
return read_encrypted_file(store, project, bucket, key_name)


@blueprint.route('/encrypted/<string:store>/<string:bucket>/workflow_request/<string:workflow_uid>/<int:project_id>/<path:path>')
Expand All @@ -107,7 +112,7 @@ def encrypted_workflow_file(store, bucket, workflow_uid, project_id, path):
key_name = '/workflow_request/{}/{}/{}'.format(workflow_uid, project_id, path)
signature = request.args.get('task-signature')
current_app.logger.info('Project id {} decrypt workflow file. {}'.format(project_id, path))
return read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature)
return get_read_encrypted_file_with_signature_response(store, project_id, bucket, key_name, signature)


@blueprint.route('/encrypted/<string:store>/<string:bucket>/<int:project_id>/<path:path>')
Expand All @@ -119,7 +124,7 @@ def encrypted_file(store, bucket, project_id, path):
signature = request.args.get('task-signature')
current_app.logger.info('Project id {} decrypt file. {}'.format(project_id, path))
current_app.logger.info("store %s, bucket %s, project_id %s, path %s", store, bucket, str(project_id), path)
return read_encrypted_file_with_signature(store, project_id, bucket, key_name, signature)
return get_read_encrypted_file_with_signature_response(store, project_id, bucket, key_name, signature)


def encrypt_task_response_data(task_id, project_id, data):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
"raven==6.10.0",
"rax-default-network-flags-python-novaclient-ext==0.4.0",
"rax-scheduled-images-python-novaclient-ext==0.3.1",
"readability-lxml==0.8.1",
"readability-lxml==0.8.4.1",
"redis==3.5.3",
"rednose==1.3.0",
"requests==2.31.0",
Expand Down
67 changes: 56 additions & 11 deletions test/test_api/test_task_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,20 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with PYBOSSA. If not, see <http://www.gnu.org/licenses/>.
import hashlib
import json
from unittest.mock import patch, call
from test import db, with_context
from test.factories import (ExternalUidTaskRunFactory, ProjectFactory,
TaskFactory, TaskRunFactory, UserFactory)
from test.helper.gig_helper import make_admin, make_subadmin
from test.test_api import TestAPI
from unittest.mock import call, patch

from nose.tools import assert_equal

from pybossa.api.task import TaskAPI
from pybossa.repositories import ProjectRepository
from pybossa.repositories import ResultRepository
from pybossa.repositories import TaskRepository
from test import db, with_context
from test.factories import ExternalUidTaskRunFactory
from test.factories import ProjectFactory, TaskFactory, TaskRunFactory, \
UserFactory
from test.helper.gig_helper import make_subadmin, make_admin
from test.test_api import TestAPI
import hashlib
from pybossa.repositories import (ProjectRepository, ResultRepository,
TaskRepository)

project_repo = ProjectRepository(db)
task_repo = TaskRepository(db)
Expand Down Expand Up @@ -102,6 +100,53 @@ def test_task_query_list_project_ids(self, auth):
err_msg = 'This task should not be in the list as the user participated.'
assert task_orig.id not in task_ids, err_msg


@with_context
@patch('pybossa.api.task.TaskAPI._verify_auth')
@patch('pybossa.api.task.read_encrypted_file_with_signature')
def test_task_query_list_project_ids_with_tasks_with_info(self, mock_read_encrypted, auth):
"""List tasks by project ids when each task's info has a private_json__upload_url and ENABLE_ENCRYPTION is on."""

from flask import current_app

# Mock the encrypted file read function to return test data
# NOTE(review): nothing below asserts that this mock was called, nor that
# "decrypted_data" shows up in (or "private_json__upload_url" is removed
# from) the returned task info - consider adding those assertions.
mock_read_encrypted.return_value = {"decrypted_data": "test_content"}, "sample_key"

with patch.dict(current_app.config, {'ENABLE_ENCRYPTION': True}):
auth.return_value = True
projects = ProjectFactory.create_batch(3)
tasks = []
for project in projects:
tmp = TaskFactory.create_batch(2, project=project)
for t in tmp:
# Point every task at a fileproxy-style URL under its own project.
t.info = {
"private_json__upload_url": "/fileproxy/encrypted/store/bucket/%s/%s" % (project.id, t.id)
}
tasks.append(t)

user = UserFactory.create()
project_ids = [project.id for project in projects]
url = '/api/task?all=1&project_id=%s&limit=100&api_key=%s' % (project_ids, user.api_key)
res = self.app.get(url)
data = json.loads(res.data)

# Two tasks per project, three projects.
assert len(data) == 3 * 2, len(data)
for task in data:
assert task['project_id'] in project_ids
task_project_ids = list(set([task['project_id'] for task in data]))
assert sorted(project_ids) == sorted(task_project_ids)

# more filters
res = self.app.get(url + '&orderby=created&desc=true')
data = json.loads(res.data)
assert data[0]['id'] == tasks[-1].id

# NOTE(review): the statements below build a task run and a "participated"
# URL, but no request is issued and nothing is asserted afterwards - this
# looks like an unfinished copy from another test.
task_orig = tasks[0]
task_run = TaskRunFactory.create(task=task_orig, user=user)

project_ids = [project.id for project in projects]
url = '/api/task?project_id=%s&limit=100&participated=true&api_key=%s' % (project_ids, user.api_key)

@with_context
@patch('pybossa.api.task.TaskAPI._verify_auth')
def test_task_query_participated_user_ip(self, auth):
Expand Down