Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pybossa/cloud_store_api/base_conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,6 @@ def get_contents_as_string(self, encoding=None, **kwargs): # pylint: disable=W0
"""Returns contents as bytes or string, depending on encoding parameter.
If encoding is None, returns bytes, otherwise, returns
a string.

parameter "encoding" is default to None. This is consistent with boto2
get_contents_as_string() method:
http://boto.cloudhackers.com/en/latest/ref/s3.html#boto.s3.key.Key.get_contents_as_string
"""
return self.base_client.get_contents_as_string(
bucket=self.bucket, path=self.name, encoding=encoding
Expand Down
166 changes: 25 additions & 141 deletions pybossa/cloud_store_api/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,7 @@
import time

from flask import current_app
from boto.auth_handler import AuthHandler
import boto.auth

from boto.exception import S3ResponseError
from boto.s3.key import Key
from boto.s3.bucket import Bucket
from boto.s3.connection import S3Connection, OrdinaryCallingFormat
from boto.provider import Provider
import jwt
from werkzeug.exceptions import BadRequest
from boto3.session import Session
from botocore.client import Config
Expand Down Expand Up @@ -59,58 +51,36 @@ def create_connection(**kwargs):
cert=kwargs.get("cert", False),
proxy_url=kwargs.get("proxy_url")
)
if 'object_service' in kwargs:
current_app.logger.info("Calling ProxiedConnection")
conn = ProxiedConnection(**kwargs)
else:
current_app.logger.info("Calling CustomConnection")
conn = CustomConnection(**kwargs)
return conn


class CustomProvider(Provider):
"""Extend Provider to carry information about the end service provider, in
case the service is being proxied.
"""

def __init__(self, name, access_key=None, secret_key=None,
security_token=None, profile_name=None, object_service=None,
auth_headers=None):
self.object_service = object_service or name
self.auth_headers = auth_headers
super(CustomProvider, self).__init__(name, access_key, secret_key,
security_token, profile_name)
current_app.logger.info("Calling CustomConnection")
conn = CustomConnection(**kwargs)
return conn


class CustomConnection(S3Connection):
class CustomConnection(BaseConnection):

def __init__(self, *args, **kwargs):
if not kwargs.get('calling_format'):
kwargs['calling_format'] = OrdinaryCallingFormat()

kwargs['provider'] = CustomProvider('aws',
kwargs.get('aws_access_key_id'),
kwargs.get('aws_secret_access_key'),
kwargs.get('security_token'),
kwargs.get('profile_name'),
kwargs.pop('object_service', None),
kwargs.pop('auth_headers', None))

kwargs['bucket_class'] = CustomBucket

ssl_no_verify = kwargs.pop('s3_ssl_no_verify', False)
self.host_suffix = kwargs.pop('host_suffix', '')

super(CustomConnection, self).__init__(*args, **kwargs)

if kwargs.get('is_secure', True) and ssl_no_verify:
self.https_validate_certificates = False
context = ssl._create_unverified_context()
self.http_connection_kwargs['context'] = context

def get_path(self, path='/', *args, **kwargs):
ret = super(CustomConnection, self).get_path(path, *args, **kwargs)
return self.host_suffix + ret
super().__init__() # super(CustomConnection, self).__init__(*args, **kwargs)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment seems left here by mistake?


aws_access_key_id = kwargs.get("aws_access_key_id")
aws_secret_access_key = kwargs.get("aws_secret_access_key")
region_name = kwargs.get("region_name", "us-east-1")
cert = kwargs.get('cert', False)
proxy_url = kwargs.get('proxy_url', None)
proxies = {"https": proxy_url, "http": proxy_url} if proxy_url else None
ssl_verify = kwargs.get('ssl_verify', True)
self.client = Session().client(
service_name="s3",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
region_name=region_name,
use_ssl=ssl_verify,
verify=cert,
config=Config(
proxies=proxies,
s3={"addressing_style": "path"} # equivalent to OrdinaryCallingFormat under old boto
),
)


class CustomConnectionV2(BaseConnection):
Expand All @@ -133,89 +103,3 @@ def __init__(
proxies={"https": proxy_url, "http": proxy_url},
),
)


class CustomBucket(Bucket):
"""Handle both 200 and 204 as response code"""

def delete_key(self, *args, **kwargs):
try:
super(CustomBucket, self).delete_key(*args, **kwargs)
except S3ResponseError as e:
if e.status != 200:
raise


class ProxiedKey(Key):

def should_retry(self, response, chunked_transfer=False):
if 200 <= response.status <= 299:
return True
return super(ProxiedKey, self).should_retry(response, chunked_transfer)


class ProxiedBucket(CustomBucket):

def __init__(self, *args, **kwargs):
super(ProxiedBucket, self).__init__(*args, **kwargs)
self.set_key_class(ProxiedKey)


class ProxiedConnection(CustomConnection):
"""Object Store connection through proxy API. Sets the proper headers and
creates the jwt; use the appropriate Bucket and Key classes.
"""

def __init__(self, client_id, client_secret, object_service, *args, **kwargs):
self.client_id = client_id
self.client_secret = client_secret
kwargs['object_service'] = object_service
super(ProxiedConnection, self).__init__(*args, **kwargs)
self.set_bucket_class(ProxiedBucket)

def make_request(self, method, bucket='', key='', headers=None, data='',
query_args=None, sender=None, override_num_retries=None,
retry_handler=None):
headers = headers or {}
headers['jwt'] = self.create_jwt(method, self.host, bucket, key)
headers['x-objectservice-id'] = self.provider.object_service.upper()
current_app.logger.info("Calling ProxiedConnection.make_request. headers %s", str(headers))
return super(ProxiedConnection, self).make_request(method, bucket, key,
headers, data, query_args, sender, override_num_retries,
retry_handler)

def create_jwt(self, method, host, bucket, key):
now = int(time.time())
path = self.get_path(self.calling_format.build_path_base(bucket, key))
current_app.logger.info("create_jwt called. method %s, host %s, bucket %s, key %s, path %s", method, host, str(bucket), str(key), str(path))
payload = {
'iat': now,
'nbf': now,
'exp': now + 300,
'method': method,
'iss': self.client_id,
'host': host,
'path': path,
'region': 'ny'
}
return jwt.encode(payload, self.client_secret, algorithm='HS256')


class CustomAuthHandler(AuthHandler):
"""Implements sending of custom auth headers"""

capability = ['s3']

def __init__(self, host, config, provider):
if not provider.auth_headers:
raise boto.auth_handler.NotReadyToAuthenticate()
self._provider = provider
super(CustomAuthHandler, self).__init__(host, config, provider)

def add_auth(self, http_request, **kwargs):
headers = http_request.headers
for header, attr in self._provider.auth_headers:
headers[header] = getattr(self._provider, attr)

def sign_string(self, *args, **kwargs):
return ''
14 changes: 2 additions & 12 deletions pybossa/cloud_store_api/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@
import re
from tempfile import NamedTemporaryFile
from urllib.parse import urlparse
import boto
from botocore.exceptions import ClientError
from flask import current_app as app
from werkzeug.utils import secure_filename
import magic
from werkzeug.exceptions import BadRequest
from pybossa.cloud_store_api.connection import create_connection
from pybossa.encryption import AESWithGCM
import json
from time import perf_counter
import time
from datetime import datetime, timedelta


allowed_mime_types = ['application/pdf',
Expand Down Expand Up @@ -203,22 +201,14 @@ def get_file_from_s3(s3_bucket, path, conn_name=DEFAULT_CONN, decrypt=False):

def get_content_and_key_from_s3(s3_bucket, path, conn_name=DEFAULT_CONN,
decrypt=False, secret=None):
begin_time = perf_counter()
_, key = get_s3_bucket_key(s3_bucket, path, conn_name)
content = key.get_contents_as_string()
duration = perf_counter() - begin_time
file_path = f"{s3_bucket}/{path}"
app.logger.info("get_content_and_key_from_s3. Load file contents %s duration %f seconds", file_path, duration)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why these performance metrics were removed, although we didn't necessarily need them in the first place.

begin_time = perf_counter()
if decrypt:
if not secret:
secret = app.config.get('FILE_ENCRYPTION_KEY')
cipher = AESWithGCM(secret)
content = cipher.decrypt(content)
duration = perf_counter() - begin_time
app.logger.info("get_content_and_key_from_s3. file %s decryption duration %f seconds", file_path, duration)
else:
app.logger.info("get_content_and_key_from_s3. file %s no decryption duration %f seconds", file_path, duration)
try:
if type(content) == bytes:
content = content.decode()
Expand All @@ -238,7 +228,7 @@ def delete_file_from_s3(s3_bucket, s3_url, conn_name=DEFAULT_CONN):
try:
bucket, key = get_s3_bucket_key(s3_bucket, s3_url, conn_name)
bucket.delete_key(key.name, version_id=key.version_id, headers=headers)
except boto.exception.S3ResponseError:
except ClientError as e:
app.logger.exception('S3: unable to delete file {0}'.format(s3_url))


Expand Down
7 changes: 4 additions & 3 deletions pybossa/task_creator_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import json
import requests
from six import string_types
from boto.exception import S3ResponseError
from botocore.exceptions import ClientError
from werkzeug.exceptions import InternalServerError, NotFound
from pybossa.util import get_time_plus_delta_ts
from pybossa.cloud_store_api.s3 import upload_json_data, get_content_from_s3
Expand Down Expand Up @@ -183,9 +183,10 @@ def read_encrypted_file(store, project, bucket, key_name):
try:
decrypted, key = get_content_and_key_from_s3(
bucket, key_name, conn_name, decrypt=secret, secret=secret)
except S3ResponseError as e:
except ClientError as e:
current_app.logger.exception('Project id {} get task file {} {}'.format(project.id, key_name, e))
if e.error_code == 'NoSuchKey':
error_code = e.response.get('Error', {}).get('Code')
if error_code == 'NoSuchKey':
raise NotFound('File Does Not Exist')
else:
raise InternalServerError('An Error Occurred')
Expand Down
1 change: 0 additions & 1 deletion pybossa/view/fileproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from werkzeug.exceptions import Forbidden, BadRequest, InternalServerError, NotFound

from pybossa.cache.projects import get_project_data
from boto.exception import S3ResponseError
from pybossa.contributions_guard import ContributionsGuard
from pybossa.core import task_repo, signer
from pybossa.encryption import AESWithGCM
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"Babel==2.9.1",
"beautifulsoup4==4.10.0",
"blinker==1.4",
"boto==2.49.0",
"boto3==1.28.62",
"botocore==1.31.62",
"cachelib==0.3.0",
Expand Down
Loading
Loading