From 4341b2e84f4bd5674f10fe0c97f8aa205458ce33 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Thu, 19 Dec 2024 17:46:25 -0500 Subject: [PATCH 01/13] Start of BlobUploader code in Python client library. --- .../instrumentation/_blobupload/README.md | 0 .../instrumentation/_blobupload/__init__.py | 0 .../_blobupload/api/__init__.py | 14 ++++ .../instrumentation/_blobupload/api/blob.py | 72 +++++++++++++++++++ .../_blobupload/api/blob_uploader.py | 13 ++++ .../_blobupload/api/constants.py | 4 ++ .../_blobupload/api/content_type.py | 24 +++++++ .../instrumentation/_blobupload/api/labels.py | 29 ++++++++ .../_blobupload/api/provider.py | 55 ++++++++++++++ .../_blobupload/backend/__init__.py | 0 .../_blobupload/backend/google/__init__.py | 0 .../backend/google/gcs/__init__.py | 0 .../backend/google/gcs/_gcs_impl.py | 0 .../_blobupload/backend/registry.py | 0 .../_blobupload/utils/__init__.py | 0 .../_blobupload/utils/simple_blob_uploader.py | 31 ++++++++ .../utils/simple_blob_uploader_adaptor.py | 49 +++++++++++++ 17 files changed, 291 insertions(+) create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/__init__.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py create mode 100644 
opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/__init__.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/registry.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py new file mode 100644 index 0000000000..ad647a26cb --- /dev/null +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py @@ -0,0 +1,14 @@ +from opentelemetry.instrumentation._blobupload.api.constants import 
import base64
import types
from typing import Mapping, Optional


class Blob(object):
    """Represents an opaque binary object and associated metadata.

    This object conceptually has the following properties:

      - raw_bytes: the actual data (payload) of the Blob
      - content_type: metadata about the content type (e.g. "image/jpeg")
      - labels: key/value data that can be used to identify and contextualize
        the object such as {"trace_id": "...", "span_id": "...", "filename": ...}
    """

    def __init__(self, raw_bytes: bytes, content_type: Optional[str] = None, labels: Optional[dict] = None):
        """Initialize the blob with an explicit set of properties.

        Args:
          raw_bytes: the required payload
          content_type: the MIME type describing the type of data in the payload
          labels: additional key/value data about the Blob
        """
        self._raw_bytes = raw_bytes
        self._content_type = content_type
        # Store a private plain-dict copy: later mutation of the caller's
        # mapping cannot leak into this Blob, and a plain dict keeps the
        # instance picklable even when 'labels' is itself a read-only view
        # (e.g. the 'labels' property of another Blob).
        self._labels = dict(labels) if labels else {}

    @classmethod
    def from_data_uri(cls, uri: str, labels: Optional[dict] = None) -> 'Blob':
        """Instantiate a blob from a 'data:...' URI.

        Args:
          uri: A URI in the 'data:' format. Supports a subset of 'data:' URIs
            that encode the data with the 'base64' extension and that include
            a content type. Should work with any normal 'image/jpeg', 'image/png',
            'application/pdf', 'audio/aac', and many others. DOES NOT SUPPORT
            encoding data as percent-encoded text (no "base64").

          labels: Additional key/value data to include in the constructed Blob.

        Returns:
          A new Blob holding the decoded payload. The content type is the
          media type that precedes the first ';' of the URI, or None when
          the URI does not name one.

        Raises:
          ValueError: if the URI lacks the 'data:' prefix or the ';base64,'
            marker, or if the payload is not decodable as base64.
        """
        if not uri.startswith('data:'):
            raise ValueError('Invalid "uri"; expected "data:" prefix. Found: "{}"'.format(uri))
        if ';base64,' not in uri:
            raise ValueError('Invalid "uri"; expected ";base64," section. Found: "{}"'.format(uri))
        # Everything before the ';base64,' marker is '<media type>[;param=value...]';
        # everything after it is the encoded payload.
        header, _, base64_encoded_content = uri[len('data:'):].partition(';base64,')
        # An empty media type is reported as None so that callers can tell
        # "not specified" apart from a real content type.
        content_type = header.split(';', 1)[0] or None
        try:
            # Strict decoding is required here: the lenient default silently
            # discards '-' and '_', which would corrupt URL-safe payloads
            # instead of letting them reach the fallback below.
            raw_bytes = base64.b64decode(base64_encoded_content, validate=True)
        except ValueError:
            # binascii.Error is a ValueError. The URL-safe decoder accepts
            # both alphabets and tolerates stray whitespace.
            raw_bytes = base64.urlsafe_b64decode(base64_encoded_content)
        return cls(raw_bytes, content_type=content_type, labels=labels)

    @property
    def raw_bytes(self) -> bytes:
        """Returns the raw bytes (payload) of this Blob."""
        return self._raw_bytes

    @property
    def content_type(self) -> Optional[str]:
        """Returns the content type (or None) of this Blob."""
        return self._content_type

    @property
    def labels(self) -> Mapping:
        """Returns a read-only view of the key/value metadata of this Blob."""
        return types.MappingProxyType(self._labels)
# Provides utilities for automatic content-type detection.


# Helper used to handle the possibility of optional 'magic' dependency
# being unavailable for guessing the MIME type of raw bytes.
class _FallBackModule(object):
    """Class that is shaped like the portion of 'magic' we need."""

    def from_buffer(self, raw_bytes, mime=True):
        """Fallback, subpar implementation of 'from_buffer'.

        Args:
          raw_bytes: the data to inspect; ignored by this fallback.
          mime: accepted only so that this method can be called exactly like
            'magic.from_buffer(raw_bytes, mime=True)'; ignored.

        Returns:
          The generic 'application/octet-stream' content type.
        """
        return 'application/octet-stream'


# Set up '_module' to either use 'magic' or the fallback.
_module = _FallBackModule()
try:
    import magic
except ImportError:
    pass
else:
    # NOTE(review): more than one distribution installs a top-level module
    # named 'magic'; presumably not all of them expose 'from_buffer', so
    # only switch away from the fallback when the function is present.
    if hasattr(magic, 'from_buffer'):
        _module = magic


def detect_content_type(raw_bytes: bytes) -> str:
    """Attempts to infer the content type of the specified data.

    Args:
      raw_bytes: the payload whose type should be guessed.

    Returns:
      A MIME type string; 'application/octet-stream' when the optional
      'magic' dependency is unavailable.
    """
    # 'mime=True' asks for a MIME type (e.g. 'image/png'); without it the
    # 'magic' module returns a human-readable description instead.
    return _module.from_buffer(raw_bytes, mime=True)
_NoOpBlobUploader(BlobUploader): + """Implementation of BlobUploader that does nothing.""" + + def upload_async(self, blob: Blob) -> str: + return NOT_UPLOADED + + +class BlobUploaderProvider(abc.ABC): + """Pure abstract base for configuring how to provide a BlobUploader.""" + + def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: + """Returns a BlobUploader for the specified use case. + + Args: + use_case: An optional use case that describes what the uploader is for. This could + name a particular package, class, or instrumentation. It is intended to allow + users to differentiate upload behavior based on the target instrumentation. + + Returns: + A BlobUploader that is appropriate for the use case. + """ + return _NoOpBlobUploader() + + +class _DefaultBlobUploaderProvider(BlobUploaderProvider): + """Default provider used when none has been configured.""" + + def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: + use_case_formatted = '(None)' + if use_case: + use_case_formatted = use_case + _logger.warning('No BlobUploaderProvider configured; returning a no-op for use case {}'.format(use_case_formatted)) + return _NoOpBlobUploader() + + +_blob_uploader_provider = _DefaultBlobUploaderProvider() + + +def set_blob_uploader_provider(provider: BlobUploaderProvider): + """Allows configuring the behavior of 'get_blob_uploader.""" + global _blob_uploader_provider + _blob_uploader_provider = provider + + +def get_blob_uploader(use_case: Optional[str] = None) -> BlobUploader: + return _blob_uploader_provider.get_blob_uploader(use_case) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py 
# Defines a simple, synchronous interface for providing a backend implementation.

import abc
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Needed for annotations only, so it is not imported at runtime.
    from opentelemetry.instrumentation._blobupload.api import Blob


class SimpleBlobUploader(abc.ABC):
    """Pure abstract base class of a backend implementation that is synchronous."""

    @abc.abstractmethod
    def generate_destination_uri(self, blob: 'Blob') -> str:
        """Generates a URI of where the blob will get written.

        Args:
          blob: the blob which will be uploaded.

        Returns:
          A new, unique URI that represents the target destination of the data.
        """
        raise NotImplementedError('SimpleBlobUploader.generate_destination_uri')

    @abc.abstractmethod
    def upload_sync(self, uri: str, blob: 'Blob'):
        """Synchronously writes the blob to the specified destination URI.

        Args:
          uri: A destination URI that was previously created by the function
            'generate_destination_uri' with the same blob.
          blob: The blob that should get uploaded.

        Effects:
          Attempts to upload/write the Blob to the specified destination URI.
        """
        raise NotImplementedError('SimpleBlobUploader.upload_sync')
self._blob) + + +def _create_default_executor(): + return ProcessPoolExecutor() + + +class _SimpleBlobUploaderAdaptor(BlobUploader): + + def __init__(self, simple_uploader, executor=None): + self._simple_uploader = simple_uploader + self._executor = executor or _create_default_executor() + + def upload_async(self, blob: Blob) -> str: + full_blob = _with_content_type(blob) + uri = self._simple_uploader.generate_destination_uri(full_blob) + self._do_in_background(_UploadAction(self._simple_uploader, uri, full_blob)) + return uri + + def _do_in_background(self, action): + + + +def blob_uploader_from_simple_blob_uploader(simple_uploader: SimpleBlobUploader) -> BlobUploader: + return _SimpleBlobUploaderAdaptor(simple_uploader) + From 924cd3707f1a2dff55a3f75c50cf596761e18a28 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Fri, 20 Dec 2024 14:39:40 -0500 Subject: [PATCH 02/13] Implement the adaptor, add comments. --- .../_blobupload/api/__init__.py | 2 + .../utils/simple_blob_uploader_adaptor.py | 53 +++++++++++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py index ad647a26cb..610a6a762c 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py @@ -1,3 +1,5 @@ +"""Exposes API methods to callers from the package name.""" + from opentelemetry.instrumentation._blobupload.api.constants import NOT_UPLOADED from opentelemetry.instrumentation._blobupload.api.blob import Blob from opentelemetry.instrumentation._blobupload.api.blob_uploader import BlobUploader diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py 
b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index f5231fb229..d6efcab17a 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -1,3 +1,5 @@ +import atexit + from concurrent.futures import Executor, ProcessPoolExecutor from opentelemetry.instrumentation._blobupload.api import Blob @@ -7,6 +9,7 @@ def _with_content_type(blob: Blob) -> Blob: + """Returns a variant of the Blob with the content type auto-detected if needed.""" if blob.content_type is not None: return blob content_type = detect_content_type(blob.raw_bytes) @@ -14,6 +17,7 @@ def _with_content_type(blob: Blob) -> Blob: def _UploadAction(object): + """Represents the work to be done in the background to upload a blob.""" def __init__(self, simple_uploader, uri, blob): self._simple_uploader = simple_uploader @@ -24,15 +28,49 @@ def __call__(self): self._simple_uploader.upload_sync(self._uri, self._blob) -def _create_default_executor(): +def _create_default_executor_no_cleanup(): + """Instantiates an executor subject to configuration.""" + # Potential future enhancement: allow the default executor to be + # configured using environment variables (e.g. to select between + # threads or processes, to choose number of workers, etc.) + # + # It is because of this potential future enhancement, that we + # have moved this logic into a separate function despite it + # being currently logically quite simple. 
return ProcessPoolExecutor() +def _create_default_executor(): + """Creates an executor and registers appropriate cleanup.""" + result = _create_default_executor_no_cleanup() + def _cleanup(): + result.shutdown() + atexit.register(_cleanup) + return result + +# Global default executor so that multiple uses of the adaptor +# do not waste resources creating many duplicative executors. +# Used in the '_get_or_create_default_executor' function below. +_default_executor = None + + +def _get_or_create_default_executor(): + """Return or lazily instantiate a shared default executor.""" + global _default_executor + if _default_executor is None: + _default_executor = _create_default_executor() + return _default_executor + + class _SimpleBlobUploaderAdaptor(BlobUploader): + """Implementation of 'BlobUploader' wrapping a 'SimpleBlobUploader'. + + This implements the core of the function 'blob_uploader_from_simple_blob_uploader'. + """ - def __init__(self, simple_uploader, executor=None): + def __init__(self, simple_uploader: SimpleBlobUploader, executor: Optional[Executor]=None): self._simple_uploader = simple_uploader - self._executor = executor or _create_default_executor() + self._executor = executor or _get_or_create_default_executor() def upload_async(self, blob: Blob) -> str: full_blob = _with_content_type(blob) @@ -41,9 +79,18 @@ def upload_async(self, blob: Blob) -> str: return uri def _do_in_background(self, action): + self._executor.submit(action) def blob_uploader_from_simple_blob_uploader(simple_uploader: SimpleBlobUploader) -> BlobUploader: + """Implements a 'BlobUploader' using the supplied 'SimpleBlobUploader'. + + The purpose of this function is to allow backend implementations/vendors to be able to + implement their logic much more simply, using synchronous uploading interfaces. + + This function takes care of the nitty gritty details necessary to supply an asynchronous + interface on top of the simpler logic supplied by the backend system. 
def _path_segment_from_labels(labels):
    """Returns a path segment based on blob label metadata.

    This aims to return paths like:

       'traces/12345/spans/56789'
       'traces/12345/spans/56789/events/0'
       'traces/12345/spans/56789/events/some.event.name'

    ...depending on the particular type of signal source.

    Args:
      labels: mapping of label name to value. The keys read are 'trace_id',
        'span_id', 'event_index', 'event_name' and 'otel_type'; values may
        be strings or integers.

    Returns:
      A '/'-joined relative path. A missing or empty trace or span ID is
      rendered as 'unknown'. For event-type blobs the event index is
      preferred and the event name is used when no index is present.
    """
    segments = []
    has_event_segment = False
    target_segments = [
        ('traces', 'trace_id', 'unknown'),
        ('spans', 'span_id', 'unknown'),
        ('events', 'event_index', None),
    ]
    for segment_prefix, label_key, default_val in target_segments:
        label_value = labels.get(label_key)
        # Only None and '' count as "missing"; a plain truthiness test would
        # also discard the perfectly valid event index 0.
        if label_value is None or label_value == '':
            label_value = default_val
        if label_value is None:
            continue
        segments.append(segment_prefix)
        # Label values such as the event index may be integers, while
        # str.join() only accepts strings.
        segments.append(str(label_value))
        if segment_prefix == 'events':
            has_event_segment = True
    # Tracked with a flag rather than "'events' in segments" so that an ID
    # whose value happens to be 'events' cannot suppress the event segment.
    if labels.get('otel_type') in ('event', 'span_event') and not has_event_segment:
        event_name = labels.get('event_name') or 'unknown'
        segments.append('events')
        segments.append(str(event_name))
    return '/'.join(segments)
client) + self._delegate = blob_uploader_from_simple_blob_uploader(simple_uploader) + + def upload_async(self, blob: Blob) -> str: + return self._delegate.upload_async(blob) From 9906a13db2be7dc91fe3f12f4db06619a82a7dd4 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Wed, 8 Jan 2025 16:13:58 -0500 Subject: [PATCH 04/13] Began adding tests. --- .../_blobupload/api/__init__.py | 31 +++++++-- .../instrumentation/_blobupload/api/blob.py | 8 ++- .../_blobupload/api/blob_uploader.py | 1 + .../_blobupload/api/content_type.py | 4 +- .../instrumentation/_blobupload/api/labels.py | 6 +- .../_blobupload/api/provider.py | 5 +- .../_blobupload/backend/registry.py | 0 .../_blobupload/api/test_content_type.py | 53 +++++++++++++++ .../tests/_blobupload/api/test_labels.py | 67 +++++++++++++++++++ 9 files changed, 160 insertions(+), 15 deletions(-) delete mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/registry.py create mode 100755 opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py create mode 100755 opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py index 610a6a762c..16dbaec5a8 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py @@ -1,16 +1,35 @@ """Exposes API methods to callers from the package name.""" -from opentelemetry.instrumentation._blobupload.api.constants import NOT_UPLOADED from opentelemetry.instrumentation._blobupload.api.blob import Blob -from opentelemetry.instrumentation._blobupload.api.blob_uploader import BlobUploader -from opentelemetry.instrumentation._blobupload.api.content_type import detect_content_type +from 
opentelemetry.instrumentation._blobupload.api.blob_uploader import ( + BlobUploader, +) +from opentelemetry.instrumentation._blobupload.api.constants import ( + NOT_UPLOADED, +) +from opentelemetry.instrumentation._blobupload.api.content_type import ( + detect_content_type, +) from opentelemetry.instrumentation._blobupload.api.labels import ( - generate_labels_for_span, generate_labels_for_event, - generate_labels_for_span_event + generate_labels_for_span, + generate_labels_for_span_event, ) from opentelemetry.instrumentation._blobupload.api.provider import ( BlobUploaderProvider, + get_blob_uploader, set_blob_uploader_provider, - get_glob_uploader ) + +__all__ = [ + Blob, + BlobUploader, + NOT_UPLOADED, + detect_content_type, + generate_labels_for_event, + generate_labels_for_span, + generate_labels_for_span_event, + BlobUploaderProvider, + get_blob_uploader, + set_blob_uploader_provider, +] diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py index 1f0d29a934..767070b76b 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py @@ -1,3 +1,5 @@ +from typing import Dict, Optional + import base64 class Blob(object): @@ -11,7 +13,7 @@ class Blob(object): the object such as {"trace_id": "...", "span_id": "...", "filename": ...} """ - def __init__(self, raw_bytes: bytes, content_type: Optional[str]=None, labels: Optional[dict]=None): + def __init__(self, raw_bytes: bytes, content_type: Optional[str]=None, labels: Optional[Dict[str, str]]=None): """Initialize the blob with an explicit set of properties. 
Args: @@ -24,7 +26,7 @@ def __init__(self, raw_bytes: bytes, content_type: Optional[str]=None, labels: O self._labels = labels or {} @staticmethod - def from_data_uri(cls, uri: str, labels: Optional[dict]=None) -> Blob: + def from_data_uri(cls, uri: str, labels: Optional[dict]=None) -> 'Blob': """Instantiate a blob from a 'data:...' URI. Args: @@ -67,6 +69,6 @@ def content_type(self) -> Optional[str]: return self._content_type @property - def labels(self) -> dict: + def labels(self) -> Dict[str, str]: """Returns the key/value metadata of this Blob.""" return _frozendict(self._labels) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py index 2c82be4fc0..446e18d0d4 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py @@ -5,6 +5,7 @@ from opentelemetry.instrumentation._blobupload.api.constants import NOT_UPLOADED from opentelemetry.instrumentation._blobupload.api.blob import Blob + class BlobUploader(abc.ABC): """Pure abstract base class representing a component that does blob uploading.""" diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py index 223f833a61..249c507396 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py @@ -5,7 +5,7 @@ class _FallBackModule(object): """Class that is shaped like the portion of 'magic' we need.""" - def from_buffer(self, raw_bytes): + def from_buffer(self, raw_bytes, mime=True): """Fallback, subpar implementation of 
'from_buffer'.""" return 'application/octet-stream' @@ -21,4 +21,4 @@ def from_buffer(self, raw_bytes): def detect_content_type(raw_bytes: bytes) -> str: """Attempts to infer the content type of the specified data.""" - return _module.from_buffer(raw_bytes) + return _module.from_buffer(raw_bytes, mime=True) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py index 3ceed8cb7a..e1aa4c0788 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py @@ -1,6 +1,6 @@ """Provides utilities for providing basic identifying labels for blobs.""" -def generate_labels_for_span(trace_id, span_id) -> dict: +def generate_labels_for_span(trace_id: str, span_id: str) -> dict: """Returns metadata for a span.""" return { 'otel_type': 'span', @@ -9,7 +9,7 @@ def generate_labels_for_span(trace_id, span_id) -> dict: } -def generate_labels_for_event(trace_id, span_id, event_name) -> dict: +def generate_labels_for_event(trace_id: str, span_id: str, event_name: str) -> dict: """Returns metadata for an event.""" result = generate_labels_for_span(trace_id, span_id) result.update({ @@ -19,7 +19,7 @@ def generate_labels_for_event(trace_id, span_id, event_name) -> dict: return result -def generate_labels_for_span_event(trace_id, span_id, event_name, event_index) -> dict: +def generate_labels_for_span_event(trace_id: str, span_id: str, event_name: str, event_index: int) -> dict: """Returns metadata for a span event.""" result = generate_labels_for_event(trace_id, span_id, event_name) result.update({ diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py index 
2aab9026b5..356b7b92f6 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py @@ -1,7 +1,10 @@ +from typing import Optional + import abc import logging -from opentelemetry.instrumentation._blobupload.api import BlobUploader +from opentelemetry.instrumentation._blobupload.api.blob import Blob +from opentelemetry.instrumentation._blobupload.api.blob_uploader import BlobUploader _logger = logging.getLogger(__name__) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/registry.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/registry.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py new file mode 100755 index 0000000000..f4a6c02644 --- /dev/null +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py @@ -0,0 +1,53 @@ +#! 
/usr/bin/env python3 + +if __name__ == '__main__': + import sys + sys.path.append('../../../src') + +from opentelemetry.instrumentation._blobupload.api import detect_content_type +from PIL import Image + +import io +import unittest + + +def create_test_image(format): + """Helper for creating a PIL Image for verifying image format support.""" + test_img = Image.new('RGB', (2, 2)) + output_buffer = io.BytesIO() + test_img.save(output_buffer, format) + result = output_buffer.getvalue() + output_buffer.close() + test_img.close() + return result + + +class TestContentType(unittest.TestCase): + + def test_detects_plaintext(self): + input = 'this is just regular text' + output = detect_content_type(input.encode()) + self.assertEqual(output, 'text/plain') + + def test_detects_json(self): + input = '''{ + "this": { + "contains": "json" + } + }''' + output = detect_content_type(input.encode()) + self.assertEqual(output, 'application/json') + + def test_detects_jpeg(self): + input = create_test_image('jpeg') + output = detect_content_type(input) + self.assertEqual(output, 'image/jpeg') + + def test_detects_png(self): + input = create_test_image('png') + output = detect_content_type(input) + self.assertEqual(output, 'image/png') + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py new file mode 100755 index 0000000000..6126faffd1 --- /dev/null +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py @@ -0,0 +1,67 @@ +#! 
/usr/bin/env python3 + +if __name__ == '__main__': + import sys + sys.path.append('../../../src') + +from opentelemetry.instrumentation._blobupload.api import ( + generate_labels_for_span, + generate_labels_for_event, + generate_labels_for_span_event) + +import unittest + + +class TestLabels(unittest.TestCase): + + def test_generate_labels_for_span(self): + trace_id = 'test-trace-id' + span_id = 'test-span-id' + labels = generate_labels_for_span( + trace_id=trace_id, + span_id=span_id + ) + self.assertEqual(labels, { + 'otel_type': 'span', + 'trace_id': 'test-trace-id', + 'span_id': 'test-span-id' + }) + + def test_generate_labels_for_event(self): + trace_id = 'test-trace-id' + span_id = 'test-span-id' + event_name = 'some-event' + labels = generate_labels_for_event( + trace_id=trace_id, + span_id=span_id, + event_name=event_name + ) + self.assertEqual(labels, { + 'otel_type': 'event', + 'trace_id': 'test-trace-id', + 'span_id': 'test-span-id', + 'event_name': 'some-event' + }) + + def test_generate_labels_for_span_event(self): + trace_id = 'test-trace-id' + span_id = 'test-span-id' + event_name = 'some-event' + event_index = 2 + labels = generate_labels_for_span_event( + trace_id=trace_id, + span_id=span_id, + event_name=event_name, + event_index=event_index + ) + self.assertEqual(labels, { + 'otel_type': 'span_event', + 'trace_id': 'test-trace-id', + 'span_id': 'test-span-id', + 'event_name': 'some-event', + 'event_index': 2 + }) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 8a4362e813c89a7bb90e5fe05a3c29c4d258e86b Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Wed, 8 Jan 2025 16:14:38 -0500 Subject: [PATCH 05/13] Upload current snapshot. 
--- .../instrumentation/_blobupload/api/blob.py | 53 +++++++---- .../_blobupload/api/blob_uploader.py | 6 +- .../_blobupload/api/content_type.py | 4 +- .../instrumentation/_blobupload/api/labels.py | 35 ++++---- .../_blobupload/api/provider.py | 18 ++-- .../_blobupload/api/test_content_type.py | 35 ++++---- .../tests/_blobupload/api/test_labels.py | 89 ++++++++++--------- .../tests/_blobupload/api/test_provider.py | 0 .../test_simple_blob_uploader_adaptor.py | 0 9 files changed, 137 insertions(+), 103 deletions(-) create mode 100755 opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py create mode 100644 opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py index 767070b76b..18224567f2 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py @@ -1,10 +1,10 @@ +import base64 from typing import Dict, Optional -import base64 class Blob(object): """Represents an opaque binary object and associated metadata. - + This object conteptually has the following properties: - raw_bytes: the actual data (payload) of the Blob @@ -13,9 +13,14 @@ class Blob(object): the object such as {"trace_id": "...", "span_id": "...", "filename": ...} """ - def __init__(self, raw_bytes: bytes, content_type: Optional[str]=None, labels: Optional[Dict[str, str]]=None): + def __init__( + self, + raw_bytes: bytes, + content_type: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + ): """Initialize the blob with an explicit set of properties. 
- + Args: raw_bytes: the required payload content_type: the MIME type describing the type of data in the payload @@ -26,31 +31,41 @@ def __init__(self, raw_bytes: bytes, content_type: Optional[str]=None, labels: O self._labels = labels or {} @staticmethod - def from_data_uri(cls, uri: str, labels: Optional[dict]=None) -> 'Blob': + def from_data_uri(cls, uri: str, labels: Optional[dict] = None) -> "Blob": """Instantiate a blob from a 'data:...' URI. - Args: + Args: uri: A URI in the 'data:' format. Supports a subset of 'data:' URIs that encode the data with the 'base64' extension and that include a content type. Should work with any normal 'image/jpeg', 'image/png', 'application/pdf', 'audio/aac', and many others. DOES NOT SUPPORT encoding data as percent-encoded text (no "base64"). - + labels: Additional key/value data to include in the constructed Blob. """ - if not uri.startswith('data:'): - raise ValueError('Invalid "uri"; expected "data:" prefix. Found: "{}"'.format(uri)) - if not ';base64,' in uri: - raise ValueError('Invalid "uri"; expected ";base64," section. Found: "{}"'.format(uri)) - data_prefix_len = len('data:') + if not uri.startswith("data:"): + raise ValueError( + 'Invalid "uri"; expected "data:" prefix. Found: "{}"'.format( + uri + ) + ) + if ";base64," not in uri: + raise ValueError( + 'Invalid "uri"; expected ";base64," section. Found: "{}"'.format( + uri + ) + ) + data_prefix_len = len("data:") after_data_prefix = uri[data_prefix_len:] - if ';' not in after_data_prefix: - raise ValueError('Invalid "uri"; expected ";" in URI. Found: "{}"'.format(uri)) - content_type, remaining = after_data_prefix.split(';', 1) - while not remaining.startswith('base64,'): - _, remaining = remaining.split(';', 1) - assert remaining.startswith('base64,') - base64_len = len('base64,') + if ";" not in after_data_prefix: + raise ValueError( + 'Invalid "uri"; expected ";" in URI. 
Found: "{}"'.format(uri) + ) + content_type, remaining = after_data_prefix.split(";", 1) + while not remaining.startswith("base64,"): + _, remaining = remaining.split(";", 1) + assert remaining.startswith("base64,") + base64_len = len("base64,") base64_encoded_content = remaining[base64_len:] try: raw_bytes = base64.standard_b64decode(base64_encoded_content) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py index 446e18d0d4..9dc248a8e8 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py @@ -2,13 +2,15 @@ import abc -from opentelemetry.instrumentation._blobupload.api.constants import NOT_UPLOADED from opentelemetry.instrumentation._blobupload.api.blob import Blob +from opentelemetry.instrumentation._blobupload.api.constants import ( + NOT_UPLOADED, +) class BlobUploader(abc.ABC): """Pure abstract base class representing a component that does blob uploading.""" - + @abc.abstractmethod def upload_async(self, blob: Blob) -> str: return NOT_UPLOADED diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py index 249c507396..391d00678a 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py @@ -1,5 +1,6 @@ """Provides utilities for automatic content-type detection.""" + # Helper used to handle the possibility of optional 'magic' dependency # being unavailable for guessing the MIME type of raw bytes. 
class _FallBackModule(object): @@ -7,13 +8,14 @@ class _FallBackModule(object): def from_buffer(self, raw_bytes, mime=True): """Fallback, subpar implementation of 'from_buffer'.""" - return 'application/octet-stream' + return "application/octet-stream" # Set up '_module' to either use 'magic' or the fallback. _module = _FallBackModule() try: import magic + _module = magic except ImportError: pass diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py index e1aa4c0788..829eee31a6 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py @@ -1,29 +1,34 @@ """Provides utilities for providing basic identifying labels for blobs.""" + def generate_labels_for_span(trace_id: str, span_id: str) -> dict: """Returns metadata for a span.""" - return { - 'otel_type': 'span', - 'trace_id': trace_id, - 'span_id': span_id - } + return {"otel_type": "span", "trace_id": trace_id, "span_id": span_id} -def generate_labels_for_event(trace_id: str, span_id: str, event_name: str) -> dict: +def generate_labels_for_event( + trace_id: str, span_id: str, event_name: str +) -> dict: """Returns metadata for an event.""" result = generate_labels_for_span(trace_id, span_id) - result.update({ - 'otel_type': 'event', - 'event_name': event_name, - }) + result.update( + { + "otel_type": "event", + "event_name": event_name, + } + ) return result -def generate_labels_for_span_event(trace_id: str, span_id: str, event_name: str, event_index: int) -> dict: +def generate_labels_for_span_event( + trace_id: str, span_id: str, event_name: str, event_index: int +) -> dict: """Returns metadata for a span event.""" result = generate_labels_for_event(trace_id, span_id, event_name) - result.update({ - 'otel_type': 'span_event', - 
'event_index': event_index, - }) + result.update( + { + "otel_type": "span_event", + "event_index": event_index, + } + ) return result diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py index 356b7b92f6..1fd828dcc5 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py @@ -1,11 +1,11 @@ -from typing import Optional - import abc import logging +from typing import Optional from opentelemetry.instrumentation._blobupload.api.blob import Blob -from opentelemetry.instrumentation._blobupload.api.blob_uploader import BlobUploader - +from opentelemetry.instrumentation._blobupload.api.blob_uploader import ( + BlobUploader, +) _logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ class BlobUploaderProvider(abc.ABC): def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: """Returns a BlobUploader for the specified use case. - + Args: use_case: An optional use case that describes what the uploader is for. This could name a particular package, class, or instrumentation. 
It is intended to allow @@ -38,10 +38,14 @@ class _DefaultBlobUploaderProvider(BlobUploaderProvider): """Default provider used when none has been configured.""" def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: - use_case_formatted = '(None)' + use_case_formatted = "(None)" if use_case: use_case_formatted = use_case - _logger.warning('No BlobUploaderProvider configured; returning a no-op for use case {}'.format(use_case_formatted)) + _logger.warning( + "No BlobUploaderProvider configured; returning a no-op for use case {}".format( + use_case_formatted + ) + ) return _NoOpBlobUploader() diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py index f4a6c02644..a4fe9e4768 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py @@ -1,19 +1,21 @@ #! /usr/bin/env python3 -if __name__ == '__main__': +if __name__ == "__main__": import sys - sys.path.append('../../../src') -from opentelemetry.instrumentation._blobupload.api import detect_content_type -from PIL import Image + sys.path.append("../../../src") import io import unittest +from PIL import Image + +from opentelemetry.instrumentation._blobupload.api import detect_content_type + def create_test_image(format): """Helper for creating a PIL Image for verifying image format support.""" - test_img = Image.new('RGB', (2, 2)) + test_img = Image.new("RGB", (2, 2)) output_buffer = io.BytesIO() test_img.save(output_buffer, format) result = output_buffer.getvalue() @@ -23,31 +25,30 @@ def create_test_image(format): class TestContentType(unittest.TestCase): - def test_detects_plaintext(self): - input = 'this is just regular text' + input = "this is just regular text" output = detect_content_type(input.encode()) - self.assertEqual(output, 'text/plain') + self.assertEqual(output, "text/plain") def 
test_detects_json(self): - input = '''{ + input = """{ "this": { "contains": "json" } - }''' + }""" output = detect_content_type(input.encode()) - self.assertEqual(output, 'application/json') + self.assertEqual(output, "application/json") def test_detects_jpeg(self): - input = create_test_image('jpeg') + input = create_test_image("jpeg") output = detect_content_type(input) - self.assertEqual(output, 'image/jpeg') + self.assertEqual(output, "image/jpeg") def test_detects_png(self): - input = create_test_image('png') + input = create_test_image("png") output = detect_content_type(input) - self.assertEqual(output, 'image/png') + self.assertEqual(output, "image/png") -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py index 6126faffd1..7f8a5ffe5b 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py @@ -1,67 +1,72 @@ #! 
/usr/bin/env python3 -if __name__ == '__main__': +if __name__ == "__main__": import sys - sys.path.append('../../../src') -from opentelemetry.instrumentation._blobupload.api import ( - generate_labels_for_span, - generate_labels_for_event, - generate_labels_for_span_event) + sys.path.append("../../../src") import unittest +from opentelemetry.instrumentation._blobupload.api import ( + generate_labels_for_event, + generate_labels_for_span, + generate_labels_for_span_event, +) -class TestLabels(unittest.TestCase): +class TestLabels(unittest.TestCase): def test_generate_labels_for_span(self): - trace_id = 'test-trace-id' - span_id = 'test-span-id' - labels = generate_labels_for_span( - trace_id=trace_id, - span_id=span_id + trace_id = "test-trace-id" + span_id = "test-span-id" + labels = generate_labels_for_span(trace_id=trace_id, span_id=span_id) + self.assertEqual( + labels, + { + "otel_type": "span", + "trace_id": "test-trace-id", + "span_id": "test-span-id", + }, ) - self.assertEqual(labels, { - 'otel_type': 'span', - 'trace_id': 'test-trace-id', - 'span_id': 'test-span-id' - }) def test_generate_labels_for_event(self): - trace_id = 'test-trace-id' - span_id = 'test-span-id' - event_name = 'some-event' + trace_id = "test-trace-id" + span_id = "test-span-id" + event_name = "some-event" labels = generate_labels_for_event( - trace_id=trace_id, - span_id=span_id, - event_name=event_name + trace_id=trace_id, span_id=span_id, event_name=event_name + ) + self.assertEqual( + labels, + { + "otel_type": "event", + "trace_id": "test-trace-id", + "span_id": "test-span-id", + "event_name": "some-event", + }, ) - self.assertEqual(labels, { - 'otel_type': 'event', - 'trace_id': 'test-trace-id', - 'span_id': 'test-span-id', - 'event_name': 'some-event' - }) def test_generate_labels_for_span_event(self): - trace_id = 'test-trace-id' - span_id = 'test-span-id' - event_name = 'some-event' + trace_id = "test-trace-id" + span_id = "test-span-id" + event_name = "some-event" event_index 
= 2 labels = generate_labels_for_span_event( trace_id=trace_id, span_id=span_id, event_name=event_name, - event_index=event_index + event_index=event_index, + ) + self.assertEqual( + labels, + { + "otel_type": "span_event", + "trace_id": "test-trace-id", + "span_id": "test-span-id", + "event_name": "some-event", + "event_index": 2, + }, ) - self.assertEqual(labels, { - 'otel_type': 'span_event', - 'trace_id': 'test-trace-id', - 'span_id': 'test-span-id', - 'event_name': 'some-event', - 'event_index': 2 - }) -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py new file mode 100755 index 0000000000..e69de29bb2 diff --git a/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py new file mode 100644 index 0000000000..e69de29bb2 From 1667374f4856428d4352421d91dc1b3f6fff25fc Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Wed, 8 Jan 2025 16:52:03 -0500 Subject: [PATCH 06/13] Add dependencies. 
--- opentelemetry-instrumentation/pyproject.toml | 8 ++++++++ opentelemetry-instrumentation/test-requirements.txt | 1 + .../tests/_blobupload/api/test_labels.py | 1 - 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/opentelemetry-instrumentation/pyproject.toml b/opentelemetry-instrumentation/pyproject.toml index 4f80b22491..aa10874b33 100644 --- a/opentelemetry-instrumentation/pyproject.toml +++ b/opentelemetry-instrumentation/pyproject.toml @@ -32,6 +32,14 @@ dependencies = [ "packaging >= 18.0", ] +[project.optional-dependencies] +gcs = [ + "google-cloud-storage==2.19.0" +] +magic = [ + "python-magic==0.4.27" +] + [project.scripts] opentelemetry-bootstrap = "opentelemetry.instrumentation.bootstrap:run" opentelemetry-instrument = "opentelemetry.instrumentation.auto_instrumentation:run" diff --git a/opentelemetry-instrumentation/test-requirements.txt b/opentelemetry-instrumentation/test-requirements.txt index 943a45c8f4..cb4cdc0b98 100644 --- a/opentelemetry-instrumentation/test-requirements.txt +++ b/opentelemetry-instrumentation/test-requirements.txt @@ -5,6 +5,7 @@ packaging==24.0 pluggy==1.5.0 py-cpuinfo==9.0.0 pytest==7.4.4 +python-magic==0.4.27 tomli==2.0.1 typing_extensions==4.12.2 wrapt==1.16.0 diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py index 7f8a5ffe5b..08f9d01a20 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py @@ -2,7 +2,6 @@ if __name__ == "__main__": import sys - sys.path.append("../../../src") import unittest From 41b7eead616012e437085b764ad65b6746ef2ab2 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Thu, 9 Jan 2025 16:05:54 -0500 Subject: [PATCH 07/13] Add more tests and fix some of the code that wasn't working. 
--- .../instrumentation/_blobupload/api/blob.py | 43 +++++-- .../_blobupload/api/content_type.py | 2 + .../_blobupload/api/provider.py | 8 +- .../backend/google/gcs/__init__.py | 4 + .../_blobupload/utils/__init__.py | 9 ++ .../_blobupload/utils/simple_blob_uploader.py | 15 ++- .../utils/simple_blob_uploader_adaptor.py | 30 +++-- .../tests/_blobupload/api/test_blob.py | 108 ++++++++++++++++++ .../_blobupload/api/test_content_type.py | 7 +- .../tests/_blobupload/api/test_provider.py | 64 +++++++++++ .../test_simple_blob_uploader_adaptor.py | 95 +++++++++++++++ 11 files changed, 359 insertions(+), 26 deletions(-) create mode 100755 opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py mode change 100644 => 100755 opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py index 18224567f2..8cbd0cf71b 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py @@ -1,5 +1,8 @@ import base64 -from typing import Dict, Optional +import json + +from types import MappingProxyType as _frozendict +from typing import Mapping, Dict, Optional class Blob(object): @@ -17,7 +20,7 @@ def __init__( self, raw_bytes: bytes, content_type: Optional[str] = None, - labels: Optional[Dict[str, str]] = None, + labels: Optional[Mapping[str, str]] = None, ): """Initialize the blob with an explicit set of properties. 
@@ -26,12 +29,18 @@ def __init__( content_type: the MIME type describing the type of data in the payload labels: additional key/value data about the Blob """ - self._raw_bytes = _raw_bytes + self._raw_bytes = raw_bytes self._content_type = content_type - self._labels = labels or {} + self._labels = {} + if labels is not None: + if isinstance(labels, dict): + self._labels.update(labels) + else: + for k in labels: + self._labels[k] = labels[k] @staticmethod - def from_data_uri(cls, uri: str, labels: Optional[dict] = None) -> "Blob": + def from_data_uri(uri: str, labels: Optional[dict] = None) -> "Blob": """Instantiate a blob from a 'data:...' URI. Args: @@ -67,10 +76,7 @@ def from_data_uri(cls, uri: str, labels: Optional[dict] = None) -> "Blob": assert remaining.startswith("base64,") base64_len = len("base64,") base64_encoded_content = remaining[base64_len:] - try: - raw_bytes = base64.standard_b64decode(base64_encoded_content) - except ValueError: - raw_bytes = base64.urlsafe_b64decode(base64_encoded_content) + raw_bytes = base64.b64decode(base64_encoded_content) return Blob(raw_bytes, content_type=content_type, labels=labels) @property @@ -84,6 +90,23 @@ def content_type(self) -> Optional[str]: return self._content_type @property - def labels(self) -> Dict[str, str]: + def labels(self) -> Mapping[str, str]: """Returns the key/value metadata of this Blob.""" return _frozendict(self._labels) + + def __eq__(self, o): + return ( + (isinstance(o, Blob)) and + (self.raw_bytes == o.raw_bytes) and + (self.content_type == o.content_type) and + (self.labels == o.labels) + ) + + def __repr__(self): + params = [repr(self._raw_bytes)] + if self._content_type is not None: + params.append('content_type={}'.format(repr(self._content_type))) + if self._labels: + params.append('labels={}'.format(json.dumps(self._labels, sort_keys=True))) + params_string = ', '.join(params) + return 'Blob({})'.format(params_string) diff --git 
a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py index 391d00678a..a9553f90ce 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py @@ -23,4 +23,6 @@ def from_buffer(self, raw_bytes, mime=True): def detect_content_type(raw_bytes: bytes) -> str: """Attempts to infer the content type of the specified data.""" + if not raw_bytes: + return 'application/octet-stream' return _module.from_buffer(raw_bytes, mime=True) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py index 1fd828dcc5..c07ef261e6 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py @@ -6,6 +6,8 @@ from opentelemetry.instrumentation._blobupload.api.blob_uploader import ( BlobUploader, ) +from opentelemetry.instrumentation._blobupload.api.constants import NOT_UPLOADED + _logger = logging.getLogger(__name__) @@ -42,7 +44,7 @@ def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: if use_case: use_case_formatted = use_case _logger.warning( - "No BlobUploaderProvider configured; returning a no-op for use case {}".format( + "No BlobUploaderProvider configured; returning a no-op for use case \"{}\". 
Use 'set_blob_uploader_provider()' to configure.".format( use_case_formatted ) ) @@ -52,10 +54,12 @@ def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: _blob_uploader_provider = _DefaultBlobUploaderProvider() -def set_blob_uploader_provider(provider: BlobUploaderProvider): +def set_blob_uploader_provider(provider: BlobUploaderProvider) -> BlobUploaderProvider: """Allows configuring the behavior of 'get_blob_uploader.""" global _blob_uploader_provider + old_provider = _blob_uploader_provider _blob_uploader_provider = provider + return old_provider def get_blob_uploader(use_case: Optional[str] = None) -> BlobUploader: diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py index f99cc3a5f1..c4b39905c1 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py @@ -1 +1,5 @@ from opentelemetry.instrumentation._blobupload.backend.google.gcs._gcs_impl import GcsBlobUploader + +__all__ = [ + GcsBlobUploader +] diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py index e69de29bb2..85658825da 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py @@ -0,0 +1,9 @@ +"""Exposes API methods to callers from the package name.""" + +from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader_adaptor import blob_uploader_from_simple_blob_uploader +from 
opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader import SimpleBlobUploader + +__all__ = [ + blob_uploader_from_simple_blob_uploader, + SimpleBlobUploader, +] diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py index b8fda2862b..c643f9534c 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py @@ -1,11 +1,14 @@ """Defines a simple, synchronous interface for providing a backend implementation.""" +import abc -class SimpleBlobUploader(ABC): +from opentelemetry.instrumentation._blobupload.api import Blob + +class SimpleBlobUploader(abc.ABC): """Pure abstract base class of a backend implementation that is synchronous.""" - @abstractmethod - def generate_destination_uri(self, blob: Blob) -> str: + @abc.abstractmethod + def generate_destination_uri(self, blob: Blob) -> str: """Generates a URI of where the blob will get written. Args: @@ -16,8 +19,8 @@ def generate_destination_uri(self, blob: Blob) -> str: """ raise NotImplementedError('SimpleBlobUploader.generate_destination_uri') - @abstractmethod - def upload_sync(self, uri: str, blob: Blob): + @abc.abstractmethod + def upload_sync(self, uri: str, blob: Blob): """Synchronously writes the blob to the specified destination URI. Args: @@ -28,4 +31,4 @@ def upload_sync(self, uri: str, blob: Blob): Effects: Attempts to upload/write the Blob to the specified destination URI. 
""" - raise NotImplementedError('SimpleBlobUploader.upload_sync') \ No newline at end of file + raise NotImplementedError('SimpleBlobUploader.upload_sync') diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index d6efcab17a..1a2c206d91 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -1,13 +1,19 @@ import atexit +import logging -from concurrent.futures import Executor, ProcessPoolExecutor +from typing import Optional +from concurrent.futures import Executor, ThreadPoolExecutor -from opentelemetry.instrumentation._blobupload.api import Blob -from opentelemetry.instrumentation._blobupload.api import BlobUploader -from opentelemetry.instrumentation._blobupload.api import detect_content_type +from opentelemetry.instrumentation._blobupload.api import ( + Blob, + BlobUploader, + detect_content_type) from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader import SimpleBlobUploader +_logger = logging.getLogger(__name__) + + def _with_content_type(blob: Blob) -> Blob: """Returns a variant of the Blob with the content type auto-detected if needed.""" if blob.content_type is not None: @@ -16,7 +22,7 @@ def _with_content_type(blob: Blob) -> Blob: return Blob(blob.raw_bytes, content_type=content_type, labels=blob.labels) -def _UploadAction(object): +class _UploadAction(object): """Represents the work to be done in the background to upload a blob.""" def __init__(self, simple_uploader, uri, blob): @@ -25,7 +31,11 @@ def __init__(self, simple_uploader, uri, blob): self._blob = blob def __call__(self): - self._simple_uploader.upload_sync(self._uri, self._blob) + 
_logger.debug('Uploading blob to "{}".'.format(self._uri)) + try: + self._simple_uploader.upload_sync(self._uri, self._blob) + except: + _logger.error('Failed to upload blob to "{}".'.format(self._uri)) def _create_default_executor_no_cleanup(): @@ -37,7 +47,8 @@ def _create_default_executor_no_cleanup(): # It is because of this potential future enhancement, that we # have moved this logic into a separate function despite it # being currently logically quite simple. - return ProcessPoolExecutor() + _logger.debug('Creating thread pool executor') + return ThreadPoolExecutor() def _create_default_executor(): @@ -45,6 +56,7 @@ def _create_default_executor(): result = _create_default_executor_no_cleanup() def _cleanup(): result.shutdown() + _logger.debug('Registering cleanup for the pool') atexit.register(_cleanup) return result @@ -58,7 +70,10 @@ def _get_or_create_default_executor(): """Return or lazily instantiate a shared default executor.""" global _default_executor if _default_executor is None: + _logger.debug('No existing executor found; creating one lazily.') _default_executor = _create_default_executor() + else: + _logger.debug('Reusing existing executor.') return _default_executor @@ -79,6 +94,7 @@ def upload_async(self, blob: Blob) -> str: return uri def _do_in_background(self, action): + _logger.debug('Scheduling background upload.') self._executor.submit(action) diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py new file mode 100755 index 0000000000..bbe39dd99e --- /dev/null +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py @@ -0,0 +1,108 @@ +#! 
/usr/bin/env python3 + +if __name__ == "__main__": + import sys + sys.path.append("../../../src") + +import base64 +import unittest + + +from opentelemetry.instrumentation._blobupload.api import Blob + + +class TestBlob(unittest.TestCase): + + def test_construction_with_just_bytes(self): + data = 'some string'.encode() + blob = Blob(data) + self.assertEqual(blob.raw_bytes, data) + self.assertIsNone(blob.content_type) + self.assertIsNotNone(blob.labels) + self.assertEqual(len(blob.labels), 0) + + def test_construction_with_bytes_and_content_type(self): + data = 'some string'.encode() + content_type = 'text/plain' + blob = Blob(data, content_type=content_type) + self.assertEqual(blob.raw_bytes, data) + self.assertEqual(blob.content_type, content_type) + self.assertIsNotNone(blob.labels) + self.assertEqual(len(blob.labels), 0) + + def test_construction_with_bytes_and_labels(self): + data = 'some string'.encode() + labels = {'key1': 'value1', 'key2': 'value2'} + blob = Blob(data, labels=labels) + self.assertEqual(blob.raw_bytes, data) + self.assertIsNone(blob.content_type) + self.assert_labels_equal(blob.labels, labels) + + def test_construction_with_all_fields(self): + data = 'some string'.encode() + content_type = 'text/plain' + labels = {'key1': 'value1', 'key2': 'value2'} + blob = Blob(data, content_type=content_type, labels=labels) + self.assertEqual(blob.raw_bytes, data) + self.assertEqual(blob.content_type, content_type) + self.assert_labels_equal(blob.labels, labels) + + def test_from_data_uri_without_labels(self): + data = 'some string'.encode() + content_type = 'text/plain' + encoded_data = base64.b64encode(data).decode() + uri = 'data:{};base64,{}'.format(content_type, encoded_data) + blob = Blob.from_data_uri(uri) + self.assertEqual(blob.raw_bytes, data) + self.assertEqual(blob.content_type, content_type) + self.assertIsNotNone(blob.labels) + self.assertEqual(len(blob.labels), 0) + + def test_from_data_uri_with_labels(self): + data = 'some string'.encode() 
+ content_type = 'text/plain' + encoded_data = base64.b64encode(data).decode() + uri = 'data:{};base64,{}'.format(content_type, encoded_data) + labels = {'key1': 'value1', 'key2': 'value2'} + blob = Blob.from_data_uri(uri, labels=labels) + self.assertEqual(blob.raw_bytes, data) + self.assertEqual(blob.content_type, content_type) + self.assert_labels_equal(blob.labels, labels) + + def test_from_data_uri_with_valid_standard_base64(self): + data = 'some string'.encode() + content_type = 'text/plain' + encoded_data = base64.standard_b64encode(data).decode() + uri = 'data:{};base64,{}'.format(content_type, encoded_data) + blob = Blob.from_data_uri(uri) + self.assertEqual(blob.raw_bytes, data) + self.assertEqual(blob.content_type, content_type) + + def test_from_data_uri_with_valid_websafe_base64(self): + data = 'some string'.encode() + content_type = 'text/plain' + encoded_data = base64.urlsafe_b64encode(data).decode() + uri = 'data:{};base64,{}'.format(content_type, encoded_data) + blob = Blob.from_data_uri(uri) + self.assertEqual(blob.raw_bytes, data) + self.assertEqual(blob.content_type, content_type) + + def test_from_data_uri_with_non_data_uri_content(self): + with self.assertRaisesRegex(ValueError, 'expected "data:" prefix'): + Blob.from_data_uri('not a valid data uri') + + def test_from_data_uri_with_non_base64_content(self): + with self.assertRaisesRegex(ValueError, 'expected ";base64," section'): + Blob.from_data_uri('data:text/plain,validifpercentencoded') + + def assert_labels_equal(self, a, b): + self.assertEqual(len(a), len(b), msg='Different sizes: {} vs {}; a={}, b={}'.format(len(a), len(b), a, b)) + for k in a: + self.assertTrue(k in b, msg='Key {} found in a but not b'.format(k)) + va = a[k] + vb = b[k] + self.assertEqual(va, vb, msg='Values for key {} different for a vs b: {} vs {}'.format(k, va, vb)) + + +if __name__ == "__main__": + unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py 
b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py index a4fe9e4768..a16dc8d17c 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py @@ -2,7 +2,6 @@ if __name__ == "__main__": import sys - sys.path.append("../../../src") import io @@ -25,6 +24,12 @@ def create_test_image(format): class TestContentType(unittest.TestCase): + + def test_handles_empty_correctly(self): + input = bytes() + output = detect_content_type(input) + self.assertEqual(output, "application/octet-stream") + def test_detects_plaintext(self): input = "this is just regular text" output = detect_content_type(input.encode()) diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py index e69de29bb2..04207461df 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py @@ -0,0 +1,64 @@ +#! 
/usr/bin/env python3 + +if __name__ == "__main__": + import sys + sys.path.append("../../../src") + +import unittest + +from opentelemetry.instrumentation._blobupload.api import ( + NOT_UPLOADED, + Blob, + BlobUploader, + BlobUploaderProvider, + get_blob_uploader, + set_blob_uploader_provider +) + +class TestProvider(unittest.TestCase): + + def test_default_provider(self): + uploader = get_blob_uploader('test') + self.assertIsNotNone(uploader) + blob = Blob(bytes()) + url = uploader.upload_async(blob) + self.assertEqual(url, NOT_UPLOADED) + + def test_custom_provider(self): + + class CustomUploader(BlobUploader): + + def __init__(self, result): + self.captured_blob = None + self.upload_result = result + + def upload_async(self, blob): + self.captured_blob = blob + return self.upload_result + + class CustomProvider(BlobUploaderProvider): + + def __init__(self, uploader): + self.uploader = uploader + self.captured_use_case = None + + def get_blob_uploader(self, use_case): + self.captured_use_case = use_case + return self.uploader + + uploader = CustomUploader('foo') + provider = CustomProvider(uploader) + old_provider = set_blob_uploader_provider(provider) + returned_uploader = get_blob_uploader('test') + self.assertEqual(provider.captured_use_case, 'test') + self.assertEqual(returned_uploader, uploader) + blob = Blob(bytes(), content_type='bar') + url = returned_uploader.upload_async(blob) + self.assertEqual(url, 'foo') + self.assertEqual(uploader.captured_blob, blob) + unset_provider = set_blob_uploader_provider(old_provider) + self.assertEqual(unset_provider, provider) + + +if __name__ == "__main__": + unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py old mode 100644 new mode 100755 index e69de29bb2..3ec60d49d6 --- 
a/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py @@ -0,0 +1,95 @@ +#! /usr/bin/env python3 + +if __name__ == "__main__": + import sys + sys.path.append("../../../src") + +import abc +import unittest +from multiprocessing import Queue + +from opentelemetry.instrumentation._blobupload.api import ( + Blob, + BlobUploader +) +from opentelemetry.instrumentation._blobupload.utils import ( + blob_uploader_from_simple_blob_uploader, + SimpleBlobUploader +) + + +class QueueBasedUploader(SimpleBlobUploader): + + def __init__(self, queue): + self._queue = queue + + def generate_destination_uri(self, blob): + return blob.labels['destination_uri'] + + def upload_sync(self, uri, blob): + self._queue.put((uri, blob)) + + +class FailingUploader(SimpleBlobUploader): + + def __init__(self, queue): + self._queue = queue + + def generate_destination_uri(self, blob): + return blob.labels['destination_uri'] + + def upload_sync(self, uri, blob): + try: + raise RuntimeError('something went wrong') + finally: + self._queue.put('done') + + + +class TestBlob(unittest.TestCase): + + def test_simple_blob_uploader_adaptor(self): + queue = Queue() + simple = QueueBasedUploader(queue) + blob = Blob(bytes(), content_type='some-content-type', labels={'destination_uri': 'foo'}) + uploader = blob_uploader_from_simple_blob_uploader(simple) + self.assertIsInstance(uploader, BlobUploader) + url = uploader.upload_async(blob) + self.assertEqual(url, 'foo') + stored_uri, stored_blob = queue.get() + self.assertEqual(stored_uri, 'foo') + self.assertEqual(stored_blob, blob) + self.assertTrue(queue.empty()) + queue.close() + + def test_auto_adds_missing_content_type(self): + queue = Queue() + simple = QueueBasedUploader(queue) + blob = Blob('some plain text'.encode(), labels={'destination_uri': 'foo'}) + uploader = blob_uploader_from_simple_blob_uploader(simple) + 
self.assertIsInstance(uploader, BlobUploader) + url = uploader.upload_async(blob) + self.assertEqual(url, 'foo') + stored_uri, stored_blob = queue.get() + self.assertEqual(stored_uri, 'foo') + self.assertEqual(stored_blob.raw_bytes, blob.raw_bytes) + self.assertEqual(stored_blob.content_type, 'text/plain') + self.assertEqual(stored_blob.labels, blob.labels) + self.assertTrue(queue.empty()) + queue.close() + + def test_captures_exceptions_raised(self): + queue = Queue() + simple = FailingUploader(queue) + blob = Blob(bytes(), labels={'destination_uri': 'foo'}) + uploader = blob_uploader_from_simple_blob_uploader(simple) + self.assertIsInstance(uploader, BlobUploader) + url = uploader.upload_async(blob) + self.assertEqual(url, 'foo') + queue.get() + self.assertTrue(queue.empty()) + queue.close() + + +if __name__ == "__main__": + unittest.main() From 2b51a15f151a96a3ee2ad9fc59d2b9dd4393c12a Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Mon, 13 Jan 2025 13:31:53 -0500 Subject: [PATCH 08/13] Completed writing unit tests for functionality implemented so far. 
--- .../backend/google/gcs/__init__.py | 1 + .../backend/google/gcs/_gcs_client_wrapper.py | 59 ++++++ .../backend/google/gcs/_gcs_impl.py | 76 +++++--- .../utils/simple_blob_uploader_adaptor.py | 2 +- .../tests/_blobupload/api/test_blob.py | 2 + .../_blobupload/api/test_content_type.py | 2 + .../tests/_blobupload/api/test_labels.py | 2 + .../tests/_blobupload/api/test_provider.py | 2 + .../google/gcs/test_gcs_blob_uploader.py | 175 ++++++++++++++++++ .../test_simple_blob_uploader_adaptor.py | 2 + 10 files changed, 299 insertions(+), 24 deletions(-) create mode 100644 opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py create mode 100755 opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py index c4b39905c1..408b26a7b7 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py @@ -1,5 +1,6 @@ from opentelemetry.instrumentation._blobupload.backend.google.gcs._gcs_impl import GcsBlobUploader + __all__ = [ GcsBlobUploader ] diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py new file mode 100644 index 0000000000..158ca00b1f --- /dev/null +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py @@ -0,0 +1,59 @@ +from typing import Any, TypeAlias + +import logging + +_logger = logging.getLogger(__name__) 
+_gcs_initialized = False +_gcs_client_factory = None +_gcs_blob_from_uri = None + + +GcsClientType: TypeAlias = Any + + +def set_gcs_client_factory(gcs_client_type, client_factory): + global _gcs_initialized + global _gcs_client_factory + global GcsClientType + if _gcs_initialized: + _logger.warning('Replacing default GCS client factory') + GcsClientType = gcs_client_type + _gcs_client_factory = client_factory + if _gcs_client_factory and _gcs_blob_from_uri: + _gcs_initialized = True + + +def set_gcs_blob_from_uri(blob_from_uri): + global _gcs_initialized + global _gcs_blob_from_uri + if _gcs_initialized: + _logger.warning('Replacing default GCS blob_from_uri method') + _gcs_blob_from_uri = blob_from_uri + if _gcs_client_factory and _gcs_blob_from_uri: + _gcs_initialized = True + + +def is_gcs_initialized(): + return _gcs_initialized + + +def create_gcs_client(): + if _gcs_client_factory is not None: + return _gcs_client_factory() + return None + + +def blob_from_uri(uri, client): + if _gcs_blob_from_uri is not None: + return _gcs_blob_from_uri(uri, client=client) + return None + + +try: + from google.cloud.storage import Client as _GcsClient + from google.cloud.storage.blob import Blob as _GcsBlob + set_gcs_client_factory(_GcsClient, _GcsClient) + set_gcs_blob_from_uri(getattr(_GcsBlob, 'from_uri', getattr(_GcsBlob, 'from_string'))) + _logger.debug('Found "google-cloud-storage" optional dependency and successfully registered it.') +except ImportError: + _logger.warning('Missing optional "google-cloud-storage" dependency.') diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py index 4585a9a70a..873eaefcc0 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py +++ 
b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py @@ -1,13 +1,42 @@ import io import uuid +import logging -from google.cloud.storage import Client as GcsClient -from google.cloud.storage import Blob as GcsBlob +from typing import Optional, TypeAlias from opentelemetry.instrumentation._blobupload.api import Blob from opentelemetry.instrumentation._blobupload.api import BlobUploader from opentelemetry.instrumentation._blobupload.utils import SimpleBlobUploader from opentelemetry.instrumentation._blobupload.utils import blob_uploader_from_simple_blob_uploader +from opentelemetry.instrumentation._blobupload.backend.google.gcs import _gcs_client_wrapper + +_logger = logging.getLogger(__name__) + +GcsClient: TypeAlias = _gcs_client_wrapper.GcsClientType + + +def _path_for_span(trace_id, span_id): + if not trace_id or not span_id: + return '' + return 'traces/{}/spans/{}'.format(trace_id, span_id) + + +def _path_for_event(trace_id, span_id, event_name): + if not event_name: + return '' + span_path = _path_for_span(trace_id, span_id) + if not span_path: + return '' + return '{}/events/{}'.format(span_path, event_name) + + +def _path_for_span_event(trace_id, span_id, event_index): + if event_index is None: + return '' + span_path = _path_for_span(trace_id, span_id) + if not span_path: + return '' + return '{}/events/{}'.format(span_path, event_index) def _path_segment_from_labels(labels): @@ -22,24 +51,19 @@ def _path_segment_from_labels(labels): ...depending on the particular type of signal source. 
""" - segments = [] - target_segments = [ - ('traces', 'trace_id', 'unknown'), - ('spans', 'span_id', 'unknown'), - ('events', 'event_index', None), - ] - for segment_prefix, label_key, default_val in target_segments: - label_value = labels.get(label_key) or default_val - if label_value: - segments.append(segment_prefix) - segments.append(label_value) - if ((labels.get('otel_type') in ['event', 'span_event']) and - ('events' not in segments)): - event_name = labels.get('event_name') or 'unknown' - segments.append('events') - segments.append(event_name) - return '/'.join(segments) - + signal_type = labels.get('otel_type') + if not signal_type or signal_type not in ['span', 'event', 'span_event']: + return '' + trace_id = labels.get('trace_id') + span_id = labels.get('span_id') + event_name = labels.get('event_name') + event_index = labels.get('event_index') + if signal_type == 'span': + return _path_for_span(trace_id, span_id) + elif signal_type == 'event': + return _path_for_event(trace_id, span_id, event_name) + elif signal_type == 'span_event': + return _path_for_span_event(trace_id, span_id, event_index) class _SimpleGcsBlobUploader(SimpleBlobUploader): @@ -52,15 +76,18 @@ def __init__(self, prefix: str, client:Optional[GcsClient]=None): if not prefix.endswith('/'): prefix = '{}/'.format(prefix) self._prefix = prefix - self._client = client or GcsClient() + self._client = client or _gcs_client_wrapper.create_gcs_client() def generate_destination_uri(self, blob: Blob) -> str: origin_path = _path_segment_from_labels(blob.labels) + if origin_path and not origin_path.endswith('/'): + origin_path = '{}/'.format(origin_path) upload_id = uuid.uuid4().hex - return '{}{}/uploads/{}'.format(self._prefix, origin_path, upload_id) + return '{}{}uploads/{}'.format(self._prefix, origin_path, upload_id) def upload_sync(self, uri: str, blob: Blob): - gcs_blob = GcsBlob.from_string(uri, client=self._client) + _logger.debug('Uploading blob: size: {} -> 
"{}"'.format(len(blob.raw_bytes), uri)) + gcs_blob = _gcs_client_wrapper.blob_from_uri(uri, client=self._client) gcs_blob.upload_from_file( io.BytesIO(blob.raw_bytes), content_type=blob.content_type) @@ -69,9 +96,12 @@ def upload_sync(self, uri: str, blob: Blob): gcs_blob.metadata = metadata + class GcsBlobUploader(BlobUploader): def __init__(self, prefix: str, client:Optional[GcsClient]=None): + if not _gcs_client_wrapper.is_gcs_initialized(): + raise NotImplementedError("GcsBlobUploader implementation unavailable without 'google-cloud-storage' optional dependency.") simple_uploader = _SimpleGcsBlobUploader(prefix, client) self._delegate = blob_uploader_from_simple_blob_uploader(simple_uploader) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index 1a2c206d91..1ae64a42ae 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -35,7 +35,7 @@ def __call__(self): try: self._simple_uploader.upload_sync(self._uri, self._blob) except: - _logger.error('Failed to upload blob to "{}".'.format(self._uri)) + _logger.exception('Failed to upload blob to "{}".'.format(self._uri)) def _create_default_executor_no_cleanup(): diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py index bbe39dd99e..8d00383819 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py @@ -5,6 +5,7 @@ sys.path.append("../../../src") import base64 +import logging import unittest @@ -105,4 +106,5 @@ def assert_labels_equal(self, a, b): if 
__name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py index a16dc8d17c..e5b9e96148 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py @@ -5,6 +5,7 @@ sys.path.append("../../../src") import io +import logging import unittest from PIL import Image @@ -56,4 +57,5 @@ def test_detects_png(self): if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py index 08f9d01a20..039a7550e5 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_labels.py @@ -4,6 +4,7 @@ import sys sys.path.append("../../../src") +import logging import unittest from opentelemetry.instrumentation._blobupload.api import ( @@ -68,4 +69,5 @@ def test_generate_labels_for_span_event(self): if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py index 04207461df..34777f78c4 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py @@ -4,6 +4,7 @@ import sys sys.path.append("../../../src") +import logging import unittest from opentelemetry.instrumentation._blobupload.api import ( @@ -61,4 +62,5 @@ def get_blob_uploader(self, use_case): if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git 
a/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py b/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py new file mode 100755 index 0000000000..3f5dade003 --- /dev/null +++ b/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py @@ -0,0 +1,175 @@ +#! /usr/bin/env python3 + +if __name__ == "__main__": + import sys + sys.path.append("../../../../../src") + +import abc +import logging +import unittest +from multiprocessing import Queue + +from opentelemetry.instrumentation._blobupload.api import ( + Blob, + BlobUploader, + generate_labels_for_event, + generate_labels_for_span, + generate_labels_for_span_event, +) +from opentelemetry.instrumentation._blobupload.backend.google.gcs import GcsBlobUploader + +# Internal implementation used for mocking +from opentelemetry.instrumentation._blobupload.backend.google.gcs import _gcs_client_wrapper + + +class FakeGcs(object): + + def __init__(self): + self._queue = Queue() + self._storage = {} + self._done = set() + + def reset(self): + self._storage = {} + + def get(self, id): + while id not in self._done: + self._queue.get() + return self._storage.get(id) + + def upload_from_file(self, id, data, content_type): + b = Blob(data.read(), content_type=content_type) + self._storage[id] = b + + def update_metadata(self, id, new_metadata): + old = self._storage[id] + b = Blob(old.raw_bytes, content_type=old.content_type, labels=new_metadata) + self._storage[id] = b + self._done.add(id) + self._queue.put(id) + + +class FakeGcsBlob(object): + + def __init__(self, id, fake_gcs): + self._id = id + self._fake_gcs = fake_gcs + self._metadata = {} + + def upload_from_file(self, iodata, content_type): + self._fake_gcs.upload_from_file(self._id, iodata, content_type) + + @property + def metadata(self): + self._metadata + + @metadata.setter + def metadata(self, m): + self._metadata = m + 
self._fake_gcs.update_metadata(self._id, self._metadata) + + +def mocked_blob_from_uri(fake_gcs): + def gcs_blob_from_uri(uri, client): + return FakeGcsBlob(uri, fake_gcs) + return gcs_blob_from_uri + + +_gcs_mock = FakeGcs() +_gcs_client_wrapper.set_gcs_client_factory(FakeGcs, lambda: _gcs_mock) +_gcs_client_wrapper.set_gcs_blob_from_uri(mocked_blob_from_uri(_gcs_mock)) + + +def get_from_fake_gcs(id): + return _gcs_mock.get(id) + + +class GcsBlobUploaderTestCase(unittest.TestCase): + + def setUp(self): + _gcs_mock.reset() + + def test_constructor_throws_if_prefix_not_uri(self): + with self.assertRaises(ValueError): + GcsBlobUploader('not a valid URI') + + def test_constructor_throws_if_prefix_not_gs_protocol(self): + with self.assertRaises(ValueError): + GcsBlobUploader('other://foo/bar') + + def test_can_construct_gcs_uploader_with_bucket_uri(self): + uploader = GcsBlobUploader('gs://some-bucket') + self.assertIsNotNone(uploader) + self.assertIsInstance(uploader, BlobUploader) + + def test_can_construct_gcs_uploader_with_bucket_uri_and_trailing_slash(self): + uploader = GcsBlobUploader('gs://some-bucket/') + self.assertIsNotNone(uploader) + self.assertIsInstance(uploader, BlobUploader) + + def test_can_construct_gcs_uploader_with_bucket_and_path_uri(self): + uploader = GcsBlobUploader('gs://some-bucket/some/path') + self.assertIsNotNone(uploader) + self.assertIsInstance(uploader, BlobUploader) + + def test_can_construct_gcs_uploader_with_bucket_and_path_uri_with_trailing_slash(self): + uploader = GcsBlobUploader('gs://some-bucket/some/path/') + self.assertIsNotNone(uploader) + self.assertIsInstance(uploader, BlobUploader) + + def test_uploads_blob_from_span(self): + trace_id = 'test-trace-id' + span_id = 'test-span-id' + labels = generate_labels_for_span(trace_id, span_id) + blob = Blob('some data'.encode(), content_type='text/plain', labels=labels) + uploader = GcsBlobUploader('gs://some-bucket/some/path') + url = uploader.upload_async(blob) + self.assertTrue( + 
url.startswith('gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/uploads/') + ) + uploaded_blob = get_from_fake_gcs(url) + self.assertEqual(blob, uploaded_blob) + + def test_uploads_blob_from_event(self): + trace_id = 'test-trace-id' + span_id = 'test-span-id' + event_name = 'event-name' + labels = generate_labels_for_event(trace_id, span_id, event_name) + blob = Blob('some data'.encode(), content_type='text/plain', labels=labels) + uploader = GcsBlobUploader('gs://some-bucket/some/path') + url = uploader.upload_async(blob) + self.assertTrue( + url.startswith('gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/events/event-name/uploads/') + ) + uploaded_blob = get_from_fake_gcs(url) + self.assertEqual(blob, uploaded_blob) + + def test_uploads_blob_from_span_event(self): + trace_id = 'test-trace-id' + span_id = 'test-span-id' + event_name = 'event-name' + event_index = 2 + labels = generate_labels_for_span_event(trace_id, span_id, event_name, event_index) + blob = Blob('some data'.encode(), content_type='text/plain', labels=labels) + uploader = GcsBlobUploader('gs://some-bucket/some/path') + url = uploader.upload_async(blob) + self.assertTrue( + url.startswith('gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/events/2/uploads/') + ) + uploaded_blob = get_from_fake_gcs(url) + self.assertEqual(blob, uploaded_blob) + + def test_uploads_blobs_missing_expected_labels(self): + blob = Blob('some data'.encode(), content_type='text/plain') + uploader = GcsBlobUploader('gs://some-bucket/some/path') + url = uploader.upload_async(blob) + self.assertTrue( + url.startswith('gs://some-bucket/some/path/uploads/'), + ) + uploaded_blob = get_from_fake_gcs(url) + self.assertEqual(blob, uploaded_blob) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + unittest.main() diff --git a/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py 
b/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py index 3ec60d49d6..253235e43c 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py @@ -5,6 +5,7 @@ sys.path.append("../../../src") import abc +import logging import unittest from multiprocessing import Queue @@ -92,4 +93,5 @@ def test_captures_exceptions_raised(self): if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() From 0a3430ef3fce9886a36d68c099ef14e22ce7caa8 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Tue, 14 Jan 2025 11:08:42 -0500 Subject: [PATCH 09/13] Add license comments and documentation. --- .../instrumentation/_blobupload/README.md | 123 ++++++++++++++++++ .../instrumentation/_blobupload/__init__.py | 13 ++ .../_blobupload/api/__init__.py | 14 ++ .../instrumentation/_blobupload/api/blob.py | 14 ++ .../_blobupload/api/blob_uploader.py | 14 ++ .../_blobupload/api/constants.py | 16 ++- .../_blobupload/api/content_type.py | 15 +++ .../instrumentation/_blobupload/api/labels.py | 14 ++ .../_blobupload/api/provider.py | 16 ++- .../_blobupload/backend/__init__.py | 13 ++ .../_blobupload/backend/google/__init__.py | 13 ++ .../backend/google/gcs/__init__.py | 14 ++ .../backend/google/gcs/_gcs_client_wrapper.py | 31 +++++ .../backend/google/gcs/_gcs_impl.py | 44 +++++++ .../_blobupload/utils/__init__.py | 14 ++ .../_blobupload/utils/simple_blob_uploader.py | 14 ++ .../utils/simple_blob_uploader_adaptor.py | 25 ++++ 17 files changed, 405 insertions(+), 2 deletions(-) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md index e69de29bb2..be16cc5756 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md +++ 
b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md @@ -0,0 +1,123 @@ +# Blob Uploader Library (Experimental) + +The Blob Uploader library provides an experimental way to +"write-aside" large or sensitive payloads to a blob storage +system, while retaining references to the written-aside destination +in the operations backend where telemetry is being written. + +This is particularly intended for the use case of request/response +logging, where typical telemetry backends may be unsuitable for +writing this data, either due to size reasons or due to privacy +reasons. GenAI multi-modal prompt/response logging is a particularly +salient motivation for this feature, though general HTTP request/ +response logging is another situation where this is applicable. + +## Usage: Instrumentation Library + +Instrumentation libraries should provide general hooks for handling +requests/responses (or other large blobs) that should be only +conditionally included in telemetry signals. The hooks should provide +enough context to allow a user of the instrumentation library to +conditionally choose what to do with the content including but not +limited to: dropping, including in the telemetry signal, or writing +to a BlobUploader and retaining a reference to the destination URI. + +For example: + +``` + +class RequestHook(abc.ABC): + + @abc.abstractmethod + def handle_request(self, context, signal, request): + pass + + +class ResponseHook(abc.ABC): + + @abc.abstractmethod + def handle_response(self, context, signal, response): + pass + + +class FooInstrumentationLibrary(object): + + def __init__(self, + # ..., + request_hook: Optional[RequestHook]=None, + response_hook: Optional[ResponseHook]=None, + # ...) + + ... +``` + + +## Usage: User of Instrumentation Library + +Users of instrumentation libraries can use the Blob Uploader +libraries to implement relevant request/response hooks. 
+ +For example: + +``` +from opentelemetry.instrumentation._blobupload.api import ( + NOT_UPLOADED, + Blob, + BlobUploaderProvider, + get_blob_uploader, + set_blob_uploader_provider) + + +class MyBlobUploaderRequestHook(RequestHook): + # ... + + def handle_request(self, context, signal, request): + if not self.should_upload(context): + return + use_case = self.select_use_case(context, signal) + uploader = get_blob_uploader(use_case) + blob = Blob( + request.raw_bytes, + content_type=request.content_type, + labels=self.generate_blob_labels(context, signal, request)) + uri = uploader.upload_async(blob) + if uri == NOT_UPLOADED: + return + signal.attributes[REQUEST_ATTRIBUTE] = uri + + # ... + +class MyBlobUploaderProvider(BlobUploaderProvider): + + def get_blob_uploader(self, use_case=None): + # ... + + +def main(): + set_blob_uploader_provider(MyBlobUploaderProvider()) + instrumentation_library = FooInstrumentationLibrary( + # ..., + request_hook=MyBlobUploaderRequestHook(), + # ... + ) + # ... + +``` + +## Future Work + +As can be seen from the above usage examples, there is quite a +bit of common boilerplate both for instrumentation libraries (e.g. +defining the set of hook interfaces) and for consumers of those +instrumentation libraries (e.g. implementing variants of those hook +interfaces that make use of the BlobUploader libraries). + +A potential future improvement would be to define a common set of +hook interfaces for this use case that can be reused across +instrumentation libraries and to provide simple drop-in +implementations of those hooks that make use of BlobUploader. + +Beyond this, boilerplate to define a custom 'BlobUploaderProvider' +could be reduced by expanding the capabilities of the default +provider, so that most common uses are covered with a minimal +set of environment variables (if optional deps are present). 
diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/__init__.py index e69de29bb2..b0a6f42841 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/__init__.py @@ -0,0 +1,13 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py index 16dbaec5a8..06b258a600 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + """Exposes API methods to callers from the package name.""" from opentelemetry.instrumentation._blobupload.api.blob import Blob diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py index 8cbd0cf71b..1a7cc2e4c1 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import base64 import json diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py index 9dc248a8e8..8f8a47729a 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob_uploader.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Defines an interface for performing asynchronous blob uploading.""" import abc diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py index 22f8864cd5..cfa4e0e096 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py @@ -1,4 +1,18 @@ -"""Defines contexts that are used by the '_blobupload' package.""" +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Defines constants that are used by the '_blobupload' package.""" # Special constant used to indicate that a BlobUploader did not upload. 
NOT_UPLOADED = '/dev/null' diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py index a9553f90ce..d6f0ce13ff 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py @@ -1,3 +1,18 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + """Provides utilities for automatic content-type detection.""" diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py index 829eee31a6..cb4a63046a 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Provides utilities for providing basic identifying labels for blobs.""" diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py index c07ef261e6..018a4c4f29 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import abc import logging from typing import Optional @@ -39,7 +53,7 @@ def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: class _DefaultBlobUploaderProvider(BlobUploaderProvider): """Default provider used when none has been configured.""" - def get_blob_uploader(self, use_case: Optional[str]) -> BlobUploader: + def get_blob_uploader(self, use_case: Optional[str]=None) -> BlobUploader: use_case_formatted = "(None)" if use_case: use_case_formatted = use_case diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/__init__.py index e69de29bb2..b0a6f42841 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/__init__.py @@ -0,0 +1,13 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py index e69de29bb2..eacf7c9c0e 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py @@ -0,0 +1,13 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py index 408b26a7b7..8b089b4e01 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from opentelemetry.instrumentation._blobupload.backend.google.gcs._gcs_impl import GcsBlobUploader diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py index 158ca00b1f..26e5af7b94 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py @@ -1,13 +1,44 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Isolates calls to 'google-cloud-storage' dependency, simplifying mocking.""" + + from typing import Any, TypeAlias import logging _logger = logging.getLogger(__name__) + + +# Whether the Google Cloud Storage library has been initialized. _gcs_initialized = False + +# Function that returns a Google Cloud Storage Client object. 
_gcs_client_factory = None + +# Function that given a URI and client, returns a Google Cloud +# Storage Blob class that can be used to write to a blob. _gcs_blob_from_uri = None +# Type alias for a Google Cloud Storage client. This has to default +# to 'Any' to allow for mocks of the Google Cloud Storage client. It +# is updated at runtime in 'set_gcs_client_factory', though this +# means it is not particularly useful for automatic static type +# checking (it is, however, useful for documenting intended type). GcsClientType: TypeAlias = Any diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py index 873eaefcc0..af14e3e02a 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py @@ -1,3 +1,19 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Provides the 'GcsBlobUploader' class.""" + import io import uuid import logging @@ -98,12 +114,40 @@ def upload_sync(self, uri: str, blob: Blob): class GcsBlobUploader(BlobUploader): + """A BlobUploader that writes to Google Cloud Storage.""" def __init__(self, prefix: str, client:Optional[GcsClient]=None): + """Intialize the GcsBlobUploader class. + + Args: + - prefix: a string beginning with "gs://" that includes + the Google Cloud Storage bucket to which to write as + well as an optional path prefix to use. + + - client: an optional Google Cloud Storage client. If not + provided, this class will create a Google Cloud Storage + client using the environment (i.e. Application Default + Credentials). Supply your own instance if you'd like to + use non-default configuration (e.g. to use an explicit + credential other than the one in the environment). + + Known Failure Modes: + - Missing 'google-cloud-storage' library dependency. + - Failure to construct the client (e.g. absence of a valid + Google Application Default credential in the enviroment). + """ if not _gcs_client_wrapper.is_gcs_initialized(): raise NotImplementedError("GcsBlobUploader implementation unavailable without 'google-cloud-storage' optional dependency.") simple_uploader = _SimpleGcsBlobUploader(prefix, client) self._delegate = blob_uploader_from_simple_blob_uploader(simple_uploader) def upload_async(self, blob: Blob) -> str: + """Upload the specified blob in the background. + + Generates a URI from the blob, based on the prefix supplied + to the constructor as well as the labels of the Blob (may + also include entropy or other random components). Immediately + returns the "gs://" URI representing where the Blob will be + written, and schedules background uploading of the blob there. 
+ """ return self._delegate.upload_async(blob) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py index 85658825da..95ef7e9308 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Exposes API methods to callers from the package name.""" from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader_adaptor import blob_uploader_from_simple_blob_uploader diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py index c643f9534c..0c64d4131d 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Defines a simple, synchronous interface for providing a backend implementation.""" import abc diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index 1ae64a42ae..d07b15bed4 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -1,3 +1,28 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Provides 'blob_uploader_from_simple_blob_uploader', a utility for +backend providers to more easily provide a full-fledged BlobUploader +by implementing the simpler 'SimpleBlobUploader' interface. + +The 'blob_uploader_from_simple_blob_uploader' utility takes care of +common machinery such as scheduling, retries, background uploading, +etc. 
allowing providers of specific BlobUploader backends to supply +a simpler set of synchronous uploading instructions. +""" + import atexit import logging From 587e61e888331516f482183b9c2adce2b8c8a450 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Tue, 14 Jan 2025 11:28:51 -0500 Subject: [PATCH 10/13] Remove redundant explicit inheritance from object per review comment. --- .../src/opentelemetry/instrumentation/_blobupload/README.md | 2 +- .../src/opentelemetry/instrumentation/_blobupload/api/blob.py | 2 +- .../instrumentation/_blobupload/api/content_type.py | 2 +- .../_blobupload/utils/simple_blob_uploader_adaptor.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md index be16cc5756..bfd0dd0474 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/README.md @@ -40,7 +40,7 @@ class ResponseHook(abc.ABC): pass -class FooInstrumentationLibrary(object): +class FooInstrumentationLibrary: def __init__(self, # ..., diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py index 1a7cc2e4c1..fcfcd9e5a7 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py @@ -19,7 +19,7 @@ from typing import Mapping, Dict, Optional -class Blob(object): +class Blob: """Represents an opaque binary object and associated metadata. 
This object conteptually has the following properties: diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py index d6f0ce13ff..de2b85ae8d 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py @@ -18,7 +18,7 @@ # Helper used to handle the possibility of optional 'magic' dependency # being unavailable for guessing the MIME type of raw bytes. -class _FallBackModule(object): +class _FallBackModule: """Class that is shaped like the portion of 'magic' we need.""" def from_buffer(self, raw_bytes, mime=True): diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index d07b15bed4..4fa04ce2f0 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -47,7 +47,7 @@ def _with_content_type(blob: Blob) -> Blob: return Blob(blob.raw_bytes, content_type=content_type, labels=blob.labels) -class _UploadAction(object): +class _UploadAction: """Represents the work to be done in the background to upload a blob.""" def __init__(self, simple_uploader, uri, blob): From c25a6b8bc0d99065a802d1e26100f91139730dc8 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Tue, 14 Jan 2025 11:40:28 -0500 Subject: [PATCH 11/13] Format with ruff. 
--- .../instrumentation/_blobupload/api/blob.py | 11 ++-- .../_blobupload/api/constants.py | 2 +- .../_blobupload/api/content_type.py | 2 +- .../_blobupload/api/provider.py | 5 +- .../_blobupload/backend/google/__init__.py | 2 +- .../backend/google/gcs/__init__.py | 5 +- .../backend/google/gcs/_gcs_client_wrapper.py | 11 ++-- .../backend/google/gcs/_gcs_impl.py | 66 ++++++++++--------- .../_blobupload/utils/__init__.py | 8 ++- .../_blobupload/utils/simple_blob_uploader.py | 5 +- .../utils/simple_blob_uploader_adaptor.py | 23 +++---- .../tests/_blobupload/api/test_blob.py | 55 ++++++++-------- .../tests/_blobupload/api/test_provider.py | 19 +++--- .../google/gcs/test_gcs_blob_uploader.py | 65 +++++++++--------- .../test_simple_blob_uploader_adaptor.py | 38 +++++------ 15 files changed, 161 insertions(+), 156 deletions(-) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py index fcfcd9e5a7..8d4e48fec2 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py @@ -14,9 +14,8 @@ import base64 import json - from types import MappingProxyType as _frozendict -from typing import Mapping, Dict, Optional +from typing import Mapping, Optional class Blob: @@ -119,8 +118,8 @@ def __eq__(self, o): def __repr__(self): params = [repr(self._raw_bytes)] if self._content_type is not None: - params.append('content_type={}'.format(repr(self._content_type))) + params.append("content_type={}".format(repr(self._content_type))) if self._labels: - params.append('labels={}'.format(json.dumps(self._labels, sort_keys=True))) - params_string = ', '.join(params) - return 'Blob({})'.format(params_string) + params.append("labels={}".format(json.dumps(self._labels, sort_keys=True))) + params_string = ", ".join(params) + return 
"Blob({})".format(params_string) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py index cfa4e0e096..2b96051f80 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/constants.py @@ -15,4 +15,4 @@ """Defines constants that are used by the '_blobupload' package.""" # Special constant used to indicate that a BlobUploader did not upload. -NOT_UPLOADED = '/dev/null' +NOT_UPLOADED = "/dev/null" diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py index de2b85ae8d..0926c5866e 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py @@ -39,5 +39,5 @@ def from_buffer(self, raw_bytes, mime=True): def detect_content_type(raw_bytes: bytes) -> str: """Attempts to infer the content type of the specified data.""" if not raw_bytes: - return 'application/octet-stream' + return "application/octet-stream" return _module.from_buffer(raw_bytes, mime=True) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py index 018a4c4f29..d09e528e38 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/provider.py @@ -20,8 +20,9 @@ from opentelemetry.instrumentation._blobupload.api.blob_uploader import ( BlobUploader, ) -from 
opentelemetry.instrumentation._blobupload.api.constants import NOT_UPLOADED - +from opentelemetry.instrumentation._blobupload.api.constants import ( + NOT_UPLOADED, +) _logger = logging.getLogger(__name__) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py index eacf7c9c0e..b0a6f42841 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py index 8b089b4e01..a799cc27f3 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from opentelemetry.instrumentation._blobupload.backend.google.gcs._gcs_impl import GcsBlobUploader - +from opentelemetry.instrumentation._blobupload.backend.google.gcs._gcs_impl import ( + GcsBlobUploader, +) __all__ = [ GcsBlobUploader diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py index 26e5af7b94..bf531d012b 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_client_wrapper.py @@ -16,9 +16,8 @@ """Isolates calls to 'google-cloud-storage' dependency, simplifying mocking.""" -from typing import Any, TypeAlias - import logging +from typing import Any, TypeAlias _logger = logging.getLogger(__name__) @@ -38,7 +37,7 @@ # to 'Any' to allow for mocks of the Google Cloud Storage client. It # is updated at runtime in 'set_gcs_client_factory', though this # means it is not particularly useful for automatic static type -# checking (it is, however, useful for documenting intended type). +# checking (it is, however, useful for documenting intended type). 
GcsClientType: TypeAlias = Any @@ -47,7 +46,7 @@ def set_gcs_client_factory(gcs_client_type, client_factory): global _gcs_client_factory global GcsClientType if _gcs_initialized: - _logger.warning('Replacing default GCS client factory') + _logger.warning("Replacing default GCS client factory") GcsClientType = gcs_client_type _gcs_client_factory = client_factory if _gcs_client_factory and _gcs_blob_from_uri: @@ -58,7 +57,7 @@ def set_gcs_blob_from_uri(blob_from_uri): global _gcs_initialized global _gcs_blob_from_uri if _gcs_initialized: - _logger.warning('Replacing default GCS blob_from_uri method') + _logger.warning("Replacing default GCS blob_from_uri method") _gcs_blob_from_uri = blob_from_uri if _gcs_client_factory and _gcs_blob_from_uri: _gcs_initialized = True @@ -84,7 +83,7 @@ def blob_from_uri(uri, client): from google.cloud.storage import Client as _GcsClient from google.cloud.storage.blob import Blob as _GcsBlob set_gcs_client_factory(_GcsClient, _GcsClient) - set_gcs_blob_from_uri(getattr(_GcsBlob, 'from_uri', getattr(_GcsBlob, 'from_string'))) + set_gcs_blob_from_uri(getattr(_GcsBlob, "from_uri", getattr(_GcsBlob, "from_string"))) _logger.debug('Found "google-cloud-storage" optional dependency and successfully registered it.') except ImportError: _logger.warning('Missing optional "google-cloud-storage" dependency.') diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py index af14e3e02a..31f49afb0e 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py @@ -15,16 +15,18 @@ """Provides the 'GcsBlobUploader' class.""" import io -import uuid import logging - +import uuid from typing import Optional, TypeAlias 
-from opentelemetry.instrumentation._blobupload.api import Blob -from opentelemetry.instrumentation._blobupload.api import BlobUploader -from opentelemetry.instrumentation._blobupload.utils import SimpleBlobUploader -from opentelemetry.instrumentation._blobupload.utils import blob_uploader_from_simple_blob_uploader -from opentelemetry.instrumentation._blobupload.backend.google.gcs import _gcs_client_wrapper +from opentelemetry.instrumentation._blobupload.api import Blob, BlobUploader +from opentelemetry.instrumentation._blobupload.backend.google.gcs import ( + _gcs_client_wrapper, +) +from opentelemetry.instrumentation._blobupload.utils import ( + SimpleBlobUploader, + blob_uploader_from_simple_blob_uploader, +) _logger = logging.getLogger(__name__) @@ -33,26 +35,26 @@ def _path_for_span(trace_id, span_id): if not trace_id or not span_id: - return '' - return 'traces/{}/spans/{}'.format(trace_id, span_id) + return "" + return "traces/{}/spans/{}".format(trace_id, span_id) def _path_for_event(trace_id, span_id, event_name): if not event_name: - return '' + return "" span_path = _path_for_span(trace_id, span_id) if not span_path: - return '' - return '{}/events/{}'.format(span_path, event_name) + return "" + return "{}/events/{}".format(span_path, event_name) def _path_for_span_event(trace_id, span_id, event_index): if event_index is None: - return '' + return "" span_path = _path_for_span(trace_id, span_id) if not span_path: - return '' - return '{}/events/{}'.format(span_path, event_index) + return "" + return "{}/events/{}".format(span_path, event_index) def _path_segment_from_labels(labels): @@ -67,18 +69,18 @@ def _path_segment_from_labels(labels): ...depending on the particular type of signal source. 
""" - signal_type = labels.get('otel_type') - if not signal_type or signal_type not in ['span', 'event', 'span_event']: - return '' - trace_id = labels.get('trace_id') - span_id = labels.get('span_id') - event_name = labels.get('event_name') - event_index = labels.get('event_index') - if signal_type == 'span': + signal_type = labels.get("otel_type") + if not signal_type or signal_type not in ["span", "event", "span_event"]: + return "" + trace_id = labels.get("trace_id") + span_id = labels.get("span_id") + event_name = labels.get("event_name") + event_index = labels.get("event_index") + if signal_type == "span": return _path_for_span(trace_id, span_id) - elif signal_type == 'event': + elif signal_type == "event": return _path_for_event(trace_id, span_id, event_name) - elif signal_type == 'span_event': + elif signal_type == "span_event": return _path_for_span_event(trace_id, span_id, event_index) @@ -86,20 +88,20 @@ class _SimpleGcsBlobUploader(SimpleBlobUploader): def __init__(self, prefix: str, client:Optional[GcsClient]=None): if not prefix: - raise ValueError('Must supply a non-empty prefix.') - if not prefix.startswith('gs://'): + raise ValueError("Must supply a non-empty prefix.") + if not prefix.startswith("gs://"): raise ValueError('Invalid prefix; must start with "gs://"; found: "{}".'.format(prefix)) - if not prefix.endswith('/'): - prefix = '{}/'.format(prefix) + if not prefix.endswith("/"): + prefix = "{}/".format(prefix) self._prefix = prefix self._client = client or _gcs_client_wrapper.create_gcs_client() def generate_destination_uri(self, blob: Blob) -> str: origin_path = _path_segment_from_labels(blob.labels) - if origin_path and not origin_path.endswith('/'): - origin_path = '{}/'.format(origin_path) + if origin_path and not origin_path.endswith("/"): + origin_path = "{}/".format(origin_path) upload_id = uuid.uuid4().hex - return '{}{}uploads/{}'.format(self._prefix, origin_path, upload_id) + return "{}{}uploads/{}".format(self._prefix, origin_path, 
upload_id) def upload_sync(self, uri: str, blob: Blob): _logger.debug('Uploading blob: size: {} -> "{}"'.format(len(blob.raw_bytes), uri)) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py index 95ef7e9308..c2b429543d 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py @@ -14,8 +14,12 @@ """Exposes API methods to callers from the package name.""" -from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader_adaptor import blob_uploader_from_simple_blob_uploader -from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader import SimpleBlobUploader +from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader import ( + SimpleBlobUploader, +) +from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader_adaptor import ( + blob_uploader_from_simple_blob_uploader, +) __all__ = [ blob_uploader_from_simple_blob_uploader, diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py index 0c64d4131d..b76d432984 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py @@ -18,6 +18,7 @@ from opentelemetry.instrumentation._blobupload.api import Blob + class SimpleBlobUploader(abc.ABC): """Pure abstract base class of a backend implementation that is synchronous.""" @@ -31,7 +32,7 @@ def generate_destination_uri(self, blob: Blob) -> str: Returns: A new, unique URI that represents the target 
destination of the data. """ - raise NotImplementedError('SimpleBlobUploader.generate_destination_uri') + raise NotImplementedError("SimpleBlobUploader.generate_destination_uri") @abc.abstractmethod def upload_sync(self, uri: str, blob: Blob): @@ -45,4 +46,4 @@ def upload_sync(self, uri: str, blob: Blob): Effects: Attempts to upload/write the Blob to the specified destination URI. """ - raise NotImplementedError('SimpleBlobUploader.upload_sync') + raise NotImplementedError("SimpleBlobUploader.upload_sync") diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index 4fa04ce2f0..1901795df9 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -25,16 +25,17 @@ import atexit import logging - -from typing import Optional from concurrent.futures import Executor, ThreadPoolExecutor +from typing import Optional from opentelemetry.instrumentation._blobupload.api import ( Blob, BlobUploader, - detect_content_type) -from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader import SimpleBlobUploader - + detect_content_type, +) +from opentelemetry.instrumentation._blobupload.utils.simple_blob_uploader import ( + SimpleBlobUploader, +) _logger = logging.getLogger(__name__) @@ -54,7 +55,7 @@ def __init__(self, simple_uploader, uri, blob): self._simple_uploader = simple_uploader self._uri = uri self._blob = blob - + def __call__(self): _logger.debug('Uploading blob to "{}".'.format(self._uri)) try: @@ -72,7 +73,7 @@ def _create_default_executor_no_cleanup(): # It is because of this potential future enhancement, that we # have moved this logic into a separate function despite it # being 
currently logically quite simple. - _logger.debug('Creating thread pool executor') + _logger.debug("Creating thread pool executor") return ThreadPoolExecutor() @@ -81,7 +82,7 @@ def _create_default_executor(): result = _create_default_executor_no_cleanup() def _cleanup(): result.shutdown() - _logger.debug('Registering cleanup for the pool') + _logger.debug("Registering cleanup for the pool") atexit.register(_cleanup) return result @@ -95,10 +96,10 @@ def _get_or_create_default_executor(): """Return or lazily instantiate a shared default executor.""" global _default_executor if _default_executor is None: - _logger.debug('No existing executor found; creating one lazily.') + _logger.debug("No existing executor found; creating one lazily.") _default_executor = _create_default_executor() else: - _logger.debug('Reusing existing executor.') + _logger.debug("Reusing existing executor.") return _default_executor @@ -119,7 +120,7 @@ def upload_async(self, blob: Blob) -> str: return uri def _do_in_background(self, action): - _logger.debug('Scheduling background upload.') + _logger.debug("Scheduling background upload.") self._executor.submit(action) diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py index 8d00383819..8fe4487be1 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_blob.py @@ -8,14 +8,13 @@ import logging import unittest - from opentelemetry.instrumentation._blobupload.api import Blob class TestBlob(unittest.TestCase): def test_construction_with_just_bytes(self): - data = 'some string'.encode() + data = "some string".encode() blob = Blob(data) self.assertEqual(blob.raw_bytes, data) self.assertIsNone(blob.content_type) @@ -23,8 +22,8 @@ def test_construction_with_just_bytes(self): self.assertEqual(len(blob.labels), 0) def test_construction_with_bytes_and_content_type(self): - data = 
'some string'.encode() - content_type = 'text/plain' + data = "some string".encode() + content_type = "text/plain" blob = Blob(data, content_type=content_type) self.assertEqual(blob.raw_bytes, data) self.assertEqual(blob.content_type, content_type) @@ -32,77 +31,77 @@ def test_construction_with_bytes_and_content_type(self): self.assertEqual(len(blob.labels), 0) def test_construction_with_bytes_and_labels(self): - data = 'some string'.encode() - labels = {'key1': 'value1', 'key2': 'value2'} + data = "some string".encode() + labels = {"key1": "value1", "key2": "value2"} blob = Blob(data, labels=labels) self.assertEqual(blob.raw_bytes, data) self.assertIsNone(blob.content_type) self.assert_labels_equal(blob.labels, labels) def test_construction_with_all_fields(self): - data = 'some string'.encode() - content_type = 'text/plain' - labels = {'key1': 'value1', 'key2': 'value2'} + data = "some string".encode() + content_type = "text/plain" + labels = {"key1": "value1", "key2": "value2"} blob = Blob(data, content_type=content_type, labels=labels) self.assertEqual(blob.raw_bytes, data) self.assertEqual(blob.content_type, content_type) self.assert_labels_equal(blob.labels, labels) def test_from_data_uri_without_labels(self): - data = 'some string'.encode() - content_type = 'text/plain' + data = "some string".encode() + content_type = "text/plain" encoded_data = base64.b64encode(data).decode() - uri = 'data:{};base64,{}'.format(content_type, encoded_data) + uri = "data:{};base64,{}".format(content_type, encoded_data) blob = Blob.from_data_uri(uri) self.assertEqual(blob.raw_bytes, data) self.assertEqual(blob.content_type, content_type) self.assertIsNotNone(blob.labels) self.assertEqual(len(blob.labels), 0) - + def test_from_data_uri_with_labels(self): - data = 'some string'.encode() - content_type = 'text/plain' + data = "some string".encode() + content_type = "text/plain" encoded_data = base64.b64encode(data).decode() - uri = 'data:{};base64,{}'.format(content_type, 
encoded_data) - labels = {'key1': 'value1', 'key2': 'value2'} + uri = "data:{};base64,{}".format(content_type, encoded_data) + labels = {"key1": "value1", "key2": "value2"} blob = Blob.from_data_uri(uri, labels=labels) self.assertEqual(blob.raw_bytes, data) self.assertEqual(blob.content_type, content_type) self.assert_labels_equal(blob.labels, labels) def test_from_data_uri_with_valid_standard_base64(self): - data = 'some string'.encode() - content_type = 'text/plain' + data = "some string".encode() + content_type = "text/plain" encoded_data = base64.standard_b64encode(data).decode() - uri = 'data:{};base64,{}'.format(content_type, encoded_data) + uri = "data:{};base64,{}".format(content_type, encoded_data) blob = Blob.from_data_uri(uri) self.assertEqual(blob.raw_bytes, data) self.assertEqual(blob.content_type, content_type) def test_from_data_uri_with_valid_websafe_base64(self): - data = 'some string'.encode() - content_type = 'text/plain' + data = "some string".encode() + content_type = "text/plain" encoded_data = base64.urlsafe_b64encode(data).decode() - uri = 'data:{};base64,{}'.format(content_type, encoded_data) + uri = "data:{};base64,{}".format(content_type, encoded_data) blob = Blob.from_data_uri(uri) self.assertEqual(blob.raw_bytes, data) self.assertEqual(blob.content_type, content_type) def test_from_data_uri_with_non_data_uri_content(self): with self.assertRaisesRegex(ValueError, 'expected "data:" prefix'): - Blob.from_data_uri('not a valid data uri') + Blob.from_data_uri("not a valid data uri") def test_from_data_uri_with_non_base64_content(self): with self.assertRaisesRegex(ValueError, 'expected ";base64," section'): - Blob.from_data_uri('data:text/plain,validifpercentencoded') + Blob.from_data_uri("data:text/plain,validifpercentencoded") def assert_labels_equal(self, a, b): - self.assertEqual(len(a), len(b), msg='Different sizes: {} vs {}; a={}, b={}'.format(len(a), len(b), a, b)) + self.assertEqual(len(a), len(b), msg="Different sizes: {} vs {}; 
a={}, b={}".format(len(a), len(b), a, b)) for k in a: - self.assertTrue(k in b, msg='Key {} found in a but not b'.format(k)) + self.assertTrue(k in b, msg="Key {} found in a but not b".format(k)) va = a[k] vb = b[k] - self.assertEqual(va, vb, msg='Values for key {} different for a vs b: {} vs {}'.format(k, va, vb)) + self.assertEqual(va, vb, msg="Values for key {} different for a vs b: {} vs {}".format(k, va, vb)) if __name__ == "__main__": diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py index 34777f78c4..ce841f5fde 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_provider.py @@ -13,18 +13,19 @@ BlobUploader, BlobUploaderProvider, get_blob_uploader, - set_blob_uploader_provider + set_blob_uploader_provider, ) + class TestProvider(unittest.TestCase): def test_default_provider(self): - uploader = get_blob_uploader('test') + uploader = get_blob_uploader("test") self.assertIsNotNone(uploader) blob = Blob(bytes()) url = uploader.upload_async(blob) self.assertEqual(url, NOT_UPLOADED) - + def test_custom_provider(self): class CustomUploader(BlobUploader): @@ -36,7 +37,7 @@ def __init__(self, result): def upload_async(self, blob): self.captured_blob = blob return self.upload_result - + class CustomProvider(BlobUploaderProvider): def __init__(self, uploader): @@ -47,15 +48,15 @@ def get_blob_uploader(self, use_case): self.captured_use_case = use_case return self.uploader - uploader = CustomUploader('foo') + uploader = CustomUploader("foo") provider = CustomProvider(uploader) old_provider = set_blob_uploader_provider(provider) - returned_uploader = get_blob_uploader('test') - self.assertEqual(provider.captured_use_case, 'test') + returned_uploader = get_blob_uploader("test") + self.assertEqual(provider.captured_use_case, "test") self.assertEqual(returned_uploader, uploader) - 
blob = Blob(bytes(), content_type='bar') + blob = Blob(bytes(), content_type="bar") url = returned_uploader.upload_async(blob) - self.assertEqual(url, 'foo') + self.assertEqual(url, "foo") self.assertEqual(uploader.captured_blob, blob) unset_provider = set_blob_uploader_provider(old_provider) self.assertEqual(unset_provider, provider) diff --git a/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py b/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py index 3f5dade003..d060b35347 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py +++ b/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py @@ -4,7 +4,6 @@ import sys sys.path.append("../../../../../src") -import abc import logging import unittest from multiprocessing import Queue @@ -16,10 +15,12 @@ generate_labels_for_span, generate_labels_for_span_event, ) -from opentelemetry.instrumentation._blobupload.backend.google.gcs import GcsBlobUploader # Internal implementation used for mocking -from opentelemetry.instrumentation._blobupload.backend.google.gcs import _gcs_client_wrapper +from opentelemetry.instrumentation._blobupload.backend.google.gcs import ( + GcsBlobUploader, + _gcs_client_wrapper, +) class FakeGcs(object): @@ -36,11 +37,11 @@ def get(self, id): while id not in self._done: self._queue.get() return self._storage.get(id) - + def upload_from_file(self, id, data, content_type): b = Blob(data.read(), content_type=content_type) self._storage[id] = b - + def update_metadata(self, id, new_metadata): old = self._storage[id] b = Blob(old.raw_bytes, content_type=old.content_type, labels=new_metadata) @@ -58,7 +59,7 @@ def __init__(self, id, fake_gcs): def upload_from_file(self, iodata, content_type): self._fake_gcs.upload_from_file(self._id, iodata, content_type) - + @property def metadata(self): self._metadata @@ -91,80 +92,80 @@ def 
setUp(self): def test_constructor_throws_if_prefix_not_uri(self): with self.assertRaises(ValueError): - GcsBlobUploader('not a valid URI') + GcsBlobUploader("not a valid URI") def test_constructor_throws_if_prefix_not_gs_protocol(self): with self.assertRaises(ValueError): - GcsBlobUploader('other://foo/bar') + GcsBlobUploader("other://foo/bar") def test_can_construct_gcs_uploader_with_bucket_uri(self): - uploader = GcsBlobUploader('gs://some-bucket') + uploader = GcsBlobUploader("gs://some-bucket") self.assertIsNotNone(uploader) self.assertIsInstance(uploader, BlobUploader) def test_can_construct_gcs_uploader_with_bucket_uri_and_trailing_slash(self): - uploader = GcsBlobUploader('gs://some-bucket/') + uploader = GcsBlobUploader("gs://some-bucket/") self.assertIsNotNone(uploader) self.assertIsInstance(uploader, BlobUploader) def test_can_construct_gcs_uploader_with_bucket_and_path_uri(self): - uploader = GcsBlobUploader('gs://some-bucket/some/path') + uploader = GcsBlobUploader("gs://some-bucket/some/path") self.assertIsNotNone(uploader) self.assertIsInstance(uploader, BlobUploader) def test_can_construct_gcs_uploader_with_bucket_and_path_uri_with_trailing_slash(self): - uploader = GcsBlobUploader('gs://some-bucket/some/path/') + uploader = GcsBlobUploader("gs://some-bucket/some/path/") self.assertIsNotNone(uploader) self.assertIsInstance(uploader, BlobUploader) def test_uploads_blob_from_span(self): - trace_id = 'test-trace-id' - span_id = 'test-span-id' + trace_id = "test-trace-id" + span_id = "test-span-id" labels = generate_labels_for_span(trace_id, span_id) - blob = Blob('some data'.encode(), content_type='text/plain', labels=labels) - uploader = GcsBlobUploader('gs://some-bucket/some/path') + blob = Blob("some data".encode(), content_type="text/plain", labels=labels) + uploader = GcsBlobUploader("gs://some-bucket/some/path") url = uploader.upload_async(blob) self.assertTrue( - 
url.startswith('gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/uploads/') + url.startswith("gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/uploads/") ) uploaded_blob = get_from_fake_gcs(url) self.assertEqual(blob, uploaded_blob) def test_uploads_blob_from_event(self): - trace_id = 'test-trace-id' - span_id = 'test-span-id' - event_name = 'event-name' + trace_id = "test-trace-id" + span_id = "test-span-id" + event_name = "event-name" labels = generate_labels_for_event(trace_id, span_id, event_name) - blob = Blob('some data'.encode(), content_type='text/plain', labels=labels) - uploader = GcsBlobUploader('gs://some-bucket/some/path') + blob = Blob("some data".encode(), content_type="text/plain", labels=labels) + uploader = GcsBlobUploader("gs://some-bucket/some/path") url = uploader.upload_async(blob) self.assertTrue( - url.startswith('gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/events/event-name/uploads/') + url.startswith("gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/events/event-name/uploads/") ) uploaded_blob = get_from_fake_gcs(url) self.assertEqual(blob, uploaded_blob) def test_uploads_blob_from_span_event(self): - trace_id = 'test-trace-id' - span_id = 'test-span-id' - event_name = 'event-name' + trace_id = "test-trace-id" + span_id = "test-span-id" + event_name = "event-name" event_index = 2 labels = generate_labels_for_span_event(trace_id, span_id, event_name, event_index) - blob = Blob('some data'.encode(), content_type='text/plain', labels=labels) - uploader = GcsBlobUploader('gs://some-bucket/some/path') + blob = Blob("some data".encode(), content_type="text/plain", labels=labels) + uploader = GcsBlobUploader("gs://some-bucket/some/path") url = uploader.upload_async(blob) self.assertTrue( - url.startswith('gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/events/2/uploads/') + 
url.startswith("gs://some-bucket/some/path/traces/test-trace-id/spans/test-span-id/events/2/uploads/") ) uploaded_blob = get_from_fake_gcs(url) self.assertEqual(blob, uploaded_blob) def test_uploads_blobs_missing_expected_labels(self): - blob = Blob('some data'.encode(), content_type='text/plain') - uploader = GcsBlobUploader('gs://some-bucket/some/path') + blob = Blob("some data".encode(), content_type="text/plain") + uploader = GcsBlobUploader("gs://some-bucket/some/path") url = uploader.upload_async(blob) self.assertTrue( - url.startswith('gs://some-bucket/some/path/uploads/'), + url.startswith("gs://some-bucket/some/path/uploads/"), ) uploaded_blob = get_from_fake_gcs(url) self.assertEqual(blob, uploaded_blob) diff --git a/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py index 253235e43c..72185a5f34 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/tests/_blobupload/utils/test_simple_blob_uploader_adaptor.py @@ -4,18 +4,14 @@ import sys sys.path.append("../../../src") -import abc import logging import unittest from multiprocessing import Queue -from opentelemetry.instrumentation._blobupload.api import ( - Blob, - BlobUploader -) +from opentelemetry.instrumentation._blobupload.api import Blob, BlobUploader from opentelemetry.instrumentation._blobupload.utils import ( + SimpleBlobUploader, blob_uploader_from_simple_blob_uploader, - SimpleBlobUploader ) @@ -23,9 +19,9 @@ class QueueBasedUploader(SimpleBlobUploader): def __init__(self, queue): self._queue = queue - + def generate_destination_uri(self, blob): - return blob.labels['destination_uri'] + return blob.labels["destination_uri"] def upload_sync(self, uri, blob): self._queue.put((uri, blob)) @@ -35,15 +31,15 @@ class FailingUploader(SimpleBlobUploader): def __init__(self, queue): 
self._queue = queue - + def generate_destination_uri(self, blob): - return blob.labels['destination_uri'] + return blob.labels["destination_uri"] def upload_sync(self, uri, blob): try: - raise RuntimeError('something went wrong') + raise RuntimeError("something went wrong") finally: - self._queue.put('done') + self._queue.put("done") @@ -52,13 +48,13 @@ class TestBlob(unittest.TestCase): def test_simple_blob_uploader_adaptor(self): queue = Queue() simple = QueueBasedUploader(queue) - blob = Blob(bytes(), content_type='some-content-type', labels={'destination_uri': 'foo'}) + blob = Blob(bytes(), content_type="some-content-type", labels={"destination_uri": "foo"}) uploader = blob_uploader_from_simple_blob_uploader(simple) self.assertIsInstance(uploader, BlobUploader) url = uploader.upload_async(blob) - self.assertEqual(url, 'foo') + self.assertEqual(url, "foo") stored_uri, stored_blob = queue.get() - self.assertEqual(stored_uri, 'foo') + self.assertEqual(stored_uri, "foo") self.assertEqual(stored_blob, blob) self.assertTrue(queue.empty()) queue.close() @@ -66,15 +62,15 @@ def test_simple_blob_uploader_adaptor(self): def test_auto_adds_missing_content_type(self): queue = Queue() simple = QueueBasedUploader(queue) - blob = Blob('some plain text'.encode(), labels={'destination_uri': 'foo'}) + blob = Blob("some plain text".encode(), labels={"destination_uri": "foo"}) uploader = blob_uploader_from_simple_blob_uploader(simple) self.assertIsInstance(uploader, BlobUploader) url = uploader.upload_async(blob) - self.assertEqual(url, 'foo') + self.assertEqual(url, "foo") stored_uri, stored_blob = queue.get() - self.assertEqual(stored_uri, 'foo') + self.assertEqual(stored_uri, "foo") self.assertEqual(stored_blob.raw_bytes, blob.raw_bytes) - self.assertEqual(stored_blob.content_type, 'text/plain') + self.assertEqual(stored_blob.content_type, "text/plain") self.assertEqual(stored_blob.labels, blob.labels) self.assertTrue(queue.empty()) queue.close() @@ -82,11 +78,11 @@ def 
test_auto_adds_missing_content_type(self): def test_captures_exceptions_raised(self): queue = Queue() simple = FailingUploader(queue) - blob = Blob(bytes(), labels={'destination_uri': 'foo'}) + blob = Blob(bytes(), labels={"destination_uri": "foo"}) uploader = blob_uploader_from_simple_blob_uploader(simple) self.assertIsInstance(uploader, BlobUploader) url = uploader.upload_async(blob) - self.assertEqual(url, 'foo') + self.assertEqual(url, "foo") queue.get() self.assertTrue(queue.empty()) queue.close() From a7bb5f5fcbc8aa58b9a9e4d57bb3047b71c274d4 Mon Sep 17 00:00:00 2001 From: Michael Aaron Safyan Date: Tue, 14 Jan 2025 11:50:56 -0500 Subject: [PATCH 12/13] Address additional ruff checks that could not be automatically fixed. --- .../_blobupload/api/__init__.py | 20 +++++------ .../backend/google/gcs/__init__.py | 2 +- .../backend/google/gcs/_gcs_impl.py | 8 ++--- .../_blobupload/utils/__init__.py | 4 +-- .../_blobupload/utils/simple_blob_uploader.py | 2 +- .../utils/simple_blob_uploader_adaptor.py | 6 ++-- .../_blobupload/api/test_content_type.py | 34 +++++++++---------- .../google/gcs/test_gcs_blob_uploader.py | 34 +++++++++---------- 8 files changed, 55 insertions(+), 55 deletions(-) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py index 06b258a600..185305b079 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/__init__.py @@ -36,14 +36,14 @@ ) __all__ = [ - Blob, - BlobUploader, - NOT_UPLOADED, - detect_content_type, - generate_labels_for_event, - generate_labels_for_span, - generate_labels_for_span_event, - BlobUploaderProvider, - get_blob_uploader, - set_blob_uploader_provider, + "Blob", + "BlobUploader", + "NOT_UPLOADED", + "detect_content_type", + 
"generate_labels_for_event", + "generate_labels_for_span", + "generate_labels_for_span_event", + "BlobUploaderProvider", + "get_blob_uploader", + "set_blob_uploader_provider", ] diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py index a799cc27f3..1e7c92a9e6 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/__init__.py @@ -17,5 +17,5 @@ ) __all__ = [ - GcsBlobUploader + "GcsBlobUploader" ] diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py index 31f49afb0e..c3416773df 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py @@ -59,7 +59,7 @@ def _path_for_span_event(trace_id, span_id, event_index): def _path_segment_from_labels(labels): """Returns a path segment based on blob label metadata. - + This aims to return paths like: 'traces/12345/spans/56789' @@ -120,7 +120,7 @@ class GcsBlobUploader(BlobUploader): def __init__(self, prefix: str, client:Optional[GcsClient]=None): """Intialize the GcsBlobUploader class. - + Args: - prefix: a string beginning with "gs://" that includes the Google Cloud Storage bucket to which to write as @@ -132,7 +132,7 @@ def __init__(self, prefix: str, client:Optional[GcsClient]=None): Credentials). Supply your own instance if you'd like to use non-default configuration (e.g. to use an explicit credential other than the one in the environment). 
- + Known Failure Modes: - Missing 'google-cloud-storage' library dependency. - Failure to construct the client (e.g. absence of a valid @@ -145,7 +145,7 @@ def __init__(self, prefix: str, client:Optional[GcsClient]=None): def upload_async(self, blob: Blob) -> str: """Upload the specified blob in the background. - + Generates a URI from the blob, based on the prefix supplied to the constructor as well as the labels of the Blob (may also include entropy or other random components). Immediately diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py index c2b429543d..d76657b0da 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/__init__.py @@ -22,6 +22,6 @@ ) __all__ = [ - blob_uploader_from_simple_blob_uploader, - SimpleBlobUploader, + "blob_uploader_from_simple_blob_uploader", + "SimpleBlobUploader", ] diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py index b76d432984..c6506d11dd 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader.py @@ -25,7 +25,7 @@ class SimpleBlobUploader(abc.ABC): @abc.abstractmethod def generate_destination_uri(self, blob: Blob) -> str: """Generates a URI of where the blob will get written. - + Args: blob: the blob which will be uploaded. 
diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index 1901795df9..a0ab7daa40 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -60,7 +60,7 @@ def __call__(self): _logger.debug('Uploading blob to "{}".'.format(self._uri)) try: self._simple_uploader.upload_sync(self._uri, self._blob) - except: + except Exception: _logger.exception('Failed to upload blob to "{}".'.format(self._uri)) @@ -105,7 +105,7 @@ def _get_or_create_default_executor(): class _SimpleBlobUploaderAdaptor(BlobUploader): """Implementation of 'BlobUploader' wrapping a 'SimpleBlobUploader'. - + This implements the core of the function 'blob_uploader_from_simple_blob_uploader'. """ @@ -127,7 +127,7 @@ def _do_in_background(self, action): def blob_uploader_from_simple_blob_uploader(simple_uploader: SimpleBlobUploader) -> BlobUploader: """Implements a 'BlobUploader' using the supplied 'SimpleBlobUploader'. - + The purpose of this function is to allow backend implementations/vendors to be able to implement their logic much more simply, using synchronous uploading interfaces. 
diff --git a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py index e5b9e96148..cdcd361763 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py +++ b/opentelemetry-instrumentation/tests/_blobupload/api/test_content_type.py @@ -13,11 +13,11 @@ from opentelemetry.instrumentation._blobupload.api import detect_content_type -def create_test_image(format): +def create_test_image(image_format): """Helper for creating a PIL Image for verifying image format support.""" test_img = Image.new("RGB", (2, 2)) output_buffer = io.BytesIO() - test_img.save(output_buffer, format) + test_img.save(output_buffer, image_format) result = output_buffer.getvalue() output_buffer.close() test_img.close() @@ -27,33 +27,33 @@ def create_test_image(format): class TestContentType(unittest.TestCase): def test_handles_empty_correctly(self): - input = bytes() - output = detect_content_type(input) - self.assertEqual(output, "application/octet-stream") + data = bytes() + content_type = detect_content_type(data) + self.assertEqual(content_type, "application/octet-stream") def test_detects_plaintext(self): - input = "this is just regular text" - output = detect_content_type(input.encode()) - self.assertEqual(output, "text/plain") + data = "this is just regular text" + content_type = detect_content_type(data.encode()) + self.assertEqual(content_type, "text/plain") def test_detects_json(self): - input = """{ + data = """{ "this": { "contains": "json" } }""" - output = detect_content_type(input.encode()) - self.assertEqual(output, "application/json") + content_type = detect_content_type(data.encode()) + self.assertEqual(content_type, "application/json") def test_detects_jpeg(self): - input = create_test_image("jpeg") - output = detect_content_type(input) - self.assertEqual(output, "image/jpeg") + data = create_test_image("jpeg") + content_type = detect_content_type(data) + 
self.assertEqual(content_type, "image/jpeg") def test_detects_png(self): - input = create_test_image("png") - output = detect_content_type(input) - self.assertEqual(output, "image/png") + data = create_test_image("png") + content_type = detect_content_type(data) + self.assertEqual(content_type, "image/png") if __name__ == "__main__": diff --git a/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py b/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py index d060b35347..9ccb830d13 100755 --- a/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py +++ b/opentelemetry-instrumentation/tests/_blobupload/backend/google/gcs/test_gcs_blob_uploader.py @@ -33,32 +33,32 @@ def __init__(self): def reset(self): self._storage = {} - def get(self, id): - while id not in self._done: + def get(self, gcs_blob_id): + while gcs_blob_id not in self._done: self._queue.get() - return self._storage.get(id) + return self._storage.get(gcs_blob_id) - def upload_from_file(self, id, data, content_type): + def upload_from_file(self, gcs_blob_id, data, content_type): b = Blob(data.read(), content_type=content_type) - self._storage[id] = b + self._storage[gcs_blob_id] = b - def update_metadata(self, id, new_metadata): - old = self._storage[id] + def update_metadata(self, gcs_blob_id, new_metadata): + old = self._storage[gcs_blob_id] b = Blob(old.raw_bytes, content_type=old.content_type, labels=new_metadata) - self._storage[id] = b - self._done.add(id) - self._queue.put(id) + self._storage[gcs_blob_id] = b + self._done.add(gcs_blob_id) + self._queue.put(gcs_blob_id) class FakeGcsBlob(object): - def __init__(self, id, fake_gcs): - self._id = id + def __init__(self, gcs_blob_id, fake_gcs): + self._gcs_blob_id = gcs_blob_id self._fake_gcs = fake_gcs self._metadata = {} def upload_from_file(self, iodata, content_type): - self._fake_gcs.upload_from_file(self._id, iodata, content_type) 
+ self._fake_gcs.upload_from_file(self._gcs_blob_id, iodata, content_type) @property def metadata(self): @@ -67,7 +67,7 @@ def metadata(self): @metadata.setter def metadata(self, m): self._metadata = m - self._fake_gcs.update_metadata(self._id, self._metadata) + self._fake_gcs.update_metadata(self._gcs_blob_id, self._metadata) def mocked_blob_from_uri(fake_gcs): @@ -81,8 +81,8 @@ def gcs_blob_from_uri(uri, client): _gcs_client_wrapper.set_gcs_blob_from_uri(mocked_blob_from_uri(_gcs_mock)) -def get_from_fake_gcs(id): - return _gcs_mock.get(id) +def get_from_fake_gcs(gcs_blob_id): + return _gcs_mock.get(gcs_blob_id) class GcsBlobUploaderTestCase(unittest.TestCase): @@ -92,7 +92,7 @@ def setUp(self): def test_constructor_throws_if_prefix_not_uri(self): with self.assertRaises(ValueError): - GcsBlobUploader("not a valid URI") + GcsBlobUploader("not a valid URI") def test_constructor_throws_if_prefix_not_gs_protocol(self): with self.assertRaises(ValueError): From 7f88a2b3becad8de18768dc5dffd6824c99b7e52 Mon Sep 17 00:00:00 2001 From: Michael Safyan Date: Tue, 21 Jan 2025 10:35:22 -0600 Subject: [PATCH 13/13] Apply suggestions from code review Co-authored-by: Samuel Colvin --- .../instrumentation/_blobupload/api/blob.py | 8 ++++---- .../instrumentation/_blobupload/api/content_type.py | 2 +- .../instrumentation/_blobupload/api/labels.py | 6 +++--- .../_blobupload/backend/google/gcs/_gcs_impl.py | 10 +++++----- .../_blobupload/utils/simple_blob_uploader_adaptor.py | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py index 8d4e48fec2..9e6f840a17 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/blob.py @@ -53,7 +53,7 @@ def __init__( 
self._labels[k] = labels[k] @staticmethod - def from_data_uri(uri: str, labels: Optional[dict] = None) -> "Blob": + def from_data_uri(uri: str, labels: Optional[Mapping[str, str]] = None) -> "Blob": """Instantiate a blob from a 'data:...' URI. Args: @@ -107,7 +107,7 @@ def labels(self) -> Mapping[str, str]: """Returns the key/value metadata of this Blob.""" return _frozendict(self._labels) - def __eq__(self, o): + def __eq__(self, o: Any) -> bool: return ( (isinstance(o, Blob)) and (self.raw_bytes == o.raw_bytes) and @@ -115,10 +115,10 @@ def __eq__(self, o): (self.labels == o.labels) ) - def __repr__(self): + def __repr__(self) -> str: params = [repr(self._raw_bytes)] if self._content_type is not None: - params.append("content_type={}".format(repr(self._content_type))) + params.append(f"content_type={self._content_type!r}") if self._labels: params.append("labels={}".format(json.dumps(self._labels, sort_keys=True))) params_string = ", ".join(params) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py index 0926c5866e..bcaf922f33 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/content_type.py @@ -21,7 +21,7 @@ class _FallBackModule: """Class that is shaped like the portion of 'magic' we need.""" - def from_buffer(self, raw_bytes, mime=True): + def from_buffer(self, raw_bytes: bytes, mime: bool = True): """Fallback, subpar implementation of 'from_buffer'.""" return "application/octet-stream" diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py index cb4a63046a..d3ffc157d8 100644 --- 
a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/api/labels.py @@ -15,14 +15,14 @@ """Provides utilities for providing basic identifying labels for blobs.""" -def generate_labels_for_span(trace_id: str, span_id: str) -> dict: +def generate_labels_for_span(trace_id: str, span_id: str) -> dict[str, str]: """Returns metadata for a span.""" return {"otel_type": "span", "trace_id": trace_id, "span_id": span_id} def generate_labels_for_event( trace_id: str, span_id: str, event_name: str -) -> dict: +) -> dict[str, str]: """Returns metadata for an event.""" result = generate_labels_for_span(trace_id, span_id) result.update( @@ -36,7 +36,7 @@ def generate_labels_for_event( def generate_labels_for_span_event( trace_id: str, span_id: str, event_name: str, event_index: int -) -> dict: +) -> dict[str, str]: """Returns metadata for a span event.""" result = generate_labels_for_event(trace_id, span_id, event_name) result.update( diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py index c3416773df..0e79951cef 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/backend/google/gcs/_gcs_impl.py @@ -33,13 +33,13 @@ GcsClient: TypeAlias = _gcs_client_wrapper.GcsClientType -def _path_for_span(trace_id, span_id): +def _path_for_span(trace_id: str, span_id: str) -> str: if not trace_id or not span_id: return "" return "traces/{}/spans/{}".format(trace_id, span_id) -def _path_for_event(trace_id, span_id, event_name): +def _path_for_event(trace_id: str, span_id: str, event_name: str) -> str: if not event_name: return "" span_path = 
_path_for_span(trace_id, span_id) @@ -48,7 +48,7 @@ def _path_for_event(trace_id, span_id, event_name): return "{}/events/{}".format(span_path, event_name) -def _path_for_span_event(trace_id, span_id, event_index): +def _path_for_span_event(trace_id: str, span_id: str, event_index) -> str: if event_index is None: return "" span_path = _path_for_span(trace_id, span_id) @@ -57,7 +57,7 @@ def _path_for_span_event(trace_id, span_id, event_index): return "{}/events/{}".format(span_path, event_index) -def _path_segment_from_labels(labels): +def _path_segment_from_labels(labels: Mapping[str, str]) -> str: """Returns a path segment based on blob label metadata. This aims to return paths like: @@ -86,7 +86,7 @@ def _path_segment_from_labels(labels): class _SimpleGcsBlobUploader(SimpleBlobUploader): - def __init__(self, prefix: str, client:Optional[GcsClient]=None): + def __init__(self, prefix: str, client: Optional[GcsClient] = None): if not prefix: raise ValueError("Must supply a non-empty prefix.") if not prefix.startswith("gs://"): diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py index a0ab7daa40..24cd50e763 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_blobupload/utils/simple_blob_uploader_adaptor.py @@ -109,7 +109,7 @@ class _SimpleBlobUploaderAdaptor(BlobUploader): This implements the core of the function 'blob_uploader_from_simple_blob_uploader'. 
""" - def __init__(self, simple_uploader: SimpleBlobUploader, executor: Optional[Executor]=None): + def __init__(self, simple_uploader: SimpleBlobUploader, executor: Optional[Executor] = None): self._simple_uploader = simple_uploader self._executor = executor or _get_or_create_default_executor() @@ -119,7 +119,7 @@ def upload_async(self, blob: Blob) -> str: self._do_in_background(_UploadAction(self._simple_uploader, uri, full_blob)) return uri - def _do_in_background(self, action): + def _do_in_background(self, action: _UploadAction) -> None: _logger.debug("Scheduling background upload.") self._executor.submit(action)