From 36cd28a995eb2eb8d26f67d18dd7c0287615888a Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Wed, 13 Aug 2025 09:46:36 +0200 Subject: [PATCH] refactor!: Remove unused models, types and utils --- src/apify_shared/consts.py | 420 ++++++++++++++++++++++++------------- src/apify_shared/models.py | 34 --- src/apify_shared/types.py | 10 - src/apify_shared/utils.py | 143 ++----------- tests/unit/test_consts.py | 84 ++++---- tests/unit/test_utils.py | 149 +------------ 6 files changed, 331 insertions(+), 509 deletions(-) delete mode 100644 src/apify_shared/models.py delete mode 100644 src/apify_shared/types.py diff --git a/src/apify_shared/consts.py b/src/apify_shared/consts.py index ea76927..e99d77c 100644 --- a/src/apify_shared/consts.py +++ b/src/apify_shared/consts.py @@ -5,28 +5,39 @@ class ActorJobStatus(str, Enum): - """Available statuses for actor jobs (runs or builds).""" + """Available statuses for Actor jobs (runs or builds). + + These statuses represent the lifecycle of an Actor execution, + from initialization to completion or termination. 
+ """ - #: Actor job initialized but not started yet READY = 'READY' - #: Actor job in progress + """Actor job has been initialized but not yet started.""" + RUNNING = 'RUNNING' - #: Actor job finished successfully + """Actor job is currently executing.""" + SUCCEEDED = 'SUCCEEDED' - #: Actor job or build failed + """Actor job completed successfully without errors.""" + FAILED = 'FAILED' - #: Actor job currently timing out + """Actor job or build failed due to an error or exception.""" + TIMING_OUT = 'TIMING-OUT' - #: Actor job timed out + """Actor job is currently in the process of timing out.""" + TIMED_OUT = 'TIMED-OUT' - #: Actor job currently being aborted by user + """Actor job was terminated due to timeout.""" + ABORTING = 'ABORTING' - #: Actor job aborted by user + """Actor job is currently being aborted by user request.""" + ABORTED = 'ABORTED' + """Actor job was successfully aborted by user request.""" @property def is_terminal(self: ActorJobStatus) -> bool: - """Whether this actor job status is terminal.""" + """Whether this Actor job status is terminal.""" return self in ( ActorJobStatus.SUCCEEDED, ActorJobStatus.FAILED, @@ -36,244 +47,389 @@ def is_terminal(self: ActorJobStatus) -> bool: class ActorSourceType(str, Enum): - """Available source types for actors.""" + """Available source code types for Actors. + + Defines how Actor source code is stored and accessed + for building and executing Actors on the platform. 
+ """ - #: Actor source code is comprised of multiple files SOURCE_FILES = 'SOURCE_FILES' - #: Actor source code is cloned from a Git repository + """Actor source code consists of multiple individual files uploaded directly.""" + GIT_REPO = 'GIT_REPO' - #: Actor source code is downloaded using a tarball or Zip file + """Actor source code is cloned from a Git repository (GitHub, GitLab, etc.).""" + TARBALL = 'TARBALL' - #: Actor source code is taken from a GitHub Gist + """Actor source code is downloaded from a tarball or ZIP archive.""" + GITHUB_GIST = 'GITHUB_GIST' + """Actor source code is retrieved from a GitHub Gist.""" class ActorEventTypes(str, Enum): - """Possible values of actor event type.""" + """Event types that can be sent to Actors during execution. + + These events provide real-time information about system state + and lifecycle changes that Actors can respond to. + """ - #: Info about resource usage of the actor SYSTEM_INFO = 'systemInfo' - #: Sent when the actor is about to migrate + """Information about resource usage and system metrics of the Actor.""" + MIGRATING = 'migrating' - #: Sent when the actor should persist its state (every minute or when migrating) + """Notification that the Actor is about to be migrated to another server.""" + PERSIST_STATE = 'persistState' - #: Sent when the actor is aborting + """Signal to persist Actor state - sent every minute or before migration.""" + ABORTING = 'aborting' + """Notification that the Actor is being terminated and should clean up.""" class ActorEnvVars(str, Enum): - """Possible Apify-specific environment variables prefixed with "ACTOR_".""" + """Environment variables with ACTOR_ prefix set by the Apify platform. - # TODO: document these # noqa: TD003 + These variables provide essential context about the current Actor run, + including identifiers, resource limits, and configuration details. + All variables are automatically set by the platform during Actor execution. 
+ """ - #: BUILD_ID BUILD_ID = 'ACTOR_BUILD_ID' - #: BUILD_NUMBER + """Unique identifier of the Actor build used for this run.""" + BUILD_NUMBER = 'ACTOR_BUILD_NUMBER' - #: BUILD_TAGS + """Sequential build number of the Actor build used for this run.""" + BUILD_TAGS = 'ACTOR_BUILD_TAGS' - #: DEFAULT_DATASET_ID + """Comma-separated list of tags associated with the Actor build.""" + DEFAULT_DATASET_ID = 'ACTOR_DEFAULT_DATASET_ID' - #: DEFAULT_KEY_VALUE_STORE_ID + """Unique identifier of the default dataset for storing Actor results.""" + DEFAULT_KEY_VALUE_STORE_ID = 'ACTOR_DEFAULT_KEY_VALUE_STORE_ID' - #: DEFAULT_REQUEST_QUEUE_ID + """Unique identifier of the default key-value store for Actor data.""" + DEFAULT_REQUEST_QUEUE_ID = 'ACTOR_DEFAULT_REQUEST_QUEUE_ID' - #: EVENTS_WEBSOCKET_URL + """Unique identifier of the default request queue for Actor URLs.""" + EVENTS_WEBSOCKET_URL = 'ACTOR_EVENTS_WEBSOCKET_URL' - #: FULL_NAME + """WebSocket URL for receiving real-time events from the platform.""" + FULL_NAME = 'ACTOR_FULL_NAME' - #: ID + """Full Actor name in format 'username/actor-name' for identification.""" + ID = 'ACTOR_ID' - #: INPUT_KEY + """Unique identifier of the Actor definition.""" + INPUT_KEY = 'ACTOR_INPUT_KEY' - #: MAX_PAID_DATASET_ITEMS + """Key in the default key-value store where Actor input is stored (usually 'INPUT').""" + MAX_PAID_DATASET_ITEMS = 'ACTOR_MAX_PAID_DATASET_ITEMS' - #: MAX_TOTAL_CHARGE_USD + """Maximum number of dataset items that will be charged for pay-per-result Actors.""" + MAX_TOTAL_CHARGE_USD = 'ACTOR_MAX_TOTAL_CHARGE_USD' - #: MEMORY_MBYTES + """Maximum total charge limit in USD for pay-per-event Actors.""" + MEMORY_MBYTES = 'ACTOR_MEMORY_MBYTES' - #: RUN_ID + """Amount of memory allocated to the Actor run in megabytes.""" + RUN_ID = 'ACTOR_RUN_ID' - #: STANDBY_PORT + """Unique identifier of this specific Actor run execution.""" + STANDBY_PORT = 'ACTOR_STANDBY_PORT' - #: STANDBY_URL + """TCP port number for Actor standby mode 
HTTP server.""" + STANDBY_URL = 'ACTOR_STANDBY_URL' - #: STARTED_AT + """Public URL for accessing the Actor in standby mode.""" + STARTED_AT = 'ACTOR_STARTED_AT' - #: TASK_ID + """ISO 8601 timestamp when the Actor run was started (UTC timezone).""" + TASK_ID = 'ACTOR_TASK_ID' - #: TIMEOUT_AT + """Unique identifier of the Actor task (empty if run directly via API).""" + TIMEOUT_AT = 'ACTOR_TIMEOUT_AT' - #: WEB_SERVER_PORT + """ISO 8601 timestamp when the Actor run will timeout (UTC timezone).""" + WEB_SERVER_PORT = 'ACTOR_WEB_SERVER_PORT' - #: WEB_SERVER_URL + """TCP port number for the Actor's built-in HTTP web server.""" + WEB_SERVER_URL = 'ACTOR_WEB_SERVER_URL' + """Public URL for accessing the Actor's built-in HTTP web server.""" class ApifyEnvVars(str, Enum): - """Possible Apify-specific environment variables prefixed with "APIFY_".""" + """Environment variables with APIFY_ prefix set by the Apify platform. - # TODO: document these # noqa: TD003 + These variables provide configuration, authentication, and platform-specific + settings for Actors running on the Apify platform. They control behavior + like logging, proxy settings, browser configuration, and platform integration. 
+ """ - #: API_BASE_URL API_BASE_URL = 'APIFY_API_BASE_URL' - #: API_PUBLIC_BASE_URL + """Base URL of the Apify API (typically 'https://api.apify.com').""" + API_PUBLIC_BASE_URL = 'APIFY_API_PUBLIC_BASE_URL' - #: DEDICATED_CPUS + """Public URL of the Apify API accessible from external networks.""" + DEDICATED_CPUS = 'APIFY_DEDICATED_CPUS' - #: DEFAULT_BROWSER_PATH + """Number of dedicated CPU cores allocated to the Actor based on memory allocation.""" + DEFAULT_BROWSER_PATH = 'APIFY_DEFAULT_BROWSER_PATH' - #: DISABLE_BROWSER_SANDBOX + """File system path to the default browser executable for web scraping.""" + DISABLE_BROWSER_SANDBOX = 'APIFY_DISABLE_BROWSER_SANDBOX' - #: DISABLE_OUTDATED_WARNING + """Set to '1' to disable browser sandbox mode for compatibility with containerized environments.""" + DISABLE_OUTDATED_WARNING = 'APIFY_DISABLE_OUTDATED_WARNING' - #: FACT + """Set to '1' to suppress warnings about outdated SDK versions.""" + FACT = 'APIFY_FACT' - #: HEADLESS + """Fun fact about the Apify platform displayed during Actor startup.""" + HEADLESS = 'APIFY_HEADLESS' - #: INPUT_SECRETS_PRIVATE_KEY_FILE + """Set to '1' to run browsers in headless mode without graphical interface.""" + INPUT_SECRETS_PRIVATE_KEY_FILE = 'APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE' - #: INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE + """Path to the private key file used for decrypting secret input values.""" + INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE = 'APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE' - #: IS_AT_HOME + """Passphrase for unlocking the private key file used for secret decryption.""" + IS_AT_HOME = 'APIFY_IS_AT_HOME' - #: LOCAL_STORAGE_DIR + """Set to '1' when the Actor is running on official Apify platform infrastructure.""" + LOCAL_STORAGE_DIR = 'APIFY_LOCAL_STORAGE_DIR' - #: LOG_FORMAT + """Local file system directory path where Actor data and storage is persisted.""" + LOG_FORMAT = 'APIFY_LOG_FORMAT' - #: LOG_LEVEL + """Logging output format: 'pretty' for human-readable, 'json' for 
structured logs.""" + LOG_LEVEL = 'APIFY_LOG_LEVEL' - #: MAX_USED_CPU_RATIO + """Minimum logging level: 'DEBUG', 'INFO', 'WARNING', 'ERROR' in ascending order.""" + MAX_USED_CPU_RATIO = 'APIFY_MAX_USED_CPU_RATIO' - #: META_ORIGIN + """Maximum CPU utilization ratio (0.0-1.0) that the Actor should not exceed.""" + META_ORIGIN = 'APIFY_META_ORIGIN' - #: METAMORPH_AFTER_SLEEP_MILLIS + """How the Actor run was initiated: 'WEB', 'API', 'SCHEDULER', 'TEST', etc.""" + METAMORPH_AFTER_SLEEP_MILLIS = 'APIFY_METAMORPH_AFTER_SLEEP_MILLIS' - #: PERSIST_STATE_INTERVAL_MILLIS + """Milliseconds to wait before Actor metamorphosis (transformation) occurs.""" + PERSIST_STATE_INTERVAL_MILLIS = 'APIFY_PERSIST_STATE_INTERVAL_MILLIS' - #: PERSIST_STORAGE + """Interval in milliseconds for automatic state persistence (default: 60000ms).""" + PERSIST_STORAGE = 'APIFY_PERSIST_STORAGE' - #: PROXY_HOSTNAME + """Set to '1' to persist Actor storage data after run completion.""" + PROXY_HOSTNAME = 'APIFY_PROXY_HOSTNAME' - #: PROXY_PASSWORD + """Hostname for Apify Proxy service (typically 'proxy.apify.com').""" + PROXY_PASSWORD = 'APIFY_PROXY_PASSWORD' - #: PROXY_PORT + """Authentication password for accessing Apify Proxy services.""" + PROXY_PORT = 'APIFY_PROXY_PORT' - #: PROXY_STATUS_URL + """TCP port number for connecting to Apify Proxy (typically 8000).""" + PROXY_STATUS_URL = 'APIFY_PROXY_STATUS_URL' - #: PURGE_ON_START + """URL endpoint for retrieving Apify Proxy status and connection information.""" + PURGE_ON_START = 'APIFY_PURGE_ON_START' - #: SDK_LATEST_VERSION + """Set to '1' to clear all local storage before Actor execution begins.""" + SDK_LATEST_VERSION = 'APIFY_SDK_LATEST_VERSION' - #: SYSTEM_INFO_INTERVAL_MILLIS + """Latest available version of the Apify SDK for update notifications.""" + SYSTEM_INFO_INTERVAL_MILLIS = 'APIFY_SYSTEM_INFO_INTERVAL_MILLIS' - #: TOKEN + """Interval in milliseconds for sending system resource usage information.""" + TOKEN = 'APIFY_TOKEN' - #: USER_ID + 
"""API authentication token of the user who initiated the Actor run.""" + USER_ID = 'APIFY_USER_ID' - #: USER_IS_PAYING + """Unique identifier of the user who started the Actor (may differ from Actor owner).""" + USER_IS_PAYING = 'APIFY_USER_IS_PAYING' - #: WORKFLOW_KEY + """Set to '1' if the user who started the Actor has an active paid subscription.""" + WORKFLOW_KEY = 'APIFY_WORKFLOW_KEY' + """Unique identifier for grouping related Actor runs and API operations together.""" # Replaced by ActorEnvVars, kept for backward compatibility: - #: ACTOR_BUILD_ID ACTOR_BUILD_ID = 'APIFY_ACTOR_BUILD_ID' - #: ACTOR_BUILD_NUMBER + """Deprecated: Use ActorEnvVars.BUILD_ID instead.""" + ACTOR_BUILD_NUMBER = 'APIFY_ACTOR_BUILD_NUMBER' - #: ACTOR_EVENTS_WS_URL + """Deprecated: Use ActorEnvVars.BUILD_NUMBER instead.""" + ACTOR_EVENTS_WS_URL = 'APIFY_ACTOR_EVENTS_WS_URL' - #: ACTOR_ID + """Deprecated: Use ActorEnvVars.EVENTS_WEBSOCKET_URL instead.""" + ACTOR_ID = 'APIFY_ACTOR_ID' - #: ACTOR_RUN_ID + """Deprecated: Use ActorEnvVars.ID instead.""" + ACTOR_RUN_ID = 'APIFY_ACTOR_RUN_ID' - #: ACTOR_TASK_ID + """Deprecated: Use ActorEnvVars.RUN_ID instead.""" + ACTOR_TASK_ID = 'APIFY_ACTOR_TASK_ID' - #: CONTAINER_PORT + """Deprecated: Use ActorEnvVars.TASK_ID instead.""" + CONTAINER_PORT = 'APIFY_CONTAINER_PORT' - #: CONTAINER_URL + """TCP port for the Actor's web server (deprecated name for WEB_SERVER_PORT).""" + CONTAINER_URL = 'APIFY_CONTAINER_URL' - #: DEFAULT_DATASET_ID + """URL for accessing the Actor's web server (deprecated name for WEB_SERVER_URL).""" + DEFAULT_DATASET_ID = 'APIFY_DEFAULT_DATASET_ID' - #: DEFAULT_KEY_VALUE_STORE_ID + """Deprecated: Use ActorEnvVars.DEFAULT_DATASET_ID instead.""" + DEFAULT_KEY_VALUE_STORE_ID = 'APIFY_DEFAULT_KEY_VALUE_STORE_ID' - #: DEFAULT_REQUEST_QUEUE_ID + """Deprecated: Use ActorEnvVars.DEFAULT_KEY_VALUE_STORE_ID instead.""" + DEFAULT_REQUEST_QUEUE_ID = 'APIFY_DEFAULT_REQUEST_QUEUE_ID' - #: INPUT_KEY + """Deprecated: Use 
ActorEnvVars.DEFAULT_REQUEST_QUEUE_ID instead.""" + INPUT_KEY = 'APIFY_INPUT_KEY' - #: MEMORY_MBYTES + """Deprecated: Use ActorEnvVars.INPUT_KEY instead.""" + MEMORY_MBYTES = 'APIFY_MEMORY_MBYTES' - #: STARTED_AT + """Deprecated: Use ActorEnvVars.MEMORY_MBYTES instead.""" + STARTED_AT = 'APIFY_STARTED_AT' - #: TIMEOUT_AT + """Deprecated: Use ActorEnvVars.STARTED_AT instead.""" + TIMEOUT_AT = 'APIFY_TIMEOUT_AT' + """Deprecated: Use ActorEnvVars.TIMEOUT_AT instead.""" # Deprecated, kept for backward compatibility: - #: ACT_ID ACT_ID = 'APIFY_ACT_ID' - #: ACT_RUN_ID + """Deprecated: Old name for Actor ID.""" + ACT_RUN_ID = 'APIFY_ACT_RUN_ID' + """Deprecated: Old name for Actor run ID.""" class ActorExitCodes(int, Enum): - """Usual actor exit codes.""" + """Standard exit codes used by Actors to indicate run completion status. + + These codes follow Unix conventions where 0 indicates success + and non-zero values indicate various types of failures. + """ - #: The actor finished successfully SUCCESS = 0 + """Actor completed successfully without any errors.""" - #: The main function of the actor threw an Exception ERROR_USER_FUNCTION_THREW = 91 + """Actor failed because the main function threw an unhandled exception.""" class WebhookEventType(str, Enum): - """Events that can trigger a webhook.""" + """Event types that can trigger webhook notifications. + + These events are sent to configured webhook URLs when specific + Actor run or build lifecycle events occur, enabling integration + with external systems and automated workflows. 
+ """ - #: The actor run was created ACTOR_RUN_CREATED = 'ACTOR.RUN.CREATED' - #: The actor run has succeeded + """Triggered when a new Actor run is created and initialized.""" + ACTOR_RUN_SUCCEEDED = 'ACTOR.RUN.SUCCEEDED' - #: The actor run has failed + """Triggered when an Actor run completes successfully.""" + ACTOR_RUN_FAILED = 'ACTOR.RUN.FAILED' - #: The actor run has timed out + """Triggered when an Actor run fails due to an error.""" + ACTOR_RUN_TIMED_OUT = 'ACTOR.RUN.TIMED_OUT' - #: The actor run was aborted + """Triggered when an Actor run is terminated due to timeout.""" + ACTOR_RUN_ABORTED = 'ACTOR.RUN.ABORTED' - #: The actor run was resurrected + """Triggered when an Actor run is manually aborted by user.""" + ACTOR_RUN_RESURRECTED = 'ACTOR.RUN.RESURRECTED' + """Triggered when a previously failed Actor run is automatically resurrected.""" - #: The actor build was created ACTOR_BUILD_CREATED = 'ACTOR.BUILD.CREATED' - #: The actor build has succeeded + """Triggered when a new Actor build process is initiated.""" + ACTOR_BUILD_SUCCEEDED = 'ACTOR.BUILD.SUCCEEDED' - #: The actor build has failed + """Triggered when an Actor build completes successfully.""" + ACTOR_BUILD_FAILED = 'ACTOR.BUILD.FAILED' - #: The actor build has timed out + """Triggered when an Actor build fails due to compilation or setup errors.""" + ACTOR_BUILD_TIMED_OUT = 'ACTOR.BUILD.TIMED_OUT' - #: The actor build was aborted + """Triggered when an Actor build process exceeds the time limit.""" + ACTOR_BUILD_ABORTED = 'ACTOR.BUILD.ABORTED' + """Triggered when an Actor build is manually cancelled by user.""" class MetaOrigin(str, Enum): - """Possible origins for actor runs, i.e. how were the jobs started.""" + """Origins indicating how Actor runs were initiated. + + This information helps track and analyze how Actors are being used + across different interfaces and automation systems on the platform. 
+ """ - #: Job started from Developer console in Source section of actor DEVELOPMENT = 'DEVELOPMENT' - #: Job started from other place on the website (either console or task detail page) + """Actor run started from the Developer Console source code section.""" + WEB = 'WEB' - #: Job started through API + """Actor run initiated through the Apify Console web interface.""" + API = 'API' - #: Job started through Scheduler + """Actor run started programmatically via the Apify API.""" + SCHEDULER = 'SCHEDULER' - #: Job started through test actor page + """Actor run triggered automatically by a scheduled task.""" + TEST = 'TEST' - #: Job started by the webhook + """Actor run initiated from the test/try functionality in Console.""" + WEBHOOK = 'WEBHOOK' - #: Job started by another actor run + """Actor run triggered by an incoming webhook request.""" + ACTOR = 'ACTOR' - #: Job started via Actor standby + """Actor run started by another Actor during its execution.""" + STANDBY = 'STANDBY' - #: Job started via Apify CLI + """Actor run initiated through the Actor Standby mode.""" + CLI = 'CLI' + """Actor run started using the Apify command-line interface.""" + + +class StorageGeneralAccess(str, Enum): + """Access levels for Apify storage resources (key-value stores, datasets, request queues). + + These access levels control who can interact with storage resources and how, + providing fine-grained security for shared data and collaboration scenarios. + """ + + RESTRICTED = 'RESTRICTED' + """Access limited to the resource owner and explicitly granted users or organizations.""" + + PUBLIC_READ = 'PUBLIC_READ' + """Resource data can be read by anyone, but only the owner can modify it.""" + + PUBLIC_WRITE = 'PUBLIC_WRITE' + """Full public access - anyone can read, write, and modify the resource.""" + + +class RunGeneralAccess(str, Enum): + """Access levels for Actor runs and their associated data. 
+ + These settings control who can view Actor run details, including logs, + outputs, and metadata, enabling secure sharing of execution results. + """ + + RESTRICTED = 'RESTRICTED' + """Access limited to the Actor owner and explicitly authorized users.""" + + PUBLIC_READ = 'PUBLIC_READ' + """Run details and outputs are publicly visible but cannot be modified by others.""" INTEGER_ENV_VARS_TYPE = Literal[ @@ -379,33 +535,3 @@ class MetaOrigin(str, Enum): COMMA_SEPARATED_LIST_ENV_VARS: list[COMMA_SEPARATED_LIST_ENV_VARS_TYPE] = list( get_args(COMMA_SEPARATED_LIST_ENV_VARS_TYPE) ) - - -class StorageGeneralAccess(str, Enum): - """Storage setting determining how others can access the storage. - - This setting overrides the user setting of the storage owner. - """ - - #: Respect the user setting of the storage owner (default behavior). - FOLLOW_USER_SETTING = 'FOLLOW_USER_SETTING' - #: Only signed-in users with explicit access can read this storage. - RESTRICTED = 'RESTRICTED' - #: Anyone with a link or the unique storage ID can read this storage. - ANYONE_WITH_ID_CAN_READ = 'ANYONE_WITH_ID_CAN_READ' - #: Anyone with a link, ID, or storage name can read this storage. - ANYONE_WITH_NAME_CAN_READ = 'ANYONE_WITH_NAME_CAN_READ' - - -class RunGeneralAccess(str, Enum): - """Run setting determining how others can access the run. - - This setting overrides the user setting of the run owner. - """ - - #: Respect the user setting of the storage owner (default behavior). - FOLLOW_USER_SETTING = 'FOLLOW_USER_SETTING' - #: Only signed-in users with explicit access can read this run. - RESTRICTED = 'RESTRICTED' - #: Anyone with a link or the unique run ID can read this run. 
- ANYONE_WITH_ID_CAN_READ = 'ANYONE_WITH_ID_CAN_READ' diff --git a/src/apify_shared/models.py b/src/apify_shared/models.py deleted file mode 100644 index 49bfb06..0000000 --- a/src/apify_shared/models.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import annotations - -from typing import Generic, TypeVar - -from apify_shared.utils import ignore_docs - -T = TypeVar('T') - - -class ListPage(Generic[T]): - """A single page of items returned from a list() method.""" - - #: list: List of returned objects on this page - items: list[T] - #: int: Count of the returned objects on this page - count: int - #: int: The limit on the number of returned objects offset specified in the API call - offset: int - #: int: The offset of the first object specified in the API call - limit: int - #: int: Total number of objects matching the API call criteria - total: int - #: bool: Whether the listing is descending or not - desc: bool - - @ignore_docs - def __init__(self: ListPage, data: dict) -> None: - """Initialize a ListPage instance from the API response data.""" - self.items = data['items'] if 'items' in data else [] - self.offset = data['offset'] if 'offset' in data else 0 - self.limit = data['limit'] if 'limit' in data else 0 - self.count = data['count'] if 'count' in data else len(self.items) - self.total = data['total'] if 'total' in data else self.offset + self.count - self.desc = data['desc'] if 'desc' in data else False diff --git a/src/apify_shared/types.py b/src/apify_shared/types.py deleted file mode 100644 index 78b5dfb..0000000 --- a/src/apify_shared/types.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import annotations - -from typing import Any - -# Type for representing json-serializable values. -# It's close enough to the real thing supported by json.parse, -# and the best we can do until mypy supports recursive types. -# It was suggested in a discussion with (and approved by) Guido van Rossum, -# so I'd consider it correct enough. 
-JSONSerializable = str | int | float | bool | None | dict[str, Any] | list[Any] diff --git a/src/apify_shared/utils.py b/src/apify_shared/utils.py index f453ab5..4758d92 100644 --- a/src/apify_shared/utils.py +++ b/src/apify_shared/utils.py @@ -1,154 +1,37 @@ from __future__ import annotations import base64 -import contextlib import hashlib import hmac -import io -import json -import re import string import time -from datetime import datetime, timezone -from enum import Enum -from typing import Any, TypeVar, cast - -PARSE_DATE_FIELDS_MAX_DEPTH = 3 -PARSE_DATE_FIELDS_KEY_SUFFIX = 'At' - -ListOrDict = TypeVar('ListOrDict', list, dict) -T = TypeVar('T') - - -def ignore_docs(method: T) -> T: - """Mark that a method's documentation should not be rendered. Functionally, this decorator is a noop.""" - return method - - -@ignore_docs -def filter_out_none_values_recursively(dictionary: dict) -> dict: - """Return copy of the dictionary, recursively omitting all keys for which values are None.""" - return cast(dict, filter_out_none_values_recursively_internal(dictionary)) - - -@ignore_docs -def filter_out_none_values_recursively_internal( - dictionary: dict, - remove_empty_dicts: bool | None = None, -) -> dict | None: - """Recursively filters out None values from a dictionary. 
- - Unfortunately, it's necessary to have an internal function for the correct result typing, - without having to create complicated overloads - """ - result = {} - for k, v in dictionary.items(): - if isinstance(v, dict): - v = filter_out_none_values_recursively_internal(v, remove_empty_dicts is True or remove_empty_dicts is None) # noqa: PLW2901 - if v is not None: - result[k] = v - if not result and remove_empty_dicts: - return None - return result - - -@ignore_docs -def is_content_type_json(content_type: str) -> bool: - """Check if the given content type is JSON.""" - return bool(re.search(r'^application/json', content_type, flags=re.IGNORECASE)) - - -@ignore_docs -def is_content_type_xml(content_type: str) -> bool: - """Check if the given content type is XML.""" - return bool(re.search(r'^application/.*xml$', content_type, flags=re.IGNORECASE)) - - -@ignore_docs -def is_content_type_text(content_type: str) -> bool: - """Check if the given content type is text.""" - return bool(re.search(r'^text/', content_type, flags=re.IGNORECASE)) - - -@ignore_docs -def is_file_or_bytes(value: Any) -> bool: - """Check if the input value is a file-like object or bytes. - - The check for IOBase is not ideal, it would be better to use duck typing, - but then the check would be super complex, judging from how the 'requests' library does it. - This way should be good enough for the vast majority of use cases, if it causes issues, we can improve it later. 
- """ - return isinstance(value, (bytes | bytearray | io.IOBase)) - - -@ignore_docs -def json_dumps(obj: Any) -> str: - """Dump JSON to a string with the correct settings and serializer.""" - return json.dumps(obj, ensure_ascii=False, indent=2, default=str) - - -@ignore_docs -def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any: - """Extract the value of an enumeration member if it is an Enum, otherwise return the original value.""" - if isinstance(maybe_enum_member, Enum): - return maybe_enum_member.value - return maybe_enum_member - - -@ignore_docs -def parse_date_fields(data: ListOrDict, max_depth: int = PARSE_DATE_FIELDS_MAX_DEPTH) -> ListOrDict: - """Recursively parse date fields in a list or dictionary up to the specified depth.""" - if max_depth < 0: - return data - - if isinstance(data, list): - return [parse_date_fields(item, max_depth - 1) for item in data] - - if isinstance(data, dict): - - def parse(key: str, value: object) -> object: - parsed_value = value - if key.endswith(PARSE_DATE_FIELDS_KEY_SUFFIX) and isinstance(value, str): - with contextlib.suppress(ValueError): - parsed_value = datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ').replace(tzinfo=timezone.utc) - elif isinstance(value, dict): - parsed_value = parse_date_fields(value, max_depth - 1) - elif isinstance(value, list): - parsed_value = parse_date_fields(value, max_depth) # type: ignore # mypy doesn't work with decorators and recursive calls well - return parsed_value - - return {key: parse(key, value) for (key, value) in data.items()} - - return data - - -CHARSET = string.digits + string.ascii_letters def encode_base62(num: int) -> str: """Encode the given number to base62.""" + charset = string.digits + string.ascii_letters + if num == 0: - return CHARSET[0] + return charset[0] res = '' while num > 0: num, remainder = divmod(num, 62) - res = CHARSET[remainder] + res + res = charset[remainder] + res return res -@ignore_docs def create_hmac_signature(secret_key: str, message: 
str) -> str: - """Generates an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length. + """Generate an HMAC signature and encode it using Base62. Base62 encoding reduces the signature length. HMAC signature is truncated to 30 characters to make it shorter. Args: - secret_key (str): Secret key used for signing signatures - message (str): Message to be signed + secret_key: Secret key used for signing signatures. + message: Message to be signed. Returns: - str: Base62 encoded signature + Base62 encoded signature. """ signature = hmac.new(secret_key.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).hexdigest()[:30] @@ -158,16 +41,18 @@ def create_hmac_signature(secret_key: str, message: str) -> str: def create_storage_content_signature( - resource_id: str, url_signing_secret_key: str, expires_in_millis: int | None = None, version: int = 0 + resource_id: str, + url_signing_secret_key: str, + expires_in_millis: int | None = None, + version: int = 0, ) -> str: """Create a secure signature for a resource like a dataset or key-value store. This signature is used to generate a signed URL for authenticated access, which can be expiring or permanent. - The signature is created using HMAC with the provided secret key and includes - the resource ID, expiration time, and version. + The signature is created using HMAC with the provided secret key and includes the resource ID, expiration time, + and version. Note: expires_in_millis is optional. If not provided, the signature will not expire. 
- """ expires_at = int(time.time() * 1000) + expires_in_millis if expires_in_millis else 0 diff --git a/tests/unit/test_consts.py b/tests/unit/test_consts.py index 3874f60..f4ced19 100644 --- a/tests/unit/test_consts.py +++ b/tests/unit/test_consts.py @@ -18,53 +18,55 @@ from enum import Enum -class TestConsts: - def test_env_vars_types_unique(self: TestConsts) -> None: - """Test that env var types don't contain any item twice.""" - for env_var_type in [ +def test_env_vars_types_unique() -> None: + """Test that env var types don't contain any item twice.""" + for env_var_type in [ + BOOL_ENV_VARS, + COMMA_SEPARATED_LIST_ENV_VARS, + DATETIME_ENV_VARS, + FLOAT_ENV_VARS, + INTEGER_ENV_VARS, + STRING_ENV_VARS, + ]: + assert isinstance(env_var_type, list) + assert len(env_var_type) == len(set(env_var_type)) + + +def test_env_vars_types_do_not_overlap() -> None: + """Test that there is no overlap between env var types.""" + for first, second in itertools.combinations( + [ BOOL_ENV_VARS, COMMA_SEPARATED_LIST_ENV_VARS, DATETIME_ENV_VARS, FLOAT_ENV_VARS, INTEGER_ENV_VARS, STRING_ENV_VARS, - ]: - assert isinstance(env_var_type, list) - assert len(env_var_type) == len(set(env_var_type)) + ], + r=2, + ): + assert isinstance(first, list) + assert isinstance(second, list) + assert not set(first) & set(second) + - def test_env_vars_types_do_not_overlap(self: TestConsts) -> None: - """Test that there is no overlap between env var types.""" - for first, second in itertools.combinations( - [ - BOOL_ENV_VARS, - COMMA_SEPARATED_LIST_ENV_VARS, - DATETIME_ENV_VARS, - FLOAT_ENV_VARS, - INTEGER_ENV_VARS, - STRING_ENV_VARS, - ], - r=2, - ): - assert isinstance(first, list) - assert isinstance(second, list) - assert not set(first) & set(second) +def test_env_vars_types_defined_for_all_env_vars() -> None: + """Test that all env vars from `ApifyEnvVars` and `ActorEnvVars` have a defined type.""" + env_vars_from_types = set( + list(BOOL_ENV_VARS) + + list(COMMA_SEPARATED_LIST_ENV_VARS) + + 
list(DATETIME_ENV_VARS) + + list(FLOAT_ENV_VARS) + + list(INTEGER_ENV_VARS) + + list(STRING_ENV_VARS), + ) + env_vars_from_enums = set(ApifyEnvVars).union(set(ActorEnvVars)) + assert env_vars_from_types == env_vars_from_enums - def test_env_vars_types_defined_for_all_env_vars(self: TestConsts) -> None: - """Test that all env vars from `ApifyEnvVars` and `ActorEnvVars` have a defined type.""" - env_vars_from_types = set( - list(BOOL_ENV_VARS) - + list(COMMA_SEPARATED_LIST_ENV_VARS) - + list(DATETIME_ENV_VARS) - + list(FLOAT_ENV_VARS) - + list(INTEGER_ENV_VARS) - + list(STRING_ENV_VARS), - ) - env_vars_from_enums = set(ApifyEnvVars).union(set(ActorEnvVars)) - assert env_vars_from_types == env_vars_from_enums - def test_env_vars_have_correct_prefix(self: TestConsts) -> None: - """Test that all env vars from `ApifyEnvVars` and `ActorEnvVars` have the correct prefix.""" - for env_vars_class, prefix in [(ActorEnvVars, 'ACTOR_'), (ApifyEnvVars, 'APIFY_')]: - env_vars: list[Enum] = list(env_vars_class) - for env_var in env_vars: - assert env_var.value.startswith(prefix) is True +def test_env_vars_have_correct_prefix() -> None: + """Test that all env vars from `ApifyEnvVars` and `ActorEnvVars` have the correct prefix.""" + for env_vars_class, prefix in [(ActorEnvVars, 'ACTOR_'), (ApifyEnvVars, 'APIFY_')]: + env_vars: list[Enum] = list(env_vars_class) + for env_var in env_vars: + assert env_var.value.startswith(prefix) is True diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 2e61eaf..cc9604d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,155 +1,8 @@ from __future__ import annotations import base64 -import io -from datetime import datetime, timezone -from enum import Enum - -from apify_shared.utils import ( - create_hmac_signature, - create_storage_content_signature, - encode_base62, - filter_out_none_values_recursively, - filter_out_none_values_recursively_internal, - ignore_docs, - is_content_type_json, - 
is_content_type_text, - is_content_type_xml, - is_file_or_bytes, - json_dumps, - maybe_extract_enum_member_value, - parse_date_fields, -) - - -def test__maybe_extract_enum_member_value() -> None: - class TestEnum(Enum): - A = 'A' - B = 'B' - - assert maybe_extract_enum_member_value(TestEnum.A) == 'A' - assert maybe_extract_enum_member_value(TestEnum.B) == 'B' - assert maybe_extract_enum_member_value('C') == 'C' - assert maybe_extract_enum_member_value(1) == 1 - assert maybe_extract_enum_member_value(None) is None - - -def test__filter_out_none_values_recursively() -> None: - assert filter_out_none_values_recursively({'k1': 'v1'}) == {'k1': 'v1'} - assert filter_out_none_values_recursively({'k1': None}) == {} - assert filter_out_none_values_recursively( - {'k1': 'v1', 'k2': None, 'k3': {'k4': 'v4', 'k5': None}, 'k6': {'k7': None}} - ) == {'k1': 'v1', 'k3': {'k4': 'v4'}} - - -def test_filter_out_none_values_recursively_internal() -> None: - assert filter_out_none_values_recursively_internal({}) == {} - assert filter_out_none_values_recursively_internal({'k1': {}}) == {} - assert filter_out_none_values_recursively_internal({}, remove_empty_dicts=False) == {} - assert filter_out_none_values_recursively_internal({'k1': {}}, remove_empty_dicts=False) == {'k1': {}} - assert filter_out_none_values_recursively_internal({}, remove_empty_dicts=True) is None - assert filter_out_none_values_recursively_internal({'k1': {}}, remove_empty_dicts=True) is None - - -def test__is_content_type_json() -> None: - # returns True for the right content types - assert is_content_type_json('application/json') is True - assert is_content_type_json('application/jsonc') is True - # returns False for bad content types - assert is_content_type_json('application/xml') is False - assert is_content_type_json('application/ld+json') is False - - -def test__is_content_type_xml() -> None: - # returns True for the right content types - assert is_content_type_xml('application/xml') is True - assert 
is_content_type_xml('application/xhtml+xml') is True - # returns False for bad content types - assert is_content_type_xml('application/json') is False - assert is_content_type_xml('text/html') is False - - -def test__is_content_type_text() -> None: - # returns True for the right content types - assert is_content_type_text('text/html') is True - assert is_content_type_text('text/plain') is True - # returns False for bad content types - assert is_content_type_text('application/json') is False - assert is_content_type_text('application/text') is False - - -def test__is_file_or_bytes() -> None: - # returns True for the right value types - assert is_file_or_bytes(b'abc') is True - assert is_file_or_bytes(bytearray.fromhex('F0F1F2')) is True - assert is_file_or_bytes(io.BytesIO(b'\x00\x01\x02')) is True - - # returns False for bad value types - assert is_file_or_bytes('abc') is False - assert is_file_or_bytes(['a', 'b', 'c']) is False - assert is_file_or_bytes({'a': 'b'}) is False - assert is_file_or_bytes(None) is False - - -def test__json_dumps() -> None: - expected = """{ - "string": "123", - "number": 456, - "nested": { - "abc": "def" - }, - "datetime": "2022-01-01 00:00:00+00:00" -}""" - actual = json_dumps( - { - 'string': '123', - 'number': 456, - 'nested': { - 'abc': 'def', - }, - 'datetime': datetime(2022, 1, 1, tzinfo=timezone.utc), - } - ) - assert actual == expected - - -def test__parse_date_fields() -> None: - # works correctly on empty dicts - assert parse_date_fields({}) == {} - - # correctly parses dates on fields ending with -At - expected_datetime = datetime(2016, 11, 14, 11, 10, 52, 425000, timezone.utc) - assert parse_date_fields({'createdAt': '2016-11-14T11:10:52.425Z'}) == {'createdAt': expected_datetime} - - # doesn't parse dates on fields not ending with -At - assert parse_date_fields({'saveUntil': '2016-11-14T11:10:52.425Z'}) == {'saveUntil': '2016-11-14T11:10:52.425Z'} - - # parses dates in dicts in lists - expected_datetime = datetime(2016, 11, 
14, 11, 10, 52, 425000, timezone.utc) - assert parse_date_fields([{'createdAt': '2016-11-14T11:10:52.425Z'}]) == [{'createdAt': expected_datetime}] - - # parses nested dates - expected_datetime = datetime(2020, 2, 29, 10, 9, 8, 100000, timezone.utc) - assert parse_date_fields({'a': {'b': {'c': {'createdAt': '2020-02-29T10:09:08.100Z'}}}}) == { - 'a': {'b': {'c': {'createdAt': expected_datetime}}} - } - - # doesn't parse dates nested too deep - expected_datetime = datetime(2020, 2, 29, 10, 9, 8, 100000, timezone.utc) - assert parse_date_fields({'a': {'b': {'c': {'d': {'createdAt': '2020-02-29T10:09:08.100Z'}}}}}) == { - 'a': {'b': {'c': {'d': {'createdAt': '2020-02-29T10:09:08.100Z'}}}} - } - - # doesn't die when the date can't be parsed - assert parse_date_fields({'createdAt': 'NOT_A_DATE'}) == {'createdAt': 'NOT_A_DATE'} - - -def test_ignore_docs() -> None: - def testing_function(_a: str, _b: str) -> str: - """Dummy docstring""" - return 'dummy' - assert testing_function is ignore_docs(testing_function) +from apify_shared.utils import create_hmac_signature, create_storage_content_signature, encode_base62 def test_encode_base62() -> None: