diff --git a/.envrc b/.envrc index cab115a7..943710d0 100644 --- a/.envrc +++ b/.envrc @@ -9,6 +9,7 @@ export LAUNCHPAD_ENV="development" export LAUNCHPAD_HOST="0.0.0.0" export LAUNCHPAD_PORT="2218" export LAUNCHPAD_RPC_SHARED_SECRET="launchpad-also-very-long-value-haha" +export SENTRY_BASE_URL="http://localhost:8000" # STATSD_HOST=... # defaults to 127.0.0.1 # STATSD_PORT=... # defaults to 8125 diff --git a/src/launchpad/artifacts/apple/zipped_xcarchive.py b/src/launchpad/artifacts/apple/zipped_xcarchive.py index daffde37..1dc70beb 100644 --- a/src/launchpad/artifacts/apple/zipped_xcarchive.py +++ b/src/launchpad/artifacts/apple/zipped_xcarchive.py @@ -112,7 +112,9 @@ def generate_ipa(self, output_path: Path): except subprocess.CalledProcessError as e: raise RuntimeError(f"Failed to generate IPA file with zip: {e}") except FileNotFoundError: - raise RuntimeError("zip command not found. This tool is required for IPA generation.") + raise RuntimeError( + "zip command not found. This tool is required for IPA generation." + ) def get_provisioning_profile(self) -> dict[str, Any] | None: if self._provisioning_profile is not None: @@ -151,10 +153,23 @@ def get_app_bundle_path(self) -> Path: if self._app_bundle_path is not None: return self._app_bundle_path - for path in self._extract_dir.rglob("*.xcarchive/Products/**/*.app"): - if path.is_dir() and "__MACOSX" not in str(path): - logger.debug(f"Found Apple app bundle: {path}") - return path + # Search patterns for different XCArchive formats + search_patterns = [ + # Format 1: MyApp.xcarchive/Products/**/*.app + "*.xcarchive/Products/**/*.app", + # Format 2: Products/Applications/*.app (direct extraction) + "Products/Applications/*.app", + # Format 3: Products/**/*.app (more general) + "Products/**/*.app", + ] + + for pattern in search_patterns: + logger.debug(f"Searching for app bundle with pattern: {pattern}") + for path in self._extract_dir.rglob(pattern): + if path.is_dir() and "__MACOSX" not in str(path): + logger.debug(f"Found Apple app bundle: {path}") + self._app_bundle_path = path + return path raise FileNotFoundError(f"No .app bundle found in {self._extract_dir}") @@ -190,9 +205,15 @@ def get_all_binary_paths(self) -> List[BinaryInfo]: # Find corresponding dSYM for framework framework_uuid = self._extract_binary_uuid(framework_binary_path) - framework_dsym_path = dsym_files.get(framework_uuid) if framework_uuid else None + framework_dsym_path = ( + dsym_files.get(framework_uuid) if framework_uuid else None + ) - binaries.append(BinaryInfo(framework_name, framework_binary_path, framework_dsym_path)) + binaries.append( + BinaryInfo( + framework_name, framework_binary_path, framework_dsym_path + ) + ) # Find app extension binaries for extension_path in app_bundle_path.rglob("*.appex"): @@ -206,13 +227,23 @@ def get_all_binary_paths(self) -> List[BinaryInfo]: extension_plist = plistlib.load(f) extension_executable = extension_plist.get("CFBundleExecutable") if extension_executable: - extension_binary_path = extension_path / extension_executable + extension_binary_path = ( + extension_path / extension_executable + ) # Use the full extension name as the key to avoid conflicts - extension_name = f"{extension_path.stem}/{extension_executable}" + extension_name = ( + f"{extension_path.stem}/{extension_executable}" + ) # Find corresponding dSYM for extension - extension_uuid = self._extract_binary_uuid(extension_binary_path) - extension_dsym_path = dsym_files.get(extension_uuid) if extension_uuid else None + extension_uuid = 
self._extract_binary_uuid( + extension_binary_path + ) + extension_dsym_path = ( + dsym_files.get(extension_uuid) + if extension_uuid + else None + ) binaries.append( BinaryInfo( @@ -222,20 +253,33 @@ def get_all_binary_paths(self) -> List[BinaryInfo]: ) ) except Exception as e: - logger.warning(f"Failed to read extension Info.plist at {extension_path}: {e}") + logger.warning( + f"Failed to read extension Info.plist at {extension_path}: {e}" + ) return binaries - def get_asset_catalog_details(self, relative_path: Path) -> List[AssetCatalogElement]: + def get_asset_catalog_details( + self, relative_path: Path + ) -> List[AssetCatalogElement]: """Get the details of an asset catalog file (Assets.car) by returning the parsed JSON from ParsedAssets.""" try: app_bundle_path = self.get_app_bundle_path() json_name = relative_path.with_suffix(".json") - xcarchive_dir = list(self._extract_dir.glob("*.xcarchive"))[0] - app_bundle_path = app_bundle_path.relative_to(xcarchive_dir) - file_path = xcarchive_dir / "ParsedAssets" / app_bundle_path / json_name + # Handle different XCArchive formats + xcarchive_dirs = list(self._extract_dir.glob("*.xcarchive")) + if xcarchive_dirs: + # Format 1: MyApp.xcarchive/Products/Applications/... + xcarchive_dir = xcarchive_dirs[0] + app_bundle_path = app_bundle_path.relative_to(xcarchive_dir) + file_path = xcarchive_dir / "ParsedAssets" / app_bundle_path / json_name + else: + # Format 2: Products/Applications/... (direct extraction) + file_path = ( + self._extract_dir / "ParsedAssets" / app_bundle_path / json_name + ) if not file_path.exists(): logger.warning(f"Assets.json not found at {file_path}") @@ -246,7 +290,9 @@ def get_asset_catalog_details(self, relative_path: Path) -> List[AssetCatalogEle return [self._parse_asset_element(item) for item in data] except Exception as e: - logger.warning(f"Failed to get asset catalog details for {relative_path}: {e}") + logger.warning( + f"Failed to get asset catalog details for {relative_path}: {e}" + ) return [] def _parse_asset_element(self, item: dict[str, Any]) -> AssetCatalogElement: @@ -315,7 +361,9 @@ def _find_dsym_files(self) -> dict[str, Path]: dsym_uuid = self._extract_binary_uuid(dwarf_file) if dsym_uuid: dsym_files[dsym_uuid] = dwarf_file - logger.debug(f"Found dSYM file {dwarf_file} with UUID {dsym_uuid}") + logger.debug( + f"Found dSYM file {dwarf_file} with UUID {dsym_uuid}" + ) self._dsym_files = dsym_files return dsym_files diff --git a/src/launchpad/artifacts/artifact_factory.py b/src/launchpad/artifacts/artifact_factory.py index ca01c1d7..7397554a 100644 --- a/src/launchpad/artifacts/artifact_factory.py +++ b/src/launchpad/artifacts/artifact_factory.py @@ -34,40 +34,65 @@ def from_path(path: Path) -> Artifact: # Check if it's a zip file by looking at magic bytes if content.startswith(b"PK\x03\x04"): - # Check if zip contains a single APK (ZippedAPK) - with ZipFile(BytesIO(content)) as zip_file: - # Check if zip contains a Info.plist in the root of the .xcarchive folder (ZippedXCArchive) - plist_files = [f for f in zip_file.namelist() if f.endswith(".xcarchive/Info.plist")] - if plist_files: - return ZippedXCArchive(path) - - apk_files = [f for f in zip_file.namelist() if f.endswith(".apk")] - if len(apk_files) == 1: - return ZippedAPK(path) - - aab_files = [f for f in zip_file.namelist() if f.endswith(".aab")] - if len(aab_files) == 1: - return ZippedAAB(path) - - # Check if zip contains base/manifest/AndroidManifest.xml (AAB) - manifest_files = [f for f in zip_file.namelist() if 
f.endswith("base/manifest/AndroidManifest.xml")] - if manifest_files: - return AAB(path) - - # Check if zip contains AndroidManifest.xml (APK) - manifest_files = [f for f in zip_file.namelist() if f.endswith("AndroidManifest.xml")] - if manifest_files: - return APK(path) - - # Check if it's a direct APK or AAB by looking for AndroidManifest.xml in specific locations + try: + with ZipFile(BytesIO(content)) as zip_file: + filenames = zip_file.namelist() + + # Check for XCArchive (iOS) + if ArtifactFactory._is_xcarchive(filenames): + return ZippedXCArchive(path) + + # Check for single APK or AAB files (zipped artifacts) + apk_files = [f for f in filenames if f.endswith(".apk")] + if len(apk_files) == 1: + return ZippedAPK(path) + + aab_files = [f for f in filenames if f.endswith(".aab")] + if len(aab_files) == 1: + return ZippedAAB(path) + + # Check for AAB (base/manifest structure) + if any( + f.endswith("base/manifest/AndroidManifest.xml") + for f in filenames + ): + return AAB(path) + + # Check for APK (AndroidManifest.xml) + if any(f.endswith("AndroidManifest.xml") for f in filenames): + return APK(path) + + except Exception: + pass + + # Fallback: try direct APK/AAB detection regardless of magic bytes try: with ZipFile(BytesIO(content)) as zip_file: - if any(f.endswith("base/manifest/AndroidManifest.xml") for f in zip_file.namelist()): + filenames = zip_file.namelist() + + if any( + f.endswith("base/manifest/AndroidManifest.xml") for f in filenames + ): return AAB(path) - if any(f.endswith("AndroidManifest.xml") for f in zip_file.namelist()): + if any(f.endswith("AndroidManifest.xml") for f in filenames): return APK(path) except Exception: pass raise ValueError("Input is not a supported artifact") + + @staticmethod + def _is_xcarchive(filenames: list[str]) -> bool: + """Check if filenames indicate an XCArchive structure.""" + # Method 1: .xcarchive/Info.plist pattern + if any(f.endswith(".xcarchive/Info.plist") for f in filenames): + return True + + # Method 2: Root Info.plist + Products/Applications structure + has_root_info_plist = "Info.plist" in filenames + has_products_apps = any( + f.startswith("Products/Applications/") for f in filenames + ) + + return has_root_info_plist and has_products_apps diff --git a/src/launchpad/kafka.py b/src/launchpad/kafka.py index 2dcbf2b9..876b690f 100644 --- a/src/launchpad/kafka.py +++ b/src/launchpad/kafka.py @@ -43,7 +43,7 @@ def create_kafka_consumer( consumer_config = { "bootstrap.servers": config["bootstrap_servers"], "group.id": config["group_id"], - "auto.offset.reset": "latest", + "auto.offset.reset": config["auto_offset_reset"], "enable.auto.commit": False, "enable.auto.offset.store": False, } @@ -136,4 +136,7 @@ def get_kafka_config() -> Dict[str, Any]: "concurrency": int(os.getenv("KAFKA_CONCURRENCY", "4")), "max_pending_futures": int(os.getenv("KAFKA_MAX_PENDING_FUTURES", "100")), "healthcheck_file": os.getenv("KAFKA_HEALTHCHECK_FILE"), + "auto_offset_reset": os.getenv( + "KAFKA_AUTO_OFFSET_RESET", "latest" + ), # latest = skip old messages } diff --git a/src/launchpad/parsers/android/dex/dex_file_parser.py b/src/launchpad/parsers/android/dex/dex_file_parser.py index 5fb9ed37..4cf73062 100644 --- a/src/launchpad/parsers/android/dex/dex_file_parser.py +++ b/src/launchpad/parsers/android/dex/dex_file_parser.py @@ -115,7 +115,9 @@ def get_class_definitions(self) -> list[ClassDefinition]: source_file_idx = self.buffer_wrapper.read_u32() annotations_offset = self.buffer_wrapper.read_u32() class_data_offset = self.buffer_wrapper.read_u32() # 
Class data offset - static_values_offset = self.buffer_wrapper.read_u32() # static values offset + static_values_offset = ( + self.buffer_wrapper.read_u32() + ) # static values offset signature = self._get_type_name(class_idx) if superclass_index != NO_INDEX: @@ -130,15 +132,21 @@ def get_class_definitions(self) -> list[ClassDefinition]: if source_file_idx != NO_INDEX: source_file_name = self._get_string(source_file_idx) - annotations_directory = self._parse_annotations_directory(annotations_offset) + annotations_directory = self._parse_annotations_directory( + annotations_offset + ) annotations: list[Annotation] = [] if annotations_directory: - annotations = self._parse_annotation_set(annotations_directory.class_annotations_offset) + annotations = self._parse_annotation_set( + annotations_directory.class_annotations_offset + ) methods: list[Method] = [] if class_data_offset != 0: - methods = self._parse_method_definitions(class_data_offset, annotations_directory) + methods = self._parse_method_definitions( + class_data_offset, annotations_directory + ) class_def = ClassDefinition( signature=signature, @@ -157,7 +165,9 @@ def get_class_definitions(self) -> list[ClassDefinition]: # Resolve superclass references for class_idx, superclass_signature in pending_superclasses: if superclass_signature in class_by_signature: - class_defs[class_idx].superclass = class_by_signature[superclass_signature] + class_defs[class_idx].superclass = class_by_signature[ + superclass_signature + ] # Resolve interface references for class_idx, signatures in pending_interfaces: @@ -168,7 +178,9 @@ def get_class_definitions(self) -> list[ClassDefinition]: return class_defs - def _parse_annotations_directory(self, annotations_directory_offset: int) -> AnnotationsDirectory | None: + def _parse_annotations_directory( + self, annotations_directory_offset: int + ) -> AnnotationsDirectory | None: """Parse annotations directory. 
https://source.android.com/docs/core/runtime/dex-format#annotations-directory @@ -198,7 +210,9 @@ def _parse_annotations_directory(self, annotations_directory_offset: int) -> Ann method_index = self.buffer_wrapper.read_u32() annotations_offset = self.buffer_wrapper.read_u32() method_annotations.append( - MethodAnnotation(method_index=method_index, annotations_offset=annotations_offset) + MethodAnnotation( + method_index=method_index, annotations_offset=annotations_offset + ) ) parameter_annotations: list[ParameterAnnotation] = [] @@ -206,7 +220,9 @@ def _parse_annotations_directory(self, annotations_directory_offset: int) -> Ann method_index = self.buffer_wrapper.read_u32() annotations_offset = self.buffer_wrapper.read_u32() parameter_annotations.append( - ParameterAnnotation(method_index=method_index, annotations_offset=annotations_offset) + ParameterAnnotation( + method_index=method_index, annotations_offset=annotations_offset + ) ) self.buffer_wrapper.seek(cursor) @@ -251,8 +267,12 @@ def _parse_method_definitions( self.buffer_wrapper.read_uleb128() # field index diff self.buffer_wrapper.read_uleb128() # access flags - direct_methods = self._parse_encoded_methods(direct_methods_size, annotations_directory) - virtual_methods = self._parse_encoded_methods(virtual_methods_size, annotations_directory) + direct_methods = self._parse_encoded_methods( + direct_methods_size, annotations_directory + ) + virtual_methods = self._parse_encoded_methods( + virtual_methods_size, annotations_directory + ) methods.extend(direct_methods) methods.extend(virtual_methods) @@ -315,8 +335,12 @@ def _parse_annotation_set(self, offset: int) -> list[Annotation]: size = self.buffer_wrapper.read_u32() for i in range(size): - self.buffer_wrapper.seek(offset + 4 + i * 4) # offset + size + (index * annotation_set_item size) - self.buffer_wrapper.seek(self.buffer_wrapper.read_u32()) # annotation set item offset + self.buffer_wrapper.seek( + offset + 4 + i * 4 + ) # offset + size + (index * annotation_set_item size) + self.buffer_wrapper.seek( + self.buffer_wrapper.read_u32() + ) # annotation set item offset visibility = self.buffer_wrapper.read_u8() @@ -442,7 +466,9 @@ def _parse_encoded_annotation(self) -> Annotation: return annotation - def _parse_encoded_methods(self, size: int, annotations_directory: AnnotationsDirectory | None) -> list[Method]: + def _parse_encoded_methods( + self, size: int, annotations_directory: AnnotationsDirectory | None + ) -> list[Method]: """Parse encoded methods. 
Args: @@ -471,7 +497,11 @@ def _parse_encoded_methods(self, size: int, annotations_directory: AnnotationsDi if annotations_directory: for method_annotation in annotations_directory.method_annotations: if method_annotation.method_index == method_index: - annotations.extend(self._parse_annotation_set(method_annotation.annotations_offset)) + annotations.extend( + self._parse_annotation_set( + method_annotation.annotations_offset + ) + ) parameters = self._get_method_parameters( method.prototype.parameters, @@ -515,7 +545,9 @@ def _get_method_parameters( if annotations_directory: for parameter_annotation in annotations_directory.parameter_annotations: if parameter_annotation.method_index == method_index: - parameter_annotations = self._parse_annotation_set_ref_list(parameter_annotation.annotations_offset) + parameter_annotations = self._parse_annotation_set_ref_list( + parameter_annotation.annotations_offset + ) break annotation_index = 0 @@ -593,7 +625,9 @@ def _get_proto(self, proto_index: int) -> Prototype: """ cursor = self.buffer_wrapper.cursor - self.buffer_wrapper.seek(self.header.proto_ids_off + proto_index * 12) # Each proto_id_item is 12 bytes + self.buffer_wrapper.seek( + self.header.proto_ids_off + proto_index * 12 + ) # Each proto_id_item is 12 bytes shorty_idx = self.buffer_wrapper.read_u32() return_type_idx = self.buffer_wrapper.read_u32() @@ -649,7 +683,9 @@ def _get_field(self, field_index: int) -> str: """ cursor = self.buffer_wrapper.cursor - self.buffer_wrapper.seek(self.header.field_ids_off + field_index * 8) # Each field_id_item is 8 bytes + self.buffer_wrapper.seek( + self.header.field_ids_off + field_index * 8 + ) # Each field_id_item is 8 bytes class_index = self.buffer_wrapper.read_u16() type_index = self.buffer_wrapper.read_u16() @@ -675,7 +711,9 @@ def _get_method(self, method_index: int) -> Method: """ cursor = self.buffer_wrapper.cursor - self.buffer_wrapper.seek(self.header.method_ids_off + method_index * 8) # Each method_id_item is 8 bytes + self.buffer_wrapper.seek( + self.header.method_ids_off + method_index * 8 + ) # Each method_id_item is 8 bytes class_index = self.buffer_wrapper.read_u16() proto_index = self.buffer_wrapper.read_u16() @@ -701,7 +739,9 @@ def _get_type_name(self, index: int) -> str: """ cursor = self.buffer_wrapper.cursor - self.buffer_wrapper.seek(self.header.type_ids_off + index * 4) # Each type_id_item is 4 bytes + self.buffer_wrapper.seek( + self.header.type_ids_off + index * 4 + ) # Each type_id_item is 4 bytes string_index = self.buffer_wrapper.read_u32() string = self._get_string(string_index) @@ -723,7 +763,9 @@ def _get_string(self, index: int) -> str: """ cursor = self.buffer_wrapper.cursor - self.buffer_wrapper.seek(self.header.string_ids_off + index * 4) # Each string_id_item is 4 bytes + self.buffer_wrapper.seek( + self.header.string_ids_off + index * 4 + ) # Each string_id_item is 4 bytes self.buffer_wrapper.seek(self.buffer_wrapper.read_u32()) # string data offset string_length = self.buffer_wrapper.read_uleb128() diff --git a/src/launchpad/parsers/buffer_wrapper.py b/src/launchpad/parsers/buffer_wrapper.py index a3b36d0a..53441ead 100644 --- a/src/launchpad/parsers/buffer_wrapper.py +++ b/src/launchpad/parsers/buffer_wrapper.py @@ -20,7 +20,7 @@ class DebugLogContext: def __enter__(self) -> None: """Enter debug group.""" - logger.debug("=== %s ===", self.name) + # logger.debug("=== %s ===", self.name) def __exit__( self, @@ -29,7 +29,7 @@ def __exit__( exc_tb: types.TracebackType | None, ) -> None: """Exit debug group.""" - 
logger.debug("=== End %s ===", self.name) + # logger.debug("=== End %s ===", self.name) class BufferWrapper: @@ -75,63 +75,63 @@ def _debug_group(self, name: str) -> DebugLogContext: def read_u8(self) -> int: """Read unsigned 8-bit integer.""" with self._debug_group("read_u8"): - logger.debug(f"cursor: {self.cursor}") + # logger.debug(f"cursor: {self.cursor}") val = self.buffer[self.cursor] - logger.debug(f"value: {val}") + # logger.debug(f"value: {val}") self.cursor += 1 return val def read_s8(self) -> int: """Read signed 8-bit integer.""" with self._debug_group("read_s8"): - logger.debug(f"cursor: {self.cursor}") + # logger.debug(f"cursor: {self.cursor}") val = struct.unpack(" int: """Read unsigned 16-bit integer (little-endian).""" with self._debug_group("read_u16"): - logger.debug(f"cursor: {self.cursor}") + # logger.debug(f"cursor: {self.cursor}") val = struct.unpack(" int: """Read signed 32-bit integer (little-endian).""" with self._debug_group("read_s32"): - logger.debug(f"cursor: {self.cursor}") + # logger.debug(f"cursor: {self.cursor}") val = struct.unpack(" int: """Read unsigned 32-bit integer (little-endian).""" with self._debug_group("read_u32"): - logger.debug(f"cursor: {self.cursor}") + # logger.debug(f"cursor: {self.cursor}") val = struct.unpack(" int: """Read unsigned 32-bit integer (big-endian).""" with self._debug_group("read_u32be"): - logger.debug(f"cursor: {self.cursor}") + # logger.debug(f"cursor: {self.cursor}") val = struct.unpack(">I", self.buffer[self.cursor : self.cursor + 4])[0] - logger.debug(f"value: {val} 0x{val:08x}") + # logger.debug(f"value: {val} 0x{val:08x}") self.cursor += 4 return val # type: ignore[no-any-return] def read_u64(self) -> int: """Read unsigned 64-bit integer (little-endian).""" with self._debug_group("read_u64"): - logger.debug(f"cursor: {self.cursor}") + # logger.debug(f"cursor: {self.cursor}") val = struct.unpack(" int: length = self.read_u8() if length & 0x80: length = ((length & 0x7F) << 8) | self.read_u8() - logger.debug(f"length: {length}") + # logger.debug(f"length: {length}") return length def read_length16(self) -> int: @@ -150,7 +150,7 @@ def read_length16(self) -> int: length = self.read_u16() if length & 0x8000: length = ((length & 0x7FFF) << 16) | self.read_u16() - logger.debug(f"length: {length}") + # logger.debug(f"length: {length}") return length def read_uleb128(self) -> int: @@ -198,7 +198,9 @@ def read_sized_int(self, size: int) -> int: """ with self._debug_group(f"read_sized_int ({size} bytes)"): if not 1 <= size <= 4: - raise ValueError(f"Invalid size {size} for sized int at offset 0x{self.cursor:08x}") + raise ValueError( + f"Invalid size {size} for sized int at offset 0x{self.cursor:08x}" + ) # Read bytes and sign extend if size == 4: @@ -229,7 +231,9 @@ def read_sized_uint(self, size: int) -> int: """ with self._debug_group(f"read_sized_uint ({size} bytes)"): if not 1 <= size <= 4: - raise ValueError(f"Invalid size {size} for sized uint at offset 0x{self.cursor:08x}") + raise ValueError( + f"Invalid size {size} for sized uint at offset 0x{self.cursor:08x}" + ) # Read bytes val = 0 @@ -253,7 +257,9 @@ def read_sized_float(self, size: int) -> float: """ with self._debug_group(f"read_sized_float ({size} bytes)"): if not 1 <= size <= 4: - raise ValueError(f"Invalid size {size} for sized float at offset 0x{self.cursor:08x}") + raise ValueError( + f"Invalid size {size} for sized float at offset 0x{self.cursor:08x}" + ) # Zero extend to 4 bytes bytes_val = bytearray(4) @@ -277,7 +283,9 @@ def read_sized_double(self, size: 
int) -> float: """ with self._debug_group(f"read_sized_double ({size} bytes)"): if not 1 <= size <= 8: - raise ValueError(f"Invalid size {size} for sized double at offset 0x{self.cursor:08x}") + raise ValueError( + f"Invalid size {size} for sized double at offset 0x{self.cursor:08x}" + ) # Zero extend to 8 bytes bytes_val = bytearray(8) @@ -297,7 +305,11 @@ def read_string_with_length(self, length: int) -> str: Decoded string with null bytes removed """ with self._debug_group(f"read_string ({length} bytes)"): - val = self.buffer[self.cursor : self.cursor + length].decode("utf-8", errors="replace").replace("\0", "") + val = ( + self.buffer[self.cursor : self.cursor + length] + .decode("utf-8", errors="replace") + .replace("\0", "") + ) self.cursor += length return val diff --git a/src/launchpad/sentry_client.py b/src/launchpad/sentry_client.py index e3ccc76f..b6f64bdd 100644 --- a/src/launchpad/sentry_client.py +++ b/src/launchpad/sentry_client.py @@ -25,41 +25,24 @@ def __init__(self, base_url: str) -> None: self.base_url = base_url.rstrip("/") self.shared_secret = os.getenv("LAUNCHPAD_RPC_SHARED_SECRET") if not self.shared_secret: - raise RuntimeError("LAUNCHPAD_RPC_SHARED_SECRET must be provided or set as environment variable") + raise RuntimeError( + "LAUNCHPAD_RPC_SHARED_SECRET must be provided or set as environment variable" + ) - def assemble_size_analysis( - self, - org: str | int, - project: str | int, - artifact_id: str | int, - checksum: str, - chunks: list[str], + def download_artifact( + self, org: str, project: str, artifact_id: str ) -> Dict[str, Any]: - """Call the assemble size analysis endpoint.""" - # Validate hex strings - if not re.match(r"^[a-fA-F0-9]+$", checksum): - raise ValueError("Invalid checksum format") - for chunk in chunks: - if not re.match(r"^[a-fA-F0-9]+$", chunk): - raise ValueError("Invalid chunk format") - - data = { - "checksum": checksum, - "chunks": chunks, - "assemble_type": "size_analysis", - } - - endpoint = f"/api/0/internal/{org}/{project}/files/preprodartifacts/{artifact_id}/assemble-generic/" - return self._make_json_request("POST", endpoint, data, operation="Assemble request") - - def download_artifact(self, org: str, project: str, artifact_id: str) -> Dict[str, Any]: """Download preprod artifact.""" - endpoint = f"/api/0/internal/{org}/{project}/files/preprodartifacts/{artifact_id}/" + endpoint = ( + f"/api/0/internal/{org}/{project}/files/preprodartifacts/{artifact_id}/" + ) url = self._build_url(endpoint) try: logger.debug(f"GET {url}") - response = requests.get(url, headers=self._get_auth_headers(), timeout=120, stream=True) + response = requests.get( + url, headers=self._get_auth_headers(), timeout=120, stream=True + ) if response.status_code != 200: return self._handle_error_response(response, "Download") @@ -83,7 +66,9 @@ def download_artifact(self, org: str, project: str, artifact_id: str) -> Dict[st logger.error(f"Download failed: {e}") return {"error": str(e)} - def update_artifact(self, org: str, project: str, artifact_id: str, data: Dict[str, Any]) -> Dict[str, Any]: + def update_artifact( + self, org: str, project: str, artifact_id: str, data: Dict[str, Any] + ) -> Dict[str, Any]: """Update preprod artifact.""" endpoint = f"/api/0/internal/{org}/{project}/files/preprodartifacts/{artifact_id}/update/" return self._make_json_request("PUT", endpoint, data, operation="Update") @@ -107,13 +92,17 @@ def upload_size_analysis_file( with open(path, "rb") as f: content = f.read() - logger.info(f"Uploading {file_path} ({len(content)} bytes, 
{len(content) / 1024 / 1024:.2f} MB)") + logger.info( + f"Uploading {file_path} ({len(content)} bytes, {len(content) / 1024 / 1024:.2f} MB)" + ) # Step 1: Get chunk upload options from server logger.debug("Getting chunk upload options...") options_result = self._get_chunk_upload_options(org) if "error" in options_result: - return {"error": f"Failed to get chunk upload options: {options_result['error']}"} + return { + "error": f"Failed to get chunk upload options: {options_result['error']}" + } chunk_options = options_result.get("chunking", {}) chunk_size = chunk_options.get("chunk_size", 8 * 1024 * 1024) # fallback to 8MB @@ -136,7 +125,7 @@ def upload_size_analysis_file( for attempt in range(max_retries): logger.debug(f"Assembly attempt {attempt + 1}/{max_retries}") - result = self.assemble_size_analysis( + result = self._assemble_size_analysis( org=org, project=project, artifact_id=artifact_id, @@ -156,7 +145,9 @@ def upload_size_analysis_file( logger.info(f"Re-uploading {len(missing)} missing chunks") if not self._upload_chunks(org, chunks, missing): - logger.warning(f"Some chunks failed to re-upload on attempt {attempt + 1}") + logger.warning( + f"Some chunks failed to re-upload on attempt {attempt + 1}" + ) else: logger.warning(f"Assembly attempt {attempt + 1} failed: {result}") if attempt == max_retries - 1: # Last attempt @@ -167,7 +158,9 @@ def upload_size_analysis_file( def _get_auth_headers(self, body: bytes | None = None) -> Dict[str, str]: """Get authentication headers for a request.""" body = body or b"" - signature = hmac.new(self.shared_secret.encode("utf-8"), body, hashlib.sha256).hexdigest() + signature = hmac.new( + self.shared_secret.encode("utf-8"), body, hashlib.sha256 + ).hexdigest() return { "Authorization": f"rpcsignature rpc0:{signature}", "Content-Type": "application/json", @@ -177,7 +170,9 @@ def _build_url(self, endpoint: str) -> str: """Build full URL from endpoint.""" return f"{self.base_url}{endpoint}" - def _handle_error_response(self, response: requests.Response, operation: str) -> Dict[str, Any]: + def _handle_error_response( + self, response: requests.Response, operation: str + ) -> Dict[str, Any]: """Handle non-200 response with consistent error format.""" logger.warning(f"{operation} failed: {response.status_code}") return { @@ -235,13 +230,17 @@ def _create_chunks(self, content: bytes, chunk_size: int) -> list[Dict[str, Any] # Show individual chunk details (limit for large files, similar to Rust version) max_chunks_to_show = 5 for i, chunk in enumerate(chunks[:max_chunks_to_show]): - logger.debug(f" Chunk {i + 1}: {chunk['size']} bytes (SHA1: {chunk['checksum']})") + logger.debug( + f" Chunk {i + 1}: {chunk['size']} bytes (SHA1: {chunk['checksum']})" + ) if len(chunks) > max_chunks_to_show: logger.debug(f" ... 
and {len(chunks) - max_chunks_to_show} more chunks") return chunks - def _upload_chunks(self, org: str, chunks: list[Dict[str, Any]], target_checksums: list[str]) -> bool: + def _upload_chunks( + self, org: str, chunks: list[Dict[str, Any]], target_checksums: list[str] + ) -> bool: """Upload chunks by checksum list.""" chunk_map = {c["checksum"]: c for c in chunks} success = 0 @@ -253,7 +252,9 @@ def _upload_chunks(self, org: str, chunks: list[Dict[str, Any]], target_checksum if self._upload_chunk(org, chunk_map[checksum]): success += 1 - logger.debug(f"Uploaded chunk {success}/{len(target_checksums)}: {checksum}") + logger.debug( + f"Uploaded chunk {success}/{len(target_checksums)}: {checksum}" + ) logger.debug(f"Uploaded {success}/{len(target_checksums)} chunks successfully") return success == len(target_checksums) @@ -267,7 +268,9 @@ def _upload_chunk(self, org: str, chunk: Dict[str, Any]) -> bool: body = self._create_multipart_body(boundary, chunk["checksum"], chunk["data"]) # For multipart, we need custom headers - signature = hmac.new(self.shared_secret.encode("utf-8"), body, hashlib.sha256).hexdigest() + signature = hmac.new( + self.shared_secret.encode("utf-8"), body, hashlib.sha256 + ).hexdigest() headers = { "Authorization": f"rpcsignature rpc0:{signature}", "Content-Type": f"multipart/form-data; boundary={boundary}", @@ -285,7 +288,36 @@ def _upload_chunk(self, org: str, chunk: Dict[str, Any]) -> bool: logger.error(f"Chunk upload error: {e}") return False - def _create_multipart_body(self, boundary: str, filename: str, data: bytes) -> bytes: + def _assemble_size_analysis( + self, + org: str | int, + project: str | int, + artifact_id: str | int, + checksum: str, + chunks: list[str], + ) -> Dict[str, Any]: + """Call the assemble size analysis endpoint.""" + # Validate hex strings + if not re.match(r"^[a-fA-F0-9]+$", checksum): + raise ValueError("Invalid checksum format") + for chunk in chunks: + if not re.match(r"^[a-fA-F0-9]+$", chunk): + raise ValueError("Invalid chunk format") + + data = { + "checksum": checksum, + "chunks": chunks, + "assemble_type": "size_analysis", + } + + endpoint = f"/api/0/internal/{org}/{project}/files/preprodartifacts/{artifact_id}/assemble-generic/" + return self._make_json_request( + "POST", endpoint, data, operation="Assemble request" + ) + + def _create_multipart_body( + self, boundary: str, filename: str, data: bytes + ) -> bytes: """Create multipart/form-data body.""" lines = [ f"--{boundary}", diff --git a/src/launchpad/service.py b/src/launchpad/service.py index d3cb2a7b..b907807a 100644 --- a/src/launchpad/service.py +++ b/src/launchpad/service.py @@ -3,11 +3,16 @@ from __future__ import annotations import asyncio +import json import os import signal +import tempfile import time +import re +import zipfile from typing import Any, Dict, cast +from pathlib import Path from arroyo.backends.kafka import KafkaPayload from arroyo.processing.processor import StreamProcessor @@ -15,6 +20,12 @@ PreprodArtifactEvents, ) +from launchpad.sentry_client import SentryClient +from launchpad.size.runner import do_size, do_preprocess +from launchpad.size.analyzers.android import AndroidAnalyzer +from launchpad.size.analyzers.apple import AppleAppAnalyzer +from launchpad.size.models.android import AndroidAppInfo +from launchpad.size.models.apple import AppleAppInfo from launchpad.utils.logging import get_logger from launchpad.utils.statsd import DogStatsd, get_statsd @@ -37,11 +48,36 @@ def __init__(self) -> None: self._kafka_task: asyncio.Future[Any] | None = 
None self._statsd: DogStatsd | None = None self._healthcheck_file: str | None = None + self._service_config: Dict[str, Any] | None = None async def setup(self) -> None: """Set up the service components.""" - service_config = get_service_config() - self._statsd = get_statsd(host=service_config["statsd_host"], port=service_config["statsd_port"]) + self._service_config = get_service_config() + self._statsd = get_statsd( + host=self._service_config["statsd_host"], + port=self._service_config["statsd_port"], + ) + + # Setup HTTP server with health check callback + server_config = get_server_config() + self.server = LaunchpadServer( + host=server_config["host"], + port=server_config["port"], + health_check_callback=self.health_check, + ) + + # Setup healthcheck file if not configured + self._healthcheck_file = os.getenv("KAFKA_HEALTHCHECK_FILE") + if not self._healthcheck_file: + # Create a default healthcheck file in tmp + self._healthcheck_file = f"/tmp/launchpad-kafka-health-{os.getpid()}" + os.environ["KAFKA_HEALTHCHECK_FILE"] = self._healthcheck_file + logger.info(f"Using healthcheck file: {self._healthcheck_file}") + + # Create Kafka consumer with message handler + self.kafka_processor = create_kafka_consumer( + message_handler=self.handle_kafka_message + ) # Setup HTTP server with health check callback server_config = get_server_config() @@ -60,10 +96,199 @@ async def setup(self) -> None: logger.info(f"Using healthcheck file: {self._healthcheck_file}") # Create Kafka consumer with message handler - self.kafka_processor = create_kafka_consumer(message_handler=self.handle_kafka_message) + self.kafka_processor = create_kafka_consumer( + message_handler=self.handle_kafka_message + ) logger.info("Service components initialized") + def process_artifact_analysis( + self, artifact_id: str, project_id: str, organization_id: str + ) -> None: + """ + Download artifact and perform size analysis. + """ + if not self._service_config: + raise RuntimeError("Service not properly initialized. 
Call setup() first.") + + sentry_base_url = self._service_config["sentry_base_url"] + sentry_client = SentryClient(base_url=sentry_base_url) + + # Download the artifact + logger.info(f"Downloading artifact {artifact_id}...") + download_result = sentry_client.download_artifact( + org=organization_id, project=project_id, artifact_id=artifact_id + ) + + if "error" in download_result: + # Use structured error categorization + error_category, error_description = _categorize_http_error(download_result) + raise RuntimeError( + f"Failed to download artifact ({error_category}): {error_description}" + ) + + if not download_result.get("success"): + raise RuntimeError(f"Download was not successful: {download_result}") + + file_content = download_result["file_content"] + file_size = download_result["file_size_bytes"] + + logger.info( + f"Downloaded artifact {artifact_id}: {file_size} bytes ({file_size / 1024 / 1024:.2f} MB)" + ) + + # Save to temporary file + temp_file = None + try: + with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tf: + tf.write(file_content) + temp_file = tf.name + + logger.info(f"Saved artifact to temporary file: {temp_file}") + + # Create artifact instance to determine type before preprocessing + from launchpad.artifacts.artifact_factory import ArtifactFactory + + artifact = ArtifactFactory.from_path(Path(temp_file)) + + # Run preprocessing first + logger.info(f"Running preprocessing on {temp_file}...") + app_info = do_preprocess(Path(temp_file)) + logger.info(f"Preprocessing completed for artifact {artifact_id}") + + # Prepare update data based on platform + update_data: Dict[str, Any] = {} + + if isinstance(app_info, AppleAppInfo): + # Apple/iOS artifact (XCARCHIVE) + update_data = { + "build_version": app_info.version, + "build_number": ( + int(app_info.build) + if str(app_info.build).isdigit() + else app_info.build + ), # perhaps we shouldn't even include it if it isn't an int + "artifact_type": 0, # 0 = XCARCHIVE + "apple_app_info": { + "is_simulator": app_info.is_simulator, + "codesigning_type": app_info.codesigning_type, + "profile_name": app_info.profile_name, + "is_code_signature_valid": app_info.is_code_signature_valid, + "code_signature_errors": app_info.code_signature_errors, + }, + } + elif isinstance(app_info, AndroidAppInfo): + # Android artifact - need to determine if AAB or APK + # Check the actual artifact type from the file + from launchpad.artifacts.android.aab import AAB + from launchpad.artifacts.android.zipped_aab import ZippedAAB + + if isinstance(artifact, (AAB, ZippedAAB)): + artifact_type = 1 # 1 = AAB + else: + artifact_type = 2 # 2 = APK + + update_data = { + "build_version": app_info.version, + "build_number": ( + int(app_info.build) if app_info.build.isdigit() else None + ), + "artifact_type": artifact_type, + } + + # Send update to Sentry + logger.info( + f"Sending preprocessed info to Sentry for artifact {artifact_id}..."
+ ) + logger.debug( + f"Update data for artifact {artifact_id}: {update_data}" + ) + update_result = sentry_client.update_artifact( + org=organization_id, + project=project_id, + artifact_id=artifact_id, + data=update_data, + ) + + if "error" in update_result: + logger.error( + f"Failed to send preprocessed info: {update_result['error']}" + ) + # Don't raise - preprocessing succeeded, just update failed + else: + logger.info( + f"Successfully sent preprocessed info for artifact {artifact_id}" + ) + + # Create analyzer with preprocessed info to avoid duplicate work + analyzer: AndroidAnalyzer | AppleAppAnalyzer + if isinstance(app_info, AndroidAppInfo): + analyzer = AndroidAnalyzer() + analyzer.app_info = app_info + else: # AppleAppInfo + analyzer = AppleAppAnalyzer() + analyzer.app_info = app_info + + # Run full analysis with the pre-configured analyzer + logger.info(f"Running full analysis on {temp_file}...") + results = do_size(Path(temp_file), analyzer=analyzer) + + logger.info(f"Size analysis completed for artifact {artifact_id}") + + # Write results to temporary file for upload + analysis_file = None + try: + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as af: + json.dump(results.to_dict(), af, indent=2) + analysis_file = af.name + + logger.info( + f"Analysis results written to temporary file: {analysis_file}" + ) + + # Upload the analysis file back to Sentry + logger.info(f"Uploading analysis results for artifact {artifact_id}...") + upload_result = sentry_client.upload_size_analysis_file( + org=organization_id, + project=project_id, + artifact_id=artifact_id, + file_path=analysis_file, + ) + + if "error" in upload_result: + logger.error( + f"Failed to upload analysis results: {upload_result['error']}" + ) + # Don't raise - analysis succeeded, just upload failed + else: + logger.info( + f"Successfully uploaded analysis results for artifact {artifact_id}" + ) + + finally: + # Clean up analysis file + if analysis_file and os.path.exists(analysis_file): + try: + os.remove(analysis_file) + logger.debug(f"Cleaned up analysis file: {analysis_file}") + except Exception as e: + logger.warning( + f"Failed to clean up analysis file {analysis_file}: {e}" + ) + + finally: + # Clean up temporary file + if temp_file and os.path.exists(temp_file): + try: + os.remove(temp_file) + logger.debug(f"Cleaned up temporary file: {temp_file}") + except Exception as e: + logger.warning( + f"Failed to clean up temporary file {temp_file}: {e}" + ) + def handle_kafka_message(self, payload: PreprodArtifactEvents) -> None: """ Handle incoming Kafka messages. @@ -73,30 +298,119 @@ def handle_kafka_message(self, payload: PreprodArtifactEvents) -> None: organization_id = payload["organization_id"] try: - logger.info(f"Processing artifact: {artifact_id} (project: {project_id}, org: {organization_id})") + logger.info( + f"Processing artifact: {artifact_id} (project: {project_id}, org: {organization_id})" + ) if self._statsd: self._statsd.increment("launchpad.artifact.processing.started") - # TODO: Implement actual analysis logic - # This will need to: - # 1. Fetch the artifact using artifact_id from storage/API - # 2. Determine platform by examining the artifact - # 3. Run appropriate analyzer (iOS/Android) - # 4. 
Store results + # Perform the actual artifact analysis + self.process_artifact_analysis(artifact_id, project_id, organization_id) - # For now, just log - logger.info(f"Analysis completed for artifact {artifact_id} (stub)") + logger.info(f"Analysis completed for artifact {artifact_id}") if self._statsd: self._statsd.increment("launchpad.artifact.processing.completed") - except Exception as e: - logger.error(f"Analysis failed for artifact {artifact_id}: {e}", exc_info=True) + except RuntimeError as e: + # Handle expected errors without crashing the consumer + error_msg = str(e) + + # Use proper error categorization instead of string matching + if "(not_found)" in error_msg: + logger.warning( + f"Artifact not found: {artifact_id} (project: {project_id}, org: {organization_id}). " + "This may be a test message or the artifact may have been deleted." + ) + if self._statsd: + self._statsd.increment("launchpad.artifact.processing.not_found") + elif "(server_error)" in error_msg: + logger.error( + f"Server error downloading artifact {artifact_id}: {e}. " + "This is likely a temporary issue with the Sentry API." + ) + if self._statsd: + self._statsd.increment("launchpad.artifact.processing.server_error") + elif "(client_error)" in error_msg: + logger.error( + f"Client error downloading artifact {artifact_id}: {e}. " + "This may indicate a permissions issue or malformed request." + ) + if self._statsd: + self._statsd.increment("launchpad.artifact.processing.client_error") + else: + logger.error( + f"Analysis failed for artifact {artifact_id}: {e}", exc_info=True + ) + if self._statsd: + self._statsd.increment("launchpad.artifact.processing.failed") + # Don't re-raise - let the consumer continue processing other messages + except ValueError as e: + # Handle artifact type validation errors gracefully + error_msg = str(e) + if "Input is not a supported artifact" in error_msg: + logger.warning( + f"Unsupported artifact type for artifact {artifact_id} (project: {project_id}, org: {organization_id}): {e}. " + "This artifact format is not currently supported by the analyzer." + ) + if self._statsd: + self._statsd.increment( + "launchpad.artifact.processing.unsupported_type" + ) + else: + logger.error( + f"Validation error processing artifact {artifact_id}: {e}", + exc_info=True, + ) + if self._statsd: + self._statsd.increment( + "launchpad.artifact.processing.validation_error" + ) + # Don't re-raise - let the consumer continue processing other messages + except (OSError, IOError, FileNotFoundError, PermissionError) as e: + # Handle file I/O errors gracefully + logger.error( + f"File I/O error processing artifact {artifact_id} (project: {project_id}, org: {organization_id}): {e}. " + "This may indicate a corrupted artifact or file system issue." + ) + if self._statsd: + self._statsd.increment("launchpad.artifact.processing.io_error") + # Don't re-raise - let the consumer continue processing other messages + except zipfile.BadZipFile as e: + # Handle corrupted zip files gracefully + logger.error( + f"Corrupted zip file for artifact {artifact_id} (project: {project_id}, org: {organization_id}): {e}. " + "This artifact appears to be a corrupted or invalid zip file." 
+ ) if self._statsd: - self._statsd.increment("launchpad.artifact.processing.failed") - # Re-raise to let Arroyo handle the error (can be configured for DLQ) - raise + self._statsd.increment("launchpad.artifact.processing.bad_zip") + # Don't re-raise - let the consumer continue processing other messages + except Exception as e: + # Handle zip file corruption and other parsing errors gracefully + error_msg = str(e) + if any( + keyword in error_msg.lower() + for keyword in ["zip", "corrupt", "invalid", "bad magic"] + ): + logger.error( + f"Artifact corruption/parsing error for artifact {artifact_id} (project: {project_id}, org: {organization_id}): {e}. " + "This may indicate a corrupted or malformed artifact file." + ) + if self._statsd: + self._statsd.increment( + "launchpad.artifact.processing.corruption_error" + ) + # Don't re-raise - let the consumer continue processing other messages + else: + logger.error( + f"Unexpected error processing artifact {artifact_id}: {e}", + exc_info=True, + ) + if self._statsd: + self._statsd.increment("launchpad.artifact.processing.failed") + # Re-raise to let Arroyo handle the error (can be configured for DLQ) + raise async def start(self) -> None: """Start all service components.""" @@ -129,7 +443,11 @@ def _setup_signal_handlers(self) -> None: def signal_handler(signum: int, frame: Any) -> None: if self._shutdown_event.is_set(): - logger.info(f"Received signal {signum} during shutdown, ignoring...") + logger.info( + f"Received signal {signum} during shutdown, forcing exit..." + ) + # Force exit if we get a second signal + os._exit(1) return logger.info(f"Received signal {signum}, initiating shutdown...") @@ -239,7 +557,9 @@ async def health_check(self) -> HealthCheckResponse: health_status["components"]["kafka"] = kafka_health # Check server health - health_status["components"]["server"] = {"status": "ok" if self.server else "not_initialized"} + health_status["components"]["server"] = { + "status": "ok" if self.server else "not_initialized" + } return health_status @@ -248,6 +568,7 @@ def get_service_config() -> Dict[str, Any]: """Get service configuration from environment.""" statsd_host = os.getenv("STATSD_HOST", "127.0.0.1") statsd_port_str = os.getenv("STATSD_PORT", "8125") + sentry_base_url = os.getenv("SENTRY_BASE_URL") try: statsd_port = int(statsd_port_str) @@ -257,6 +578,7 @@ def get_service_config() -> Dict[str, Any]: return { "statsd_host": statsd_host, "statsd_port": statsd_port, + "sentry_base_url": sentry_base_url, } @@ -265,3 +587,45 @@ async def run_service() -> None: service = LaunchpadService() await service.setup() await service.start() + + +def _categorize_http_error(error_result: Dict[str, Any]) -> tuple[str, str]: + """ + Categorize HTTP error results from SentryClient. 
+ + Returns: + Tuple of (error_category, error_description) + Categories: "not_found", "server_error", "client_error", "unknown" + """ + # First try to get the structured status code + status_code = error_result.get("status_code") + if isinstance(status_code, int): + if status_code == 404: + return "not_found", f"Resource not found (HTTP {status_code})" + elif 500 <= status_code < 600: + return "server_error", f"Server error (HTTP {status_code})" + elif 400 <= status_code < 500: + return "client_error", f"Client error (HTTP {status_code})" + else: + return "unknown", f"Unexpected HTTP status {status_code}" + + # Fallback to parsing the error message string + error_msg = error_result.get("error", "") + if isinstance(error_msg, str): + # Extract HTTP status code from error message like "HTTP 404" + match = re.search(r"HTTP (\d+)", error_msg) + if match: + try: + status_code = int(match.group(1)) + if status_code == 404: + return "not_found", f"Resource not found (HTTP {status_code})" + elif 500 <= status_code < 600: + return "server_error", f"Server error (HTTP {status_code})" + elif 400 <= status_code < 500: + return "client_error", f"Client error (HTTP {status_code})" + else: + return "unknown", f"Unexpected HTTP status {status_code}" + except ValueError: + pass + + return "unknown", f"Unknown error: {error_result}" diff --git a/src/launchpad/size/analyzers/android.py b/src/launchpad/size/analyzers/android.py index c231b2ea..2506dbb5 100644 --- a/src/launchpad/size/analyzers/android.py +++ b/src/launchpad/size/analyzers/android.py @@ -47,17 +47,35 @@ def __init__( skip_insights: Skip insights generation for faster analysis """ self.skip_insights = skip_insights + self.app_info: AndroidAppInfo | None = None - def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults: + def preprocess(self, artifact: AndroidArtifact) -> AndroidAppInfo: + """Extract basic app information from the manifest. 
+ + Args: + artifact: Android artifact to preprocess + + Returns: + Basic app information extracted from manifest + """ manifest_dict = artifact.get_manifest().model_dump() - app_info = AndroidAppInfo( + self.app_info = AndroidAppInfo( name=manifest_dict["application"]["label"] or "Unknown", version=manifest_dict["version_name"] or "Unknown", build=manifest_dict["version_code"] or "Unknown", package_name=manifest_dict["package_name"], ) + return self.app_info + + def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults: + # Use preprocessed app info if available, otherwise extract it + if not self.app_info: + self.app_info = self.preprocess(artifact) + + app_info = self.app_info + apks: list[APK] = [] # Split AAB into APKs, or use the APK directly if isinstance(artifact, AAB): @@ -129,7 +147,9 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis: if file_path.name in FILE_NAME_TO_TREEMAP_TYPE: treemap_type = FILE_NAME_TO_TREEMAP_TYPE[file_path.name] else: - treemap_type = FILE_TYPE_TO_TREEMAP_TYPE.get(file_type, TreemapType.OTHER) + treemap_type = FILE_TYPE_TO_TREEMAP_TYPE.get( + file_type, TreemapType.OTHER + ) # Get relative path from extract directory relative_path = str(file_path.relative_to(extract_path)) @@ -151,7 +171,9 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis: hash_md5=file_hash, ) path_to_file_info["classes.dex"] = merged_dex_info - logger.debug("Created merged DEX representation: %s", relative_path) + logger.debug( + "Created merged DEX representation: %s", relative_path + ) else: # Additional DEX file - merge into existing representation existing_info = path_to_file_info["classes.dex"] @@ -229,13 +251,22 @@ def _get_class_definitions(self, apks: list[APK]) -> list[ClassDefinition]: class_definitions.extend(apk.get_class_definitions()) return class_definitions - def _get_images(self, apks: list[APK], file_analysis: FileAnalysis) -> dict[Path, FileInfo]: + def _get_images( + self, apks: list[APK], file_analysis: FileAnalysis + ) -> dict[Path, FileInfo]: image_map = {} for apk in apks: for image_file in apk.get_images(): image_map[image_file] = next( - (file_info for file_info in file_analysis.files if str(image_file).endswith(file_info.path)), None + ( + file_info + for file_info in file_analysis.files + if str(image_file).endswith(file_info.path) + ), + None, ) if image_map[image_file] is None: - logger.warning(f"Image file {image_file} not found in file analysis") + logger.warning( + f"Image file {image_file} not found in file analysis" + ) return image_map diff --git a/src/launchpad/size/runner.py b/src/launchpad/size/runner.py index ca361446..a2baf2e8 100644 --- a/src/launchpad/size/runner.py +++ b/src/launchpad/size/runner.py @@ -9,21 +9,57 @@ from launchpad.size.analyzers.android import AndroidAnalyzer from launchpad.size.analyzers.apple import AppleAppAnalyzer from launchpad.size.models.common import BaseAnalysisResults +from launchpad.size.models.android import AndroidAppInfo +from launchpad.size.models.apple import AppleAppInfo -def do_size(path: Path, **flags: Any) -> BaseAnalysisResults: - start_time = time.time() +def do_preprocess(path: Path, **flags: Any) -> AndroidAppInfo | AppleAppInfo: + """Perform preprocessing step only to extract basic app info. + + Args: + path: Path to the artifact + **flags: Additional flags passed to analyzer + + Returns: + App info extracted during preprocessing + """ artifact = ArtifactFactory.from_path(path) - # isinstance switch below is a bit sad. 
Ryan suggested a - # get_analyzer method on artifact which might be nicer. - analyzer: AndroidAnalyzer | AppleAppAnalyzer if isinstance(artifact, AndroidArtifact): analyzer = AndroidAnalyzer(**flags) + return analyzer.preprocess(cast(AndroidArtifact, artifact)) elif isinstance(artifact, AppleArtifact): analyzer = AppleAppAnalyzer(**flags) + return analyzer.preprocess(cast(AppleArtifact, artifact)) else: raise ValueError(f"Unknown artifact kind {artifact}") + + +def do_size( + path: Path, analyzer: AndroidAnalyzer | AppleAppAnalyzer | None = None, **flags: Any +) -> BaseAnalysisResults: + """Perform full size analysis. + + Args: + path: Path to the artifact + analyzer: Optional pre-configured analyzer (with preprocessing already done) + **flags: Additional flags passed to analyzer if creating new one + + Returns: + Full analysis results + """ + start_time = time.time() + artifact = ArtifactFactory.from_path(path) + + # If no analyzer provided, create one + if analyzer is None: + if isinstance(artifact, AndroidArtifact): + analyzer = AndroidAnalyzer(**flags) + elif isinstance(artifact, AppleArtifact): + analyzer = AppleAppAnalyzer(**flags) + else: + raise ValueError(f"Unknown artifact kind {artifact}") + results = analyzer.analyze(cast(Any, artifact)) end_time = time.time() diff --git a/tests/unit/test_service.py b/tests/unit/test_service.py new file mode 100644 index 00000000..8ae4bc9d --- /dev/null +++ b/tests/unit/test_service.py @@ -0,0 +1,99 @@ +"""Tests for service error handling.""" + +import pytest + +from launchpad.service import _categorize_http_error + + +class TestErrorCategorization: + """Test the HTTP error categorization function.""" + + def test_categorize_with_structured_status_code(self): + """Test error categorization using structured status code.""" + # Test 404 not found + error_result = {"error": "HTTP 404", "status_code": 404} + category, description = _categorize_http_error(error_result) + assert category == "not_found" + assert "Resource not found (HTTP 404)" in description + + # Test 500 server error + error_result = {"error": "HTTP 500", "status_code": 500} + category, description = _categorize_http_error(error_result) + assert category == "server_error" + assert "Server error (HTTP 500)" in description + + # Test 502 server error + error_result = {"error": "HTTP 502", "status_code": 502} + category, description = _categorize_http_error(error_result) + assert category == "server_error" + assert "Server error (HTTP 502)" in description + + # Test 401 client error + error_result = {"error": "HTTP 401", "status_code": 401} + category, description = _categorize_http_error(error_result) + assert category == "client_error" + assert "Client error (HTTP 401)" in description + + # Test 403 client error + error_result = {"error": "HTTP 403", "status_code": 403} + category, description = _categorize_http_error(error_result) + assert category == "client_error" + assert "Client error (HTTP 403)" in description + + # Test unusual status code + error_result = {"error": "HTTP 299", "status_code": 299} + category, description = _categorize_http_error(error_result) + assert category == "unknown" + assert "Unexpected HTTP status 299" in description + + def test_categorize_with_string_parsing_fallback(self): + """Test error categorization using string parsing fallback.""" + # Test 404 not found (no status_code field) + error_result = {"error": "HTTP 404"} + category, description = _categorize_http_error(error_result) + assert category == "not_found" + assert "Resource not found (HTTP 
404)" in description + + # Test 500 server error + error_result = {"error": "HTTP 500"} + category, description = _categorize_http_error(error_result) + assert category == "server_error" + assert "Server error (HTTP 500)" in description + + # Test 503 server error + error_result = {"error": "HTTP 503"} + category, description = _categorize_http_error(error_result) + assert category == "server_error" + assert "Server error (HTTP 503)" in description + + # Test 400 client error + error_result = {"error": "HTTP 400"} + category, description = _categorize_http_error(error_result) + assert category == "client_error" + assert "Client error (HTTP 400)" in description + + def test_categorize_with_malformed_input(self): + """Test error categorization with malformed input.""" + # Test missing fields + error_result = {} + category, description = _categorize_http_error(error_result) + assert category == "unknown" + assert "Unknown error" in description + + # Test non-integer status code + error_result = {"error": "HTTP 404", "status_code": "404"} + category, description = _categorize_http_error(error_result) + assert category == "not_found" # Should fall back to string parsing + assert "Resource not found (HTTP 404)" in description + + # Test non-HTTP error message + error_result = {"error": "Connection timeout"} + category, description = _categorize_http_error(error_result) + assert category == "unknown" + assert "Unknown error" in description + + # Test malformed HTTP error message + error_result = {"error": "HTTP abc"} + category, description = _categorize_http_error(error_result) + assert category == "unknown" + assert "Unknown error" in description diff --git a/tests/unit/test_service_error_handling.py b/tests/unit/test_service_error_handling.py new file mode 100644 index 00000000..0feec910 --- /dev/null +++ b/tests/unit/test_service_error_handling.py @@ -0,0 +1,179 @@ +"""Test error handling in the LaunchpadService.""" + +import tempfile +import zipfile +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from launchpad.service import LaunchpadService + + +class TestLaunchpadServiceErrorHandling: + """Test error handling in LaunchpadService.""" + + def setup_method(self): + """Set up test fixtures.""" + self.service = LaunchpadService() + self.service._statsd = Mock() + self.service._service_config = {"sentry_base_url": "http://test.com"} + + def test_handle_kafka_message_unsupported_artifact(self): + """Test that unsupported artifact errors are handled gracefully.""" + # Create a mock payload + payload = { + "artifact_id": "test-artifact", + "project_id": "test-project", + "organization_id": "test-org", + } + + # Mock the process_artifact_analysis method to raise ValueError + with patch.object( + self.service, + "process_artifact_analysis", + side_effect=ValueError("Input is not a supported artifact"), + ): + # This should not raise an exception + self.service.handle_kafka_message(payload) + + # Verify that the correct statsd metric was incremented + self.service._statsd.increment.assert_called_with( + "launchpad.artifact.processing.unsupported_type" + ) + + def test_handle_kafka_message_validation_error(self): + """Test that general validation errors are handled gracefully.""" + payload = { + "artifact_id": "test-artifact", + "project_id": "test-project", + "organization_id": "test-org", + } + + with patch.object( + self.service, + "process_artifact_analysis", + side_effect=ValueError("Some other validation error"), + ): + # This should not raise an exception + 
self.service.handle_kafka_message(payload) + + # Verify that the correct statsd metric was incremented + self.service._statsd.increment.assert_called_with( + "launchpad.artifact.processing.validation_error" + ) + + def test_handle_kafka_message_io_error(self): + """Test that I/O errors are handled gracefully.""" + payload = { + "artifact_id": "test-artifact", + "project_id": "test-project", + "organization_id": "test-org", + } + + with patch.object( + self.service, + "process_artifact_analysis", + side_effect=FileNotFoundError("File not found"), + ): + # This should not raise an exception + self.service.handle_kafka_message(payload) + + # Verify that the correct statsd metric was incremented + self.service._statsd.increment.assert_called_with( + "launchpad.artifact.processing.io_error" + ) + + def test_handle_kafka_message_bad_zip_file(self): + """Test that bad zip file errors are handled gracefully.""" + payload = { + "artifact_id": "test-artifact", + "project_id": "test-project", + "organization_id": "test-org", + } + + with patch.object( + self.service, + "process_artifact_analysis", + side_effect=zipfile.BadZipFile("Bad zip file"), + ): + # This should not raise an exception + self.service.handle_kafka_message(payload) + + # Verify that the correct statsd metric was incremented + self.service._statsd.increment.assert_called_with( + "launchpad.artifact.processing.bad_zip" + ) + + def test_handle_kafka_message_corruption_error(self): + """Test that corruption errors are handled gracefully.""" + payload = { + "artifact_id": "test-artifact", + "project_id": "test-project", + "organization_id": "test-org", + } + + with patch.object( + self.service, + "process_artifact_analysis", + side_effect=Exception("zip file is corrupt"), + ): + # This should not raise an exception + self.service.handle_kafka_message(payload) + + # Verify that the correct statsd metric was incremented + self.service._statsd.increment.assert_called_with( + "launchpad.artifact.processing.corruption_error" + ) + + def test_handle_kafka_message_unexpected_error_reraises(self): + """Test that unexpected errors are re-raised for DLQ handling.""" + payload = { + "artifact_id": "test-artifact", + "project_id": "test-project", + "organization_id": "test-org", + } + + with patch.object( + self.service, + "process_artifact_analysis", + side_effect=Exception("Unexpected error"), + ): + # This should re-raise the exception + with pytest.raises(Exception, match="Unexpected error"): + self.service.handle_kafka_message(payload) + + # Verify that the correct statsd metric was incremented + self.service._statsd.increment.assert_called_with( + "launchpad.artifact.processing.failed" + ) + + def test_create_unsupported_artifact_file(self): + """Test that unsupported artifact files are handled correctly.""" + with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp: + tmp.write(b"This is not a valid artifact") + tmp_path = Path(tmp.name) + + try: + from launchpad.artifacts.artifact_factory import ArtifactFactory + + with pytest.raises(ValueError, match="Input is not a supported artifact"): + ArtifactFactory.from_path(tmp_path) + finally: + tmp_path.unlink() + + def test_create_empty_zip_file(self): + """Test that empty zip files are handled correctly.""" + with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp: + # Create an empty zip file + with zipfile.ZipFile(tmp.name, "w") as zf: + pass # Empty zip file + tmp_path = Path(tmp.name) + + try: + from launchpad.artifacts.artifact_factory import ArtifactFactory + + 
with pytest.raises(ValueError, match="Input is not a supported artifact"): + ArtifactFactory.from_path(tmp_path) + finally: + tmp_path.unlink()