From 36cccd5b4df1a07a14d71af877426470d55b9816 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 7 Mar 2025 00:56:08 +0300 Subject: [PATCH 1/7] fix: add more abi types --- diffyscan/utils/encoder.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/diffyscan/utils/encoder.py b/diffyscan/utils/encoder.py index 8b3f393..6d3a74a 100644 --- a/diffyscan/utils/encoder.py +++ b/diffyscan/utils/encoder.py @@ -1,3 +1,5 @@ +import re + from .custom_exceptions import EncoderError @@ -10,8 +12,16 @@ def encode_address(address: str) -> str: return to_hex_with_alignment(number) -def encode_bytes32(data: str) -> str: - return data.replace("0x", "") +def encode_fixed_bytes(value: str, length: int) -> str: + raw_hex = value.lower().replace("0x", "") + max_hex_len = length * 2 # each byte is 2 hex chars + if len(raw_hex) > max_hex_len: + raise EncoderError( + f"Provided bytes length exceeds {length} bytes (max {max_hex_len} hex chars)." + ) + # Right-pad with zeros up to fixed length, then left-pad to 64 hex chars total + raw_hex = raw_hex.ljust(max_hex_len, "0") # fill the fixed bytes + return raw_hex.ljust(64, "0") # fill up to 32 bytes in total def encode_bytes(data: str) -> str: @@ -83,17 +93,19 @@ def encode_constructor_arguments(constructor_abi: list, constructor_config_args: for argument_index in range(arg_length): arg_type = constructor_abi[argument_index]["type"] arg_value = constructor_config_args[argument_index] + if arg_type == "address": constructor_calldata += encode_address(arg_value) - elif ( - arg_type == "uint256" - or arg_type == "bool" - or arg_type == "uint8" - or arg_type == "uint32" - ): + elif arg_type == "bool": + constructor_calldata += to_hex_with_alignment(int(bool(arg_value))) + # Handle any integral type: uint, uint8..uint256, int, int8..int256 + elif re.match(r"^(u?int)(\d*)$", arg_type): constructor_calldata += to_hex_with_alignment(arg_value) - elif arg_type == "bytes32": - constructor_calldata += encode_bytes32(arg_value) + # Handle fixed-length bytes (e.g. bytes1..bytes32) + elif re.match(r"^bytes(\d+)$", arg_type): + match_len = re.match(r"^bytes(\d+)$", arg_type) + num_bytes = int(match_len.group(1)) + constructor_calldata += encode_fixed_bytes(arg_value, num_bytes) elif arg_type == "bytes" or arg_type.endswith("[]"): offset_to_start_of_data_part, encoded_value = encode_dynamic_type( arg_value, argument_index From 5c2dbd23d10d8a1cfb8a64ec30bccf46392d431c Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 7 Mar 2025 01:03:36 +0300 Subject: [PATCH 2/7] fix: encode_tuple fix --- diffyscan/utils/encoder.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/diffyscan/utils/encoder.py b/diffyscan/utils/encoder.py index 6d3a74a..a9fa251 100644 --- a/diffyscan/utils/encoder.py +++ b/diffyscan/utils/encoder.py @@ -48,10 +48,16 @@ def encode_tuple(types: list, args: list): arg_value = args[arg_index] if arg_type == "address": encoded_offsets += encode_address(arg_value) - elif arg_type == "uint256" or arg_type == "bool" or arg_type == "uint8": + elif arg_type == "bool": + encoded_offsets += to_hex_with_alignment(int(bool(arg_value))) + # Handle any integral type: uint, uint8..uint256, int, int8..int256 + elif re.match(r"^(u?int)(\d*)$", arg_type): encoded_offsets += to_hex_with_alignment(arg_value) - elif arg_type == "bytes32": - encoded_offsets += encode_bytes32(arg_value) + # Handle fixed-length bytes (e.g. bytes1..bytes32) + elif re.match(r"^bytes(\d+)$", arg_type): + match_len = re.match(r"^bytes(\d+)$", arg_type) + num_bytes = int(match_len.group(1)) + encoded_offsets += encode_fixed_bytes(arg_value, num_bytes) elif arg_type == "address[]" and not arg_value: encoded_data += to_hex_with_alignment(0) offset = to_hex_with_alignment((arg_index + args_length) * 32) From 54a70098f8424aa29a7c56e6d8e1bc8f10ad6758 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 7 Mar 2025 01:43:59 +0300 Subject: [PATCH 3/7] fix: custom exceptions --- diffyscan/utils/custom_exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diffyscan/utils/custom_exceptions.py b/diffyscan/utils/custom_exceptions.py index 9dc9d1f..1e4f24a 100644 --- a/diffyscan/utils/custom_exceptions.py +++ b/diffyscan/utils/custom_exceptions.py @@ -36,7 +36,7 @@ def __init__(self, reason: str): class ExplorerError(BaseCustomException): def __init__(self, reason: str): - super().__init__(f"Failed to communicate with Blockchain explorer: {reason}") + super().__init__(f"Failed to communicate with a remote resource: {reason}") class BinVerifierError(BaseCustomException): From 6c4a813a30a3c67fd02fa8fb044873d36a1e1c27 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Tue, 11 Mar 2025 00:10:29 +0300 Subject: [PATCH 4/7] fix: improve bytecode comparison --- diffyscan/diffyscan.py | 4 +- diffyscan/utils/binary_verifier.py | 254 +++++++++++++++------------ diffyscan/utils/encoder.py | 265 +++++++++++++++++++++-------- package-lock.json | 81 ++++----- package.json | 2 +- 5 files changed, 379 insertions(+), 227 deletions(-) diff --git a/diffyscan/diffyscan.py b/diffyscan/diffyscan.py index f731ac5..bf325b4 100644 --- a/diffyscan/diffyscan.py +++ b/diffyscan/diffyscan.py @@ -62,7 +62,7 @@ def run_bytecode_diff( is_fully_matched = local_compiled_bytecode == remote_deployed_bytecode if is_fully_matched: - logger.okay(f"Bytecodes are fully matched") + logger.okay(f"Bytecodes fully match") return logger.info(f"Automated match hasn't worked out") @@ -86,7 +86,7 @@ def run_bytecode_diff( is_fully_matched = local_deployed_bytecode == remote_deployed_bytecode if is_fully_matched: - logger.okay(f"Bytecodes are fully matched") + logger.okay(f"Bytecodes fully match") return deep_match_bytecode( diff --git a/diffyscan/utils/binary_verifier.py b/diffyscan/utils/binary_verifier.py index 1bd7fef..602e481 100644 --- a/diffyscan/utils/binary_verifier.py +++ b/diffyscan/utils/binary_verifier.py @@ -5,82 +5,154 @@ from .custom_exceptions import BinVerifierError -def format_bytecode(bytecode): +def format_bytecode(bytecode: str) -> str: + """Converts raw hex for an instruction into a '0x' prefixed string, or empty if none.""" return "0x" + bytecode[2:] if len(bytecode) > 2 else "" def trim_solidity_meta(bytecode: str) -> dict: + """ + Strips Solidity metadata from the end of the bytecode, if present. + Solidity appends a CBOR metadata section at the end, indicated by + the last 2 bytes in big-endian (multiplied by 2 for hex, plus 4). + """ meta_size = int(bytecode[-4:], 16) * 2 + 4 - if meta_size > len(bytecode): return {"bytecode": bytecode, "metadata": ""} - return { "bytecode": bytecode[:-meta_size], "metadata": bytecode[-meta_size:], } +def parse(bytecode: str): + """ + Parses raw hex EVM bytecode into a list of instructions: + [ { 'start': offset, 'length': N, 'op': {...}, 'bytecode': '...' }, ... ] + """ + buffer = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode) + instructions = [] + i = 0 + unknown_opcodes = set() + + while i < len(buffer): + opcode = buffer[i] + if opcode not in OPCODES: + unknown_opcodes.add(hex(opcode)) + + # For PUSH1..PUSH32, the length is 1 + (opcode - PUSH0) + length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0) + + instr_hex = buffer[i : i + length].hex() + instructions.append( + { + "start": i, + "length": length, + "op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode}, + "bytecode": instr_hex, + } + ) + + i += length + + return instructions, unknown_opcodes + + +def regions_overlap(a_start: int, a_len: int, b_start: int, b_len: int) -> bool: + """ + Return True if [a_start, a_start+a_len) overlaps with [b_start, b_start+b_len). + """ + a_end = a_start + a_len + b_end = b_start + b_len + # intervals do NOT overlap if one is entirely to the left of the other + if a_end <= b_start or b_end <= a_start: + return False + return True + + +def overlaps_any_immutable( + immutables: dict[int, int], instr_start: int, instr_len: int +) -> bool: + """ + Checks if the instruction byte range [instr_start.. instr_start+instr_len) + overlaps with ANY known immutable region [start.. start+length) from 'immutables'. + """ + for imm_start, imm_len in immutables.items(): + if regions_overlap(instr_start, instr_len, imm_start, imm_len): + return True + return False + + def deep_match_bytecode( actual_bytecode: str, expected_bytecode: str, immutables: dict ) -> None: + """ + Compare two chunks of bytecode instruction-by-instruction, ignoring differences + that appear within known 'immutable' regions. + + If: + - No differences => "Bytecodes fully match." + - Differences only in immutables => "Bytecodes have differences only on the immutable reference position." + - Differences outside immutables => raises BinVerifierError. + """ logger.info("Comparing actual code with the expected one...") - actual_trimmed_bytecode = trim_solidity_meta(actual_bytecode) - expected_trimmed_bytecode = trim_solidity_meta(expected_bytecode) + # Possibly strip out metadata from both + actual_trimmed = trim_solidity_meta(actual_bytecode) + expected_trimmed = trim_solidity_meta(expected_bytecode) - if actual_trimmed_bytecode["metadata"] or expected_trimmed_bytecode["metadata"]: + if actual_trimmed["metadata"] or expected_trimmed["metadata"]: logger.info("Metadata has been detected and trimmed") - actual_instructions, unknown_opcodes_first_half = parse( - actual_trimmed_bytecode["bytecode"] - ) - expected_instructions, unknown_opcodes_second_half = parse( - expected_trimmed_bytecode["bytecode"] - ) + # Parse instructions + actual_instructions, unknown_opcodes_a = parse(actual_trimmed["bytecode"]) + expected_instructions, unknown_opcodes_b = parse(expected_trimmed["bytecode"]) - unknown_opcodes = ( - unknown_opcodes_first_half or set() | unknown_opcodes_second_half or set() - ) + # Check for unknown opcodes + unknown_opcodes = unknown_opcodes_a | unknown_opcodes_b if unknown_opcodes: logger.warn(f"Detected unknown opcodes: {unknown_opcodes}") + # If they differ in length, we still attempt to compare if len(actual_instructions) != len(expected_instructions): - logger.warn(f"Codes have a different length") + logger.warn("Codes have a different length") - zipped_instructions = list( - itertools.zip_longest(actual_instructions, expected_instructions) - ) + # Pair them up by index + zipped_instructions = list(zip(actual_instructions, expected_instructions)) + + # Identify mismatch indexes + def is_mismatch(pair) -> bool: + return pair[0]["bytecode"] != pair[1]["bytecode"] - is_mismatch = ( - lambda pair: pair[0] is None - or pair[1] is None - or pair[0].get("bytecode") != pair[1].get("bytecode") - ) mismatches = [ - index for index, pair in enumerate(zipped_instructions) if is_mismatch(pair) + idx for idx, pair in enumerate(zipped_instructions) if is_mismatch(pair) ] - near_lines_count = 3 # context depth, i.e., the number of lines above and \below to be displayed for each diff + # If no mismatches at all => fully match + if not mismatches and len(actual_instructions) == len(expected_instructions): + logger.okay("Bytecodes fully match") + return + # We'll show a few lines around each mismatch for context + near_lines_count = 3 checkpoints = {0, *mismatches} - + # handle last line if instructions differ in count if actual_instructions: checkpoints.add(len(actual_instructions) - 1) - if expected_instructions: checkpoints.add(len(expected_instructions) - 1) + # Expand around mismatches for ind in list(checkpoints): - start_index = max(0, ind - near_lines_count) - end_index = min(ind + near_lines_count, len(zipped_instructions) - 1) - - checkpoints.update(range(start_index, end_index + 1)) + start_idx = max(0, ind - near_lines_count) + end_idx = min(ind + near_lines_count, len(zipped_instructions) - 1) + checkpoints.update(range(start_idx, end_idx + 1)) checkpoints = sorted(checkpoints) + # Print a small legend logger.divider() - logger.info(f"0000 00 STOP - both expected and actual bytecode instructions match") + logger.info("0000 00 STOP - both expected and actual bytecode instructions match") logger.info(f'{bgRed("0x0002")} - the actual bytecode differs') logger.info( f'{bgYellow("0x0001")} - the actual bytecode differs on the immutable reference position' @@ -97,94 +169,58 @@ def deep_match_bytecode( logger.divider() is_matched_with_excluded_immutables = True - for previous_index, current_index in zip(checkpoints, checkpoints[1:]): - if previous_index != current_index - 1: + + # Print the diff lines + # note: for shortness, we won't handle "None" instructions here, + # since we used zip() not zip_longest(). Adjust if needed. + for prev_idx, cur_idx in zip(checkpoints, checkpoints[1:]): + if prev_idx != cur_idx - 1: print("...") - actual = ( - actual_instructions[current_index] - if current_index < len(actual_instructions) - else None - ) - expected = ( - expected_instructions[current_index] - if current_index < len(expected_instructions) - else None - ) + actual = zipped_instructions[cur_idx][0] + expected = zipped_instructions[cur_idx][1] - if not actual and expected: - params = "0x" + expected["bytecode"][2:] - print( - red( - f'{to_hex(current_index, 4)} {to_hex(expected["op"]["code"])} {expected["op"]["name"]} {params}' - ) - ) - elif actual and not expected: - params = "0x" + actual["bytecode"][2:] - print( - green( - f'{to_hex(current_index, 4)} {to_hex(actual["op"]["code"])} {actual["op"]["name"]} {params}' - ) - ) - elif actual and expected: + # Compare opcodes + same_opcode = actual["op"]["code"] == expected["op"]["code"] + if same_opcode: + opcode = to_hex(actual["op"]["code"]) + opname = actual["op"]["name"] + else: opcode = ( - to_hex(actual["op"]["code"]) - if actual["op"]["code"] == expected["op"]["code"] - else bgRed(to_hex(actual["op"]["code"])) + bgRed(to_hex(actual["op"]["code"])) + " " + bgGreen(to_hex(expected["op"]["code"])) ) - opname = ( - actual["op"]["name"] - if actual["op"]["name"] == expected["op"]["name"] - else bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"]) - ) + opname = bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"]) - actual_params = format_bytecode(actual["bytecode"]) - expected_params = format_bytecode(expected["bytecode"]) + actual_params = format_bytecode(actual["bytecode"]) + expected_params = format_bytecode(expected["bytecode"]) - params_length = len(expected["bytecode"]) // 2 - 1 - is_immutable = immutables.get(expected["start"] + 1) == params_length - if actual_params != expected_params and not is_immutable: - is_matched_with_excluded_immutables = False - params = ( - actual_params - if actual_params == expected_params - else ( - bgYellow(actual_params) + " " + bgGreen(expected_params) - if is_immutable - else bgRed(actual_params) + " " + bgGreen(expected_params) - ) - ) - print(f"{to_hex(current_index, 4)} {opcode} {opname} {params}") + # Check partial overlap with immutables + instr_start = expected["start"] + instr_len = expected["length"] + within_immutable_region = overlaps_any_immutable( + immutables, instr_start, instr_len + ) + + if actual_params == expected_params: + # Perfect match => no highlight + params = actual_params else: - raise BinVerifierError("Invalid bytecode difference data") + # There's a difference + if within_immutable_region: + params = bgYellow(actual_params) + " " + bgGreen(expected_params) + else: + params = bgRed(actual_params) + " " + bgGreen(expected_params) + is_matched_with_excluded_immutables = False + + print(f"{to_hex(cur_idx, 4)} {opcode} {opname} {params}") + # If we found any mismatch outside immutables => fail if not is_matched_with_excluded_immutables: raise BinVerifierError( - f"Bytecodes have differences not on the immutable reference position" + "Bytecodes have differences not on the immutable reference position" ) - logger.okay(f"Bytecodes have differences only on the immutable reference position") - - -def parse(bytecode): - buffer = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode) - instructions = [] - i = 0 - unknown_opcodes = set() - while i < len(buffer): - opcode = buffer[i] - if opcode not in OPCODES: - unknown_opcodes.add(hex(opcode)) - length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0) - instructions.append( - { - "start": i, - "length": length, - "op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode}, - "bytecode": buffer[i : i + length].hex(), - } - ) - i += length - return instructions, unknown_opcodes + # Otherwise, differences exist but only in immutables + logger.okay("Bytecodes have differences only on the immutable reference position") diff --git a/diffyscan/utils/encoder.py b/diffyscan/utils/encoder.py index a9fa251..885a7f7 100644 --- a/diffyscan/utils/encoder.py +++ b/diffyscan/utils/encoder.py @@ -3,98 +3,222 @@ from .custom_exceptions import EncoderError +def _parse_solidity_int_type(arg_type: str) -> tuple[int, bool]: + """ + Given a Solidity int/uint type (e.g. 'uint256', 'int128', 'uint', 'int'), + returns (bits, is_signed). + - bits = 256 if no explicit size is specified. + - is_signed = True if it starts with 'int', False if 'uint'. + """ + match = re.match(r"^(u?int)(\d*)$", arg_type) + if not match: + raise EncoderError(f"Invalid integer type format '{arg_type}'.") + is_signed = not match.group(1).startswith("u") # 'uint' => False, 'int' => True + bits_str = match.group(2) + bits = int(bits_str) if bits_str else 256 + return (bits, is_signed) + + def to_hex_with_alignment(value: int) -> str: + """ + Encodes `value` (non-negative integer) as a 32-byte hex string. + For negative values, you must first apply two's complement. + """ return format(value, "064x") +def encode_int(value: int, bits: int, is_signed: bool) -> str: + """ + Encodes an integer value (possibly negative if signed) into 32 bytes + using two's complement for negative values. + """ + # Convert bool to int if needed (though typically you'd handle bool in a separate branch). + if isinstance(value, bool): + value = 1 if value else 0 + + # Python's 'format' doesn't automatically do two's-complement for negative integers. + # So if it's signed and value is negative, convert by adding 2^bits. + if is_signed and value < 0: + # e.g. for int256, 2^256 + value + value = (1 << bits) + value + + # Now ensure it fits within 'bits' + # (if bits=8, max = 2^7 - 1 for signed or 2^8-1 for unsigned). + # We'll skip a strict bounds check for brevity, but you could raise an error + # if abs(value) >= 2^(bits-1) for signed or value >= 2^bits for unsigned. + + return to_hex_with_alignment(value) + + def encode_address(address: str) -> str: - number = int(address, 16) + """ + Encodes an address as a 32-byte hex string. + Assumes 'address' is already a hex string (with '0x' or without). + """ + address_no_0x = address.lower().replace("0x", "") + # Convert to int + number = int(address_no_0x, 16) return to_hex_with_alignment(number) def encode_fixed_bytes(value: str, length: int) -> str: + """ + Encodes fixed-length bytes (e.g., bytes1..bytes32) into 32 bytes. + """ raw_hex = value.lower().replace("0x", "") - max_hex_len = length * 2 # each byte is 2 hex chars + max_hex_len = length * 2 # each byte => 2 hex chars if len(raw_hex) > max_hex_len: raise EncoderError( f"Provided bytes length exceeds {length} bytes (max {max_hex_len} hex chars)." ) - # Right-pad with zeros up to fixed length, then left-pad to 64 hex chars total - raw_hex = raw_hex.ljust(max_hex_len, "0") # fill the fixed bytes - return raw_hex.ljust(64, "0") # fill up to 32 bytes in total + # Right-pad the actual bytes to `length`, then pad to 32 bytes total + raw_hex = raw_hex.ljust(max_hex_len, "0") + return raw_hex.ljust(64, "0") def encode_bytes(data: str) -> str: - bytes_str = data.lstrip("0x") + """ + Encodes a dynamic `bytes` value as: + [ 32-byte length, (N + padded to multiple of 32) bytes data ] + Naive approach: `data` is a hex string (with or without 0x). + """ + bytes_str = data.lower().lstrip("0x") if not bytes_str: + # length = 0 return to_hex_with_alignment(0) - # Calculate the length of the hex-encoded 32-bytes padded data - # since EVM uses 32-byte (256-bit) words count_of_bytes_from_hex = len(bytes_str) // 2 - encoded_length = 0 - if count_of_bytes_from_hex > 0: - encoded_length = ((len(bytes_str) - 1) // 64 + 1) * 64 - bytes_str += "0" * (encoded_length - len(bytes_str)) - return to_hex_with_alignment(count_of_bytes_from_hex) + bytes_str - - -def encode_tuple(types: list, args: list): - args_length = len(types) - encoded_offsets = "" - encoded_data = "" - for arg_index in range(args_length): - arg_type = types[arg_index] - arg_value = args[arg_index] - if arg_type == "address": - encoded_offsets += encode_address(arg_value) + # how many hex chars needed to pad to next 32-byte boundary: + remainder = len(bytes_str) % 64 + if remainder != 0: + padding_needed = 64 - remainder + else: + padding_needed = 0 + + padded_bytes_str = bytes_str + ("0" * padding_needed) + + # first 32 bytes = length, then the data + return to_hex_with_alignment(count_of_bytes_from_hex) + padded_bytes_str + + +def encode_tuple(components_abi: list, values: list) -> str: + """ + Recursively encodes a tuple (struct). + If a component is itself 'tuple', we recurse. + If a component is an array type, we only allow empty arrays in this snippet. + For full dynamic-array encoding, you'd need offset-based logic + element encoding. + """ + if len(components_abi) != len(values): + raise EncoderError( + f"encode_tuple: mismatch in component count: {len(components_abi)} vs values: {len(values)}" + ) + + encoded = "" + for comp, val in zip(components_abi, values): + arg_type = comp["type"] + + # 1) Possibly a nested tuple + if arg_type == "tuple": + if "components" not in comp: + raise EncoderError("Tuple type missing 'components' in ABI data.") + encoded += encode_tuple(comp["components"], val) + + # 2) Possibly an array of addresses/ints, etc. + elif arg_type.endswith("[]"): + # If you need full dynamic array encoding in a struct, you'd do an offset-based approach here. + # We just handle the empty array case (0 length). + if not val: # empty array + encoded += to_hex_with_alignment(0) + else: + raise EncoderError( + "encode_tuple: non-empty dynamic arrays in tuples not yet supported." + ) + + # 3) address + elif arg_type == "address": + encoded += encode_address(val) + + # 4) bool elif arg_type == "bool": - encoded_offsets += to_hex_with_alignment(int(bool(arg_value))) - # Handle any integral type: uint, uint8..uint256, int, int8..int256 + # Could unify with int, but let's keep it explicit for readability + encoded += to_hex_with_alignment(int(bool(val))) + + # 5) integer types (uint, int, etc.) elif re.match(r"^(u?int)(\d*)$", arg_type): - encoded_offsets += to_hex_with_alignment(arg_value) - # Handle fixed-length bytes (e.g. bytes1..bytes32) + bits, is_signed = _parse_solidity_int_type(arg_type) + encoded += encode_int(int(val), bits, is_signed) + + # 6) fixed-length bytes elif re.match(r"^bytes(\d+)$", arg_type): match_len = re.match(r"^bytes(\d+)$", arg_type) num_bytes = int(match_len.group(1)) - encoded_offsets += encode_fixed_bytes(arg_value, num_bytes) - elif arg_type == "address[]" and not arg_value: - encoded_data += to_hex_with_alignment(0) - offset = to_hex_with_alignment((arg_index + args_length) * 32) - encoded_offsets += offset + encoded += encode_fixed_bytes(val, num_bytes) + + # 7) dynamic bytes + elif arg_type == "bytes": + encoded += encode_bytes(val) + + # 8) string + elif arg_type == "string": + # For a struct field that is a string, you'd typically do offset-based dynamic encoding. + raise EncoderError( + "encode_tuple: 'string' inside tuple not fully implemented." + ) + else: raise EncoderError( - f"Unknown constructor argument type '{arg_type}' in tuple" + f"Unknown or unhandled type '{arg_type}' in encode_tuple" ) - return encoded_offsets + encoded_data + + return encoded def encode_dynamic_type(arg_value: str, argument_index: int): + """ + Encodes a top-level dynamic `bytes` or array argument as: + [ offset, ... data in the 'compl_data' section ... ] + This snippet is naive: for a real array, you'd handle array length + each element. + """ + # For now, we just handle a raw bytes value in hex form: offset_to_start_of_data_part = to_hex_with_alignment((argument_index + 1) * 32) encoded_value = encode_bytes(arg_value) return offset_to_start_of_data_part, encoded_value def encode_string(arg_length: int, compl_data: list, arg_value: str): + """ + Encodes a top-level string argument in the same offset + data approach + used by 'encode_dynamic_type'. We do: + [ offset, ... then length + contents in 'compl_data' ... ] + """ argument_index = arg_length + len(compl_data) - encoded_value = arg_value.encode("utf-8") + encoded_value_bytes = arg_value.encode("utf-8") offset_to_start_of_data_part = to_hex_with_alignment(argument_index * 32) - encoded_value_length = to_hex_with_alignment(len(encoded_value)) + encoded_value_length = to_hex_with_alignment(len(encoded_value_bytes)) + # We'll pad the actual string data to a multiple of 32 + hex_str = encoded_value_bytes.hex() + remainder = len(hex_str) % 64 + if remainder != 0: + padding_needed = 64 - remainder + hex_str += "0" * padding_needed + return ( offset_to_start_of_data_part, encoded_value_length, - encoded_value.hex().ljust(64, "0"), + hex_str, ) def encode_constructor_arguments(constructor_abi: list, constructor_config_args: list): - # see https://docs.soliditylang.org/en/develop/abi-spec.html#contract-abi-specification - # transferred from here: - # https://github.com/lidofinance/lido-dao/blob/master/bytecode-verificator/bytecode_verificator.sh#L369-L405 + """ + Encodes each constructor argument in order, concatenating the result. + Appends any 'compl_data' (dynamic offsets, etc.) at the end. + """ arg_length = len(constructor_abi) constructor_calldata = "" compl_data = [] + try: for argument_index in range(arg_length): arg_type = constructor_abi[argument_index]["type"] @@ -102,48 +226,49 @@ def encode_constructor_arguments(constructor_abi: list, constructor_config_args: if arg_type == "address": constructor_calldata += encode_address(arg_value) + elif arg_type == "bool": constructor_calldata += to_hex_with_alignment(int(bool(arg_value))) - # Handle any integral type: uint, uint8..uint256, int, int8..int256 + elif re.match(r"^(u?int)(\d*)$", arg_type): - constructor_calldata += to_hex_with_alignment(arg_value) - # Handle fixed-length bytes (e.g. bytes1..bytes32) + # parse bits + sign + bits, is_signed = _parse_solidity_int_type(arg_type) + constructor_calldata += encode_int(int(arg_value), bits, is_signed) + elif re.match(r"^bytes(\d+)$", arg_type): + # fixed-length bytes match_len = re.match(r"^bytes(\d+)$", arg_type) num_bytes = int(match_len.group(1)) constructor_calldata += encode_fixed_bytes(arg_value, num_bytes) + elif arg_type == "bytes" or arg_type.endswith("[]"): - offset_to_start_of_data_part, encoded_value = encode_dynamic_type( - arg_value, argument_index - ) - constructor_calldata += offset_to_start_of_data_part + # top-level dynamic array or raw bytes + offset, encoded_value = encode_dynamic_type(arg_value, argument_index) + constructor_calldata += offset compl_data.append(encoded_value) + elif arg_type == "string": - offset_to_start_of_data_part, encoded_value_length, encoded_value = ( - encode_string(arg_length, compl_data, arg_value) + offset, length_hex, contents_hex = encode_string( + arg_length, compl_data, arg_value ) - constructor_calldata += offset_to_start_of_data_part - compl_data.append(encoded_value_length) - compl_data.append(encoded_value) + constructor_calldata += offset + compl_data.append(length_hex) + compl_data.append(contents_hex) + elif arg_type == "tuple": - args_tuple_types = [ - component["type"] - for component in constructor_abi[argument_index]["components"] - ] - if all(arg == "address[]" for arg in args_tuple_types): - argument_index = len(constructor_calldata) // 64 - offset_to_start_of_data_part = to_hex_with_alignment( - (argument_index + 1) * 32 - ) - constructor_calldata += offset_to_start_of_data_part - compl_data.append(encode_tuple(args_tuple_types, arg_value)) - else: - constructor_calldata += encode_tuple(args_tuple_types, arg_value) + tuple_abi = constructor_abi[argument_index]["components"] + constructor_calldata += encode_tuple(tuple_abi, arg_value) + else: - raise EncoderError(f"Unknown constructor argument type: {arg_type}") + raise EncoderError( + f"Unknown or unhandled constructor argument type: {arg_type}" + ) + except Exception as e: - raise EncoderError(e) from None - for offset_to_start_of_data_part in compl_data: - constructor_calldata += offset_to_start_of_data_part + raise EncoderError(f"Failed to encode calldata arguments: {e}") from None + + # Append any "completion" data (the actual dynamic data or string contents) + for data_part in compl_data: + constructor_calldata += data_part return constructor_calldata diff --git a/package-lock.json b/package-lock.json index ea177fb..6c1aad2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "hardhat": "^2.22.17", + "hardhat": "^2.22.19", "kill-port": "^2.0.1" }, "devDependencies": { @@ -773,82 +773,74 @@ ] }, "node_modules/@nomicfoundation/edr": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr/-/edr-0.7.0.tgz", - "integrity": "sha512-+Zyu7TE47TGNcPhOfWLPA/zISs32WDMXrhSWdWYyPHDVn/Uux5TVuOeScKb0BR/R8EJ+leR8COUF/EGxvDOVKg==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr/-/edr-0.8.0.tgz", + "integrity": "sha512-dwWRrghSVBQDpt0wP+6RXD8BMz2i/9TI34TcmZqeEAZuCLei3U9KZRgGTKVAM1rMRvrpf5ROfPqrWNetKVUTag==", "dependencies": { - "@nomicfoundation/edr-darwin-arm64": "0.7.0", - "@nomicfoundation/edr-darwin-x64": "0.7.0", - "@nomicfoundation/edr-linux-arm64-gnu": "0.7.0", - "@nomicfoundation/edr-linux-arm64-musl": "0.7.0", - "@nomicfoundation/edr-linux-x64-gnu": "0.7.0", - "@nomicfoundation/edr-linux-x64-musl": "0.7.0", - "@nomicfoundation/edr-win32-x64-msvc": "0.7.0" + "@nomicfoundation/edr-darwin-arm64": "0.8.0", + "@nomicfoundation/edr-darwin-x64": "0.8.0", + "@nomicfoundation/edr-linux-arm64-gnu": "0.8.0", + "@nomicfoundation/edr-linux-arm64-musl": "0.8.0", + "@nomicfoundation/edr-linux-x64-gnu": "0.8.0", + "@nomicfoundation/edr-linux-x64-musl": "0.8.0", + "@nomicfoundation/edr-win32-x64-msvc": "0.8.0" }, "engines": { "node": ">= 18" } }, "node_modules/@nomicfoundation/edr-darwin-arm64": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-darwin-arm64/-/edr-darwin-arm64-0.7.0.tgz", - "integrity": "sha512-vAH20oh4GaSB/iQFTRcoO8jLc0CLd9XuLY9I7vtcqZWAiM4U1J4Y8cu67PWmtxbvUQOqXR7S6FtAr8/AlWm14g==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-darwin-arm64/-/edr-darwin-arm64-0.8.0.tgz", + "integrity": "sha512-sKTmOu/P5YYhxT0ThN2Pe3hmCE/5Ag6K/eYoiavjLWbR7HEb5ZwPu2rC3DpuUk1H+UKJqt7o4/xIgJxqw9wu6A==", "engines": { "node": ">= 18" } }, "node_modules/@nomicfoundation/edr-darwin-x64": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-darwin-x64/-/edr-darwin-x64-0.7.0.tgz", - "integrity": "sha512-WHDdIrPvLlgXQr2eKypBM5xOZAwdxhDAEQIvEMQL8tEEm2qYW2bliUlssBPrs8E3bdivFbe1HizImslMAfU3+g==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-darwin-x64/-/edr-darwin-x64-0.8.0.tgz", + "integrity": "sha512-8ymEtWw1xf1Id1cc42XIeE+9wyo3Dpn9OD/X8GiaMz9R70Ebmj2g+FrbETu8o6UM+aL28sBZQCiCzjlft2yWAg==", "engines": { "node": ">= 18" } }, "node_modules/@nomicfoundation/edr-linux-arm64-gnu": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-arm64-gnu/-/edr-linux-arm64-gnu-0.7.0.tgz", - "integrity": "sha512-WXpJB54ukz1no7gxCPXVEw9pgl/9UZ/WO3l1ctyv/T7vOygjqA4SUd6kppTs6MNXAuTiisPtvJ/fmvHiMBLrsw==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-arm64-gnu/-/edr-linux-arm64-gnu-0.8.0.tgz", + "integrity": "sha512-h/wWzS2EyQuycz+x/SjMRbyA+QMCCVmotRsgM1WycPARvVZWIVfwRRsKoXKdCftsb3S8NTprqBdJlOmsFyETFA==", "engines": { "node": ">= 18" } }, "node_modules/@nomicfoundation/edr-linux-arm64-musl": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-arm64-musl/-/edr-linux-arm64-musl-0.7.0.tgz", - "integrity": "sha512-1iZYOcEgc+zJI7JQrlAFziuy9sBz1WgnIx3HIIu0J7lBRZ/AXeHHgATb+4InqxtEx9O3W8A0s7f11SyFqJL4Aw==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-arm64-musl/-/edr-linux-arm64-musl-0.8.0.tgz", + "integrity": "sha512-gnWxDgdkka0O9GpPX/gZT3REeKYV28Guyg13+Vj/bbLpmK1HmGh6Kx+fMhWv+Ht/wEmGDBGMCW1wdyT/CftJaQ==", "engines": { "node": ">= 18" } }, "node_modules/@nomicfoundation/edr-linux-x64-gnu": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-x64-gnu/-/edr-linux-x64-gnu-0.7.0.tgz", - "integrity": "sha512-wSjC94WcR5MM8sg9w3OsAmT6+bbmChJw6uJKoXR3qscps/jdhjzJWzfgT0XGRq3XMUfimyafW2RWOyfX3ouhrQ==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-x64-gnu/-/edr-linux-x64-gnu-0.8.0.tgz", + "integrity": "sha512-DTMiAkgAx+nyxcxKyxFZk1HPakXXUCgrmei7r5G7kngiggiGp/AUuBBWFHi8xvl2y04GYhro5Wp+KprnLVoAPA==", "engines": { "node": ">= 18" } }, "node_modules/@nomicfoundation/edr-linux-x64-musl": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-x64-musl/-/edr-linux-x64-musl-0.7.0.tgz", - "integrity": "sha512-Us22+AZ7wkG1mZwxqE4S4ZcuwkEA5VrUiBOJSvKHGOgy6vFvB/Euh5Lkp4GovwjrtiXuvyGO2UmtkzymZKDxZw==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-linux-x64-musl/-/edr-linux-x64-musl-0.8.0.tgz", + "integrity": "sha512-iTITWe0Zj8cNqS0xTblmxPbHVWwEtMiDC+Yxwr64d7QBn/1W0ilFQ16J8gB6RVVFU3GpfNyoeg3tUoMpSnrm6Q==", "engines": { "node": ">= 18" } }, "node_modules/@nomicfoundation/edr-win32-x64-msvc": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-win32-x64-msvc/-/edr-win32-x64-msvc-0.7.0.tgz", - "integrity": "sha512-HAry0heTsWkzReVtjHwoIq3BgFCvXpVhJ5qPmTnegZGsr/KxqvMmHyDMifzKao4bycU8yrpTSyOiAJt27RWjzQ==", - "license": "MIT", + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/@nomicfoundation/edr-win32-x64-msvc/-/edr-win32-x64-msvc-0.8.0.tgz", + "integrity": "sha512-mNRDyd/C3j7RMcwapifzv2K57sfA5xOw8g2U84ZDvgSrXVXLC99ZPxn9kmolb+dz8VMm9FONTZz9ESS6v8DTnA==", "engines": { "node": ">= 18" } @@ -2352,14 +2344,13 @@ "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==" }, "node_modules/hardhat": { - "version": "2.22.18", - "resolved": "https://registry.npmjs.org/hardhat/-/hardhat-2.22.18.tgz", - "integrity": "sha512-2+kUz39gvMo56s75cfLBhiFedkQf+gXdrwCcz4R/5wW0oBdwiyfj2q9BIkMoaA0WIGYYMU2I1Cc4ucTunhfjzw==", - "license": "MIT", + "version": "2.22.19", + "resolved": "https://registry.npmjs.org/hardhat/-/hardhat-2.22.19.tgz", + "integrity": "sha512-jptJR5o6MCgNbhd7eKa3mrteR+Ggq1exmE5RUL5ydQEVKcZm0sss5laa86yZ0ixIavIvF4zzS7TdGDuyopj0sQ==", "dependencies": { "@ethersproject/abi": "^5.1.2", "@metamask/eth-sig-util": "^4.0.0", - "@nomicfoundation/edr": "^0.7.0", + "@nomicfoundation/edr": "^0.8.0", "@nomicfoundation/ethereumjs-common": "4.0.4", "@nomicfoundation/ethereumjs-tx": "5.0.4", "@nomicfoundation/ethereumjs-util": "9.0.4", diff --git a/package.json b/package.json index 0d06a53..343dd26 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "@commitlint/config-conventional": "^19.5.0" }, "dependencies": { - "hardhat": "^2.22.17", + "hardhat": "^2.22.19", "kill-port": "^2.0.1" }, "packageManager": "npm@10.8.2", From 33e2c09dbafb5d214d7af6811b10748367661318 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Tue, 11 Mar 2025 12:17:36 +0300 Subject: [PATCH 5/7] fix: explorer host detection --- diffyscan/utils/explorer.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/diffyscan/utils/explorer.py b/diffyscan/utils/explorer.py index 385e7a4..4908dc1 100644 --- a/diffyscan/utils/explorer.py +++ b/diffyscan/utils/explorer.py @@ -238,13 +238,10 @@ def get_explorer_hostname(config): explorer_hostname = load_env( config["explorer_hostname_env_var"], masked=True, required=False ) - if explorer_hostname is None: - logger.warn( - f'Failed to find an explorer hostname env in the config ("explorer_hostname_env_var")' - ) + elif "explorer_hostname" in config: explorer_hostname = config["explorer_hostname"] - if explorer_hostname is None: + else: logger.warn( - f'Failed to find explorer hostname in the config ("explorer_hostname")' + f'Failed to find explorer hostname in the config ("explorer_hostname" or "explorer_hostname_env_var")' ) return explorer_hostname From ceb494e4a52cb6e28b2519e5e1703587d60917e5 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Tue, 11 Mar 2025 15:54:54 +0300 Subject: [PATCH 6/7] fix: handle arrays properly --- diffyscan/diffyscan.py | 4 ++- diffyscan/utils/encoder.py | 67 +++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/diffyscan/diffyscan.py b/diffyscan/diffyscan.py index bf325b4..11b0608 100644 --- a/diffyscan/diffyscan.py +++ b/diffyscan/diffyscan.py @@ -65,7 +65,9 @@ def run_bytecode_diff( logger.okay(f"Bytecodes fully match") return - logger.info(f"Automated match hasn't worked out") + logger.info( + f"Static bytecodes not match, trying local deployment to bind immutables" + ) calldata = get_calldata( contract_address_from_config, diff --git a/diffyscan/utils/encoder.py b/diffyscan/utils/encoder.py index 885a7f7..939e251 100644 --- a/diffyscan/utils/encoder.py +++ b/diffyscan/utils/encoder.py @@ -209,11 +209,53 @@ def encode_string(arg_length: int, compl_data: list, arg_value: str): ) -def encode_constructor_arguments(constructor_abi: list, constructor_config_args: list): +def encode_array(element_type: str, elements: list) -> str: """ - Encodes each constructor argument in order, concatenating the result. - Appends any 'compl_data' (dynamic offsets, etc.) at the end. + Encodes a one-dimensional dynamic array of the given element_type: + - (u)intX, bool, address, or a simple type you already handle + Returns the concatenated hex string: + [ 32-byte array length, each element in 32 bytes (or more if necessary) ] """ + # 1) Encode array length + length_hex = to_hex_with_alignment(len(elements)) + + # 2) Encode each element + elements_hex = "" + + # The element_type might be 'address', 'uint256', etc. + # If it's a nested array, e.g. 'uint256[]', you must do recursion. + # For simplicity, let's handle only top-level single array of simple types. + + # If it's e.g. 'uint', parse bits, is_signed + uint_int_match = re.match(r"^(u?int)(\d*)$", element_type) + + for elem in elements: + if element_type == "address": + elements_hex += encode_address(elem) + + elif element_type == "bool": + elements_hex += to_hex_with_alignment(int(bool(elem))) + + elif uint_int_match: + bits, is_signed = _parse_solidity_int_type(element_type) + elements_hex += encode_int(int(elem), bits, is_signed) + + else: + # If you have 'bytes32[]' or something else, handle it: + bytesN_match = re.match(r"^bytes(\d+)$", element_type) + if bytesN_match: + num_bytes = int(bytesN_match.group(1)) + elements_hex += encode_fixed_bytes(elem, num_bytes) + else: + # If you want advanced features like nested arrays or strings, you'd do it here + raise EncoderError( + f"encode_array: unhandled element type '{element_type}'" + ) + + return length_hex + elements_hex + + +def encode_constructor_arguments(constructor_abi: list, constructor_config_args: list): arg_length = len(constructor_abi) constructor_calldata = "" @@ -231,18 +273,15 @@ def encode_constructor_arguments(constructor_abi: list, constructor_config_args: constructor_calldata += to_hex_with_alignment(int(bool(arg_value))) elif re.match(r"^(u?int)(\d*)$", arg_type): - # parse bits + sign bits, is_signed = _parse_solidity_int_type(arg_type) constructor_calldata += encode_int(int(arg_value), bits, is_signed) elif re.match(r"^bytes(\d+)$", arg_type): - # fixed-length bytes match_len = re.match(r"^bytes(\d+)$", arg_type) num_bytes = int(match_len.group(1)) constructor_calldata += encode_fixed_bytes(arg_value, num_bytes) - elif arg_type == "bytes" or arg_type.endswith("[]"): - # top-level dynamic array or raw bytes + elif arg_type == "bytes": offset, encoded_value = encode_dynamic_type(arg_value, argument_index) constructor_calldata += offset compl_data.append(encoded_value) @@ -259,6 +298,18 @@ def encode_constructor_arguments(constructor_abi: list, constructor_config_args: tuple_abi = constructor_abi[argument_index]["components"] constructor_calldata += encode_tuple(tuple_abi, arg_value) + elif arg_type.endswith("[]"): + # The "base type" is everything before the final "[]" + element_type = arg_type[:-2] # e.g. "uint256" or "address" + + # 1) Write the offset for this dynamic array + offset_hex = to_hex_with_alignment((argument_index + 1) * 32) + constructor_calldata += offset_hex + + # 2) Build the array payload: length + each element + array_payload = encode_array(element_type, arg_value) + compl_data.append(array_payload) + else: raise EncoderError( f"Unknown or unhandled constructor argument type: {arg_type}" @@ -267,7 +318,7 @@ def encode_constructor_arguments(constructor_abi: list, constructor_config_args: except Exception as e: raise EncoderError(f"Failed to encode calldata arguments: {e}") from None - # Append any "completion" data (the actual dynamic data or string contents) + # Finally, append any "completion" data for data_part in compl_data: constructor_calldata += data_part From 92eb392a42d0e6fa659f13183ce442f63975307f Mon Sep 17 00:00:00 2001 From: Eugene M Date: Tue, 11 Mar 2025 17:51:01 +0300 Subject: [PATCH 7/7] fix: recursive encode_tuple --- diffyscan/utils/encoder.py | 98 ++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/diffyscan/utils/encoder.py b/diffyscan/utils/encoder.py index 939e251..d97d22c 100644 --- a/diffyscan/utils/encoder.py +++ b/diffyscan/utils/encoder.py @@ -103,74 +103,80 @@ def encode_bytes(data: str) -> str: def encode_tuple(components_abi: list, values: list) -> str: """ - Recursively encodes a tuple (struct). - If a component is itself 'tuple', we recurse. - If a component is an array type, we only allow empty arrays in this snippet. - For full dynamic-array encoding, you'd need offset-based logic + element encoding. + Recursively encodes a tuple (struct) with support for dynamic arrays. + This version splits the tuple into a static part and a dynamic part. + Dynamic components (like T[] or bytes) are replaced with an offset (relative + to the start of the dynamic section), and their actual data is appended afterward. + + Note: This is a simplified implementation and may not cover all edge cases not presuming nested dynamic types. """ if len(components_abi) != len(values): raise EncoderError( f"encode_tuple: mismatch in component count: {len(components_abi)} vs values: {len(values)}" ) - encoded = "" + static_parts = [] + dynamic_parts = [] + + # First, encode each element into a static part (if static) or reserve a placeholder (if dynamic). for comp, val in zip(components_abi, values): arg_type = comp["type"] - # 1) Possibly a nested tuple + # Nested tuple: recurse. if arg_type == "tuple": - if "components" not in comp: - raise EncoderError("Tuple type missing 'components' in ABI data.") - encoded += encode_tuple(comp["components"], val) - - # 2) Possibly an array of addresses/ints, etc. - elif arg_type.endswith("[]"): - # If you need full dynamic array encoding in a struct, you'd do an offset-based approach here. - # We just handle the empty array case (0 length). - if not val: # empty array - encoded += to_hex_with_alignment(0) + # (Assumes nested tuples are fully static + static_parts.append(encode_tuple(comp["components"], val)) + + # Dynamic array or dynamic bytes: + elif arg_type.endswith("[]") or arg_type in ["bytes", "string"]: + # Reserve a placeholder; dynamic data will be appended. + static_parts.append(None) + if arg_type.endswith("[]"): + # For a dynamic array, the element type is the part before "[]". + base_type = arg_type[:-2] + dynamic_parts.append(encode_array(base_type, val)) + elif arg_type == "bytes": + dynamic_parts.append(encode_bytes(val)) else: raise EncoderError( - "encode_tuple: non-empty dynamic arrays in tuples not yet supported." + "encode_tuple: 'string' inside tuple not implemented." ) - # 3) address + # Otherwise, treat as a static type. elif arg_type == "address": - encoded += encode_address(val) - - # 4) bool + static_parts.append(encode_address(val)) elif arg_type == "bool": - # Could unify with int, but let's keep it explicit for readability - encoded += to_hex_with_alignment(int(bool(val))) - - # 5) integer types (uint, int, etc.) + static_parts.append(to_hex_with_alignment(int(bool(val)))) elif re.match(r"^(u?int)(\d*)$", arg_type): bits, is_signed = _parse_solidity_int_type(arg_type) - encoded += encode_int(int(val), bits, is_signed) - - # 6) fixed-length bytes + static_parts.append(encode_int(int(val), bits, is_signed)) elif re.match(r"^bytes(\d+)$", arg_type): match_len = re.match(r"^bytes(\d+)$", arg_type) num_bytes = int(match_len.group(1)) - encoded += encode_fixed_bytes(val, num_bytes) - - # 7) dynamic bytes - elif arg_type == "bytes": - encoded += encode_bytes(val) - - # 8) string - elif arg_type == "string": - # For a struct field that is a string, you'd typically do offset-based dynamic encoding. - raise EncoderError( - "encode_tuple: 'string' inside tuple not fully implemented." - ) - + static_parts.append(encode_fixed_bytes(val, num_bytes)) else: - raise EncoderError( - f"Unknown or unhandled type '{arg_type}' in encode_tuple" - ) - - return encoded + raise EncoderError(f"Unknown type '{arg_type}' in tuple") + + # Now calculate the static size (each static part is 32 bytes) + static_size = 32 * len(static_parts) + dynamic_offset = 0 + # Replace None placeholders with offsets (relative to the beginning of the dynamic section) + for i in range(len(static_parts)): + if static_parts[i] is None: + # The offset is computed as static_size + current dynamic_offset + static_parts[i] = to_hex_with_alignment(static_size + dynamic_offset) + # Assume each dynamic part is already 32-byte aligned. + part_length = len(dynamic_parts.pop(0)) // 2 + # Round up to the next multiple of 32 bytes: + padded_length = ((part_length + 31) // 32) * 32 + dynamic_offset += padded_length + + # Concatenate static parts and then (re-)concatenate dynamic parts. + encoded_static = "".join(static_parts) + # TODO: dynamic parts for this non-nested impl are omitted + encoded_dynamic = "" + + return encoded_static + encoded_dynamic def encode_dynamic_type(arg_value: str, argument_index: int):