lidofinance · Mar 10, 2025
diff --git a/‎diffyscan/diffyscan.py
+2-2 b/‎diffyscan/diffyscan.py
+2-2
diff --git a/‎diffyscan/utils/binary_verifier.py
+145-109 b/‎diffyscan/utils/binary_verifier.py
+145-109
diff --git a/‎diffyscan/utils/encoder.py
+195-70 b/‎diffyscan/utils/encoder.py
+195-70
diff --git a/‎package-lock.json
+36-45 b/‎package-lock.json
+36-45
diff --git a/‎package.json
+1-1 b/‎package.json
+1-1
@@ -62,7 +62,7 @@ def run_bytecode_diff(
     is_fully_matched = local_compiled_bytecode == remote_deployed_bytecode
 
     if is_fully_matched:
-        logger.okay(f"Bytecodes are fully matched")
+        logger.okay(f"Bytecodes fully match")
         return
 
     logger.info(f"Automated match hasn't worked out")
@@ -86,7 +86,7 @@ def run_bytecode_diff(
     is_fully_matched = local_deployed_bytecode == remote_deployed_bytecode
 
     if is_fully_matched:
-        logger.okay(f"Bytecodes are fully matched")
+        logger.okay(f"Bytecodes fully match")
         return
 
     deep_match_bytecode(
 
@@ -5,82 +5,154 @@
 from .custom_exceptions import BinVerifierError
 
 
-def format_bytecode(bytecode):
+def format_bytecode(bytecode: str) -> str:
+    """Returns one instruction's operand (its hex without the leading opcode byte) as a '0x'-prefixed string, or '' if there is no operand."""
     return "0x" + bytecode[2:] if len(bytecode) > 2 else ""
 
 
 def trim_solidity_meta(bytecode: str) -> dict:
+    """
+    Splits trailing Solidity metadata off a bytecode hex string: {"bytecode", "metadata"}.
+    The last 2 bytes (4 hex chars) are read as a big-endian length, doubled to count
+    hex chars, plus 4 for the length field; "metadata" is "" if that exceeds the input.
+    """
     meta_size = int(bytecode[-4:], 16) * 2 + 4
-
     if meta_size > len(bytecode):
         return {"bytecode": bytecode, "metadata": ""}
-
     return {
         "bytecode": bytecode[:-meta_size],
         "metadata": bytecode[-meta_size:],
     }
 
 
+def parse(bytecode: str):
+    """
+    Decodes EVM bytecode hex (an optional leading '0x' is skipped) into
+    (instructions, unknown_opcodes). Each instruction is
+      { 'start': offset, 'length': N, 'op': {'name', 'code'}, 'bytecode': hex }
+    and unknown_opcodes is the set of hex() strings of bytes missing from OPCODES.
+    """
+    hex_body = bytecode[2:] if bytecode.startswith("0x") else bytecode
+    code = bytes.fromhex(hex_body)
+    total = len(code)
+    parsed = []
+    unknown = set()
+
+    offset = 0
+    while offset < total:
+        op = code[offset]
+        if op not in OPCODES:
+            unknown.add(hex(op))
+
+        # Opcodes in PUSH0..PUSH32 carry (op - PUSH0) operand bytes after the opcode
+        operand_size = op - PUSH0 if PUSH0 <= op <= PUSH32 else 0
+        size = 1 + operand_size
+
+        parsed.append(
+            {
+                "start": offset,
+                "length": size,
+                "op": {"name": OPCODES.get(op, "INVALID"), "code": op},
+                "bytecode": code[offset : offset + size].hex(),
+            }
+        )
+        offset += size
+
+    return parsed, unknown
+
+
+def regions_overlap(a_start: int, a_len: int, b_start: int, b_len: int) -> bool:
+    """
+    Tell whether the half-open ranges [a_start, a_start+a_len) and
+    [b_start, b_start+b_len) intersect.
+    """
+    # Two ranges intersect exactly when each one begins before the other ends;
+    # ranges that merely touch at an endpoint do not count.
+    a_begins_before_b_ends = a_start < b_start + b_len
+    b_begins_before_a_ends = b_start < a_start + a_len
+    return a_begins_before_b_ends and b_begins_before_a_ends
+
+
+def overlaps_any_immutable(
+    immutables: dict[int, int], instr_start: int, instr_len: int
+) -> bool:
+    """
+    True if the instruction bytes [instr_start, instr_start+instr_len) intersect
+    any region of 'immutables', which maps a region's start offset to its length.
+    """
+    return any(
+        regions_overlap(instr_start, instr_len, region_start, region_len)
+        for region_start, region_len in immutables.items()
+    )
+
+
 def deep_match_bytecode(
     actual_bytecode: str, expected_bytecode: str, immutables: dict
 ) -> None:
+    """
+    Compare two chunks of bytecode instruction-by-instruction, ignoring differences
+    that appear within known 'immutable' regions.
+
+    If:
+      - No differences => "Bytecodes fully match."
+      - Differences only in immutables => "Bytecodes have differences only on the immutable reference position."
+      - Differences outside immutables => raises BinVerifierError.
+    """
     logger.info("Comparing actual code with the expected one...")
 
-    actual_trimmed_bytecode = trim_solidity_meta(actual_bytecode)
-    expected_trimmed_bytecode = trim_solidity_meta(expected_bytecode)
+    # Possibly strip out metadata from both
+    actual_trimmed = trim_solidity_meta(actual_bytecode)
+    expected_trimmed = trim_solidity_meta(expected_bytecode)
 
-    if actual_trimmed_bytecode["metadata"] or expected_trimmed_bytecode["metadata"]:
+    if actual_trimmed["metadata"] or expected_trimmed["metadata"]:
         logger.info("Metadata has been detected and trimmed")
 
-    actual_instructions, unknown_opcodes_first_half = parse(
-        actual_trimmed_bytecode["bytecode"]
-    )
-    expected_instructions, unknown_opcodes_second_half = parse(
-        expected_trimmed_bytecode["bytecode"]
-    )
+    # Parse instructions
+    actual_instructions, unknown_opcodes_a = parse(actual_trimmed["bytecode"])
+    expected_instructions, unknown_opcodes_b = parse(expected_trimmed["bytecode"])
 
-    unknown_opcodes = (
-        unknown_opcodes_first_half or set() | unknown_opcodes_second_half or set()
-    )
+    # Check for unknown opcodes
+    unknown_opcodes = unknown_opcodes_a | unknown_opcodes_b
     if unknown_opcodes:
         logger.warn(f"Detected unknown opcodes: {unknown_opcodes}")
 
+    # If they differ in length, we still attempt to compare
     if len(actual_instructions) != len(expected_instructions):
-        logger.warn(f"Codes have a different length")
+        logger.warn("Codes have a different length")
 
-    zipped_instructions = list(
-        itertools.zip_longest(actual_instructions, expected_instructions)
-    )
+    # Pair them up by index
+    zipped_instructions = list(zip(actual_instructions, expected_instructions))
+
+    # Identify mismatch indexes
+    def is_mismatch(pair) -> bool:
+        return pair[0]["bytecode"] != pair[1]["bytecode"]
 
-    is_mismatch = (
-        lambda pair: pair[0] is None
-        or pair[1] is None
-        or pair[0].get("bytecode") != pair[1].get("bytecode")
-    )
     mismatches = [
-        index for index, pair in enumerate(zipped_instructions) if is_mismatch(pair)
+        idx for idx, pair in enumerate(zipped_instructions) if is_mismatch(pair)
     ]
 
-    near_lines_count = 3  # context depth, i.e., the number of lines above and \below to be displayed for each diff
+    # If no mismatches at all => fully match
+    if not mismatches and len(actual_instructions) == len(expected_instructions):
+        logger.okay("Bytecodes fully match")
+        return
 
+    # We'll show a few lines around each mismatch for context
+    near_lines_count = 3
     checkpoints = {0, *mismatches}
-
+    # handle last line if instructions differ in count
     if actual_instructions:
         checkpoints.add(len(actual_instructions) - 1)
-
     if expected_instructions:
         checkpoints.add(len(expected_instructions) - 1)
 
+    # Expand around mismatches
     for ind in list(checkpoints):
-        start_index = max(0, ind - near_lines_count)
-        end_index = min(ind + near_lines_count, len(zipped_instructions) - 1)
-
-        checkpoints.update(range(start_index, end_index + 1))
+        start_idx = max(0, ind - near_lines_count)
+        end_idx = min(ind + near_lines_count, len(zipped_instructions) - 1)
+        checkpoints.update(range(start_idx, end_idx + 1))
 
     checkpoints = sorted(checkpoints)
 
+    # Print a small legend
     logger.divider()
-    logger.info(f"0000 00 STOP - both expected and actual bytecode instructions match")
+    logger.info("0000 00 STOP - both expected and actual bytecode instructions match")
     logger.info(f'{bgRed("0x0002")} - the actual bytecode differs')
     logger.info(
         f'{bgYellow("0x0001")} - the actual bytecode differs on the immutable reference position'
@@ -97,94 +169,58 @@ def deep_match_bytecode(
     logger.divider()
 
     is_matched_with_excluded_immutables = True
-    for previous_index, current_index in zip(checkpoints, checkpoints[1:]):
-        if previous_index != current_index - 1:
+
+    # Print the diff lines
+    # note: for shortness, we won't handle "None" instructions here,
+    # since we used zip() not zip_longest(). Adjust if needed.
+    for prev_idx, cur_idx in zip(checkpoints, checkpoints[1:]):
+        if prev_idx != cur_idx - 1:
             print("...")
 
-        actual = (
-            actual_instructions[current_index]
-            if current_index < len(actual_instructions)
-            else None
-        )
-        expected = (
-            expected_instructions[current_index]
-            if current_index < len(expected_instructions)
-            else None
-        )
+        actual = zipped_instructions[cur_idx][0]
+        expected = zipped_instructions[cur_idx][1]
 
-        if not actual and expected:
-            params = "0x" + expected["bytecode"][2:]
-            print(
-                red(
-                    f'{to_hex(current_index, 4)} {to_hex(expected["op"]["code"])} {expected["op"]["name"]} {params}'
-                )
-            )
-        elif actual and not expected:
-            params = "0x" + actual["bytecode"][2:]
-            print(
-                green(
-                    f'{to_hex(current_index, 4)} {to_hex(actual["op"]["code"])} {actual["op"]["name"]} {params}'
-                )
-            )
-        elif actual and expected:
+        # Compare opcodes
+        same_opcode = actual["op"]["code"] == expected["op"]["code"]
+        if same_opcode:
+            opcode = to_hex(actual["op"]["code"])
+            opname = actual["op"]["name"]
+        else:
             opcode = (
-                to_hex(actual["op"]["code"])
-                if actual["op"]["code"] == expected["op"]["code"]
-                else bgRed(to_hex(actual["op"]["code"]))
+                bgRed(to_hex(actual["op"]["code"]))
                 + " "
                 + bgGreen(to_hex(expected["op"]["code"]))
             )
-            opname = (
-                actual["op"]["name"]
-                if actual["op"]["name"] == expected["op"]["name"]
-                else bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])
-            )
+            opname = bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])
 
-            actual_params = format_bytecode(actual["bytecode"])
-            expected_params = format_bytecode(expected["bytecode"])
+        actual_params = format_bytecode(actual["bytecode"])
+        expected_params = format_bytecode(expected["bytecode"])
 
-            params_length = len(expected["bytecode"]) // 2 - 1
-            is_immutable = immutables.get(expected["start"] + 1) == params_length
-            if actual_params != expected_params and not is_immutable:
-                is_matched_with_excluded_immutables = False
-            params = (
-                actual_params
-                if actual_params == expected_params
-                else (
-                    bgYellow(actual_params) + " " + bgGreen(expected_params)
-                    if is_immutable
-                    else bgRed(actual_params) + " " + bgGreen(expected_params)
-                )
-            )
-            print(f"{to_hex(current_index, 4)} {opcode} {opname} {params}")
+        # Check partial overlap with immutables
+        instr_start = expected["start"]
+        instr_len = expected["length"]
+        within_immutable_region = overlaps_any_immutable(
+            immutables, instr_start, instr_len
+        )
+
+        if actual_params == expected_params:
+            # Perfect match => no highlight
+            params = actual_params
         else:
-            raise BinVerifierError("Invalid bytecode difference data")
+            # There's a difference
+            if within_immutable_region:
+                params = bgYellow(actual_params) + " " + bgGreen(expected_params)
+            else:
+                params = bgRed(actual_params) + " " + bgGreen(expected_params)
+                is_matched_with_excluded_immutables = False
+
+        print(f"{to_hex(cur_idx, 4)} {opcode} {opname} {params}")
 
+    # If we found any mismatch outside immutables => fail
     if not is_matched_with_excluded_immutables:
         raise BinVerifierError(
-            f"Bytecodes have differences not on the immutable reference position"
+            "Bytecodes have differences not on the immutable reference position"
         )
 
-    logger.okay(f"Bytecodes have differences only on the immutable reference position")
-
-
-def parse(bytecode):
-    buffer = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode)
-    instructions = []
-    i = 0
-    unknown_opcodes = set()
-    while i < len(buffer):
-        opcode = buffer[i]
-        if opcode not in OPCODES:
-            unknown_opcodes.add(hex(opcode))
-        length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0)
-        instructions.append(
-            {
-                "start": i,
-                "length": length,
-                "op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode},
-                "bytecode": buffer[i : i + length].hex(),
-            }
-        )
-        i += length
-    return instructions, unknown_opcodes
+    # Otherwise, differences exist but only in immutables
+    logger.okay("Bytecodes have differences only on the immutable reference position")
@@ -3,147 +3,272 @@
 from .custom_exceptions import EncoderError
 
 
+def _parse_solidity_int_type(arg_type: str) -> tuple[int, bool]:
+    """
+    Split a Solidity integer type name such as 'uint256', 'int128', 'uint' or
+    'int' into (bits, is_signed).
+      - bits falls back to 256 when the name carries no size suffix.
+      - is_signed is True for 'int...' and False for 'uint...'.
+    """
+    parsed = re.match(r"^(u?int)(\d*)$", arg_type)
+    if parsed is None:
+        raise EncoderError(f"Invalid integer type format '{arg_type}'.")
+    prefix, size_suffix = parsed.groups()
+    is_signed = prefix == "int"  # the only other prefix the pattern allows is 'uint'
+    bits = 256 if size_suffix == "" else int(size_suffix)
+    return bits, is_signed
+
+
 def to_hex_with_alignment(value: int) -> str:
+    """
+    Renders `value` as lowercase hex, zero-padded on the left to 64 characters
+    (one 32-byte word). Negative inputs must be two's-complemented beforehand.
+    """
-    return format(value, "064x")
+    return f"{value:064x}"
 
 
+def encode_int(value: int, bits: int, is_signed: bool) -> str:
+    """
+    Encodes an integer as one 32-byte ABI word (64 hex chars).
+    Negative signed values are sign-extended to the full 256 bits, as the ABI
+    requires for every int<M>, not only int256.
+    Raises EncoderError if `bits` is not in 1..256 or `value` does not fit the
+    declared type.
+    """
+    # bool is an int subclass; normalise it so the range check reads naturally.
+    if isinstance(value, bool):
+        value = int(value)
+
+    if not 0 < bits <= 256:
+        raise EncoderError(f"Unsupported integer width: {bits} bits.")
+
+    if is_signed:
+        lower, upper = -(1 << (bits - 1)), (1 << (bits - 1)) - 1
+    else:
+        lower, upper = 0, (1 << bits) - 1
+    if not lower <= value <= upper:
+        kind = "int" if is_signed else "uint"
+        raise EncoderError(f"Value {value} is out of range for {kind}{bits}.")
+
+    # Python's format() has no two's-complement mode, so wrap negatives manually.
+    # The modulus is always 2^256 (not 2^bits): int8(-1) encodes as 32 bytes of 0xff.
+    if value < 0:
+        value += 1 << 256
+
+    return to_hex_with_alignment(value)
+
+
 def encode_address(address: str) -> str:
-    number = int(address, 16)
+    """
+    Encodes an address as one 32-byte word (64 hex chars, zero-padded on the left).
+    `address` is a hex string; every '0x' in its lowercased form is removed first.
+    """
+    bare_hex = address.lower().replace("0x", "")
+    return to_hex_with_alignment(int(bare_hex, 16))
-    return to_hex_with_alignment(number)
 
 
 def encode_fixed_bytes(value: str, length: int) -> str:
+    """
+    Encodes a bytesN value (N = `length`) as one 32-byte word: the hex digits
+    come first and zeros fill the rest. Raises EncoderError if `value` is longer
+    than `length` bytes.
+    """
-    raw_hex = value.lower().replace("0x", "")
-    max_hex_len = length * 2  # each byte is 2 hex chars
-    if len(raw_hex) > max_hex_len:
+    digits = value.lower().replace("0x", "")
+    max_hex_len = 2 * length  # two hex chars per byte
+    if max_hex_len < len(digits):
         raise EncoderError(
             f"Provided bytes length exceeds {length} bytes (max {max_hex_len} hex chars)."
         )
-    # Right-pad with zeros up to fixed length, then left-pad to 64 hex chars total
-    raw_hex = raw_hex.ljust(max_hex_len, "0")  # fill the fixed bytes
-    return raw_hex.ljust(64, "0")  # fill up to 32 bytes in total
+    # Both padding steps append zeros on the right: first up to the declared
+    # size, then up to the 64-char word.
+    sized = digits.ljust(max_hex_len, "0")
+    return sized.ljust(64, "0")
 
 
 def encode_bytes(data: str) -> str:
-    bytes_str = data.lstrip("0x")
+    """
+    Encodes a dynamic `bytes` value as:
+      [ 32-byte length, (N + padded to multiple of 32) bytes data ]
+    `data` is a hex string; only one leading '0x' is dropped, zero bytes are kept.
+    """
+    bytes_str = data.lower().removeprefix("0x")
     if not bytes_str:
+        # length = 0
         return to_hex_with_alignment(0)
 
-    # Calculate the length of the hex-encoded 32-bytes padded data
-    # since EVM uses 32-byte (256-bit) words
     count_of_bytes_from_hex = len(bytes_str) // 2
-    encoded_length = 0
-    if count_of_bytes_from_hex > 0:
-        encoded_length = ((len(bytes_str) - 1) // 64 + 1) * 64
-    bytes_str += "0" * (encoded_length - len(bytes_str))
-    return to_hex_with_alignment(count_of_bytes_from_hex) + bytes_str
-
-
-def encode_tuple(types: list, args: list):
-    args_length = len(types)
-    encoded_offsets = ""
-    encoded_data = ""
-    for arg_index in range(args_length):
-        arg_type = types[arg_index]
-        arg_value = args[arg_index]
-        if arg_type == "address":
-            encoded_offsets += encode_address(arg_value)
+    # how many hex chars needed to pad to next 32-byte boundary:
+    remainder = len(bytes_str) % 64
+    if remainder != 0:
+        padding_needed = 64 - remainder
+    else:
+        padding_needed = 0
+
+    padded_bytes_str = bytes_str + ("0" * padding_needed)
+
+    # first 32 bytes = length, then the data
+    return to_hex_with_alignment(count_of_bytes_from_hex) + padded_bytes_str
+
+
+def encode_tuple(components_abi: list, values: list) -> str:
+    """
+    Encodes a tuple (struct) by concatenating the encoding of each component in
+    order; nested 'tuple' components recurse. Returns hex without a '0x' prefix.
+    Array components ('...[]') are accepted only when empty, and 'string' is
+    rejected. Raises EncoderError on a count mismatch or an unsupported type.
+    """
+    if len(components_abi) != len(values):
+        raise EncoderError(
+            f"encode_tuple: mismatch in component count: {len(components_abi)} vs values: {len(values)}"
+        )
+
+    encoded = ""
+    for comp, val in zip(components_abi, values):
+        arg_type = comp["type"]
+
+        # 1) Possibly a nested tuple
+        if arg_type == "tuple":
+            if "components" not in comp:
+                raise EncoderError("Tuple type missing 'components' in ABI data.")
+            encoded += encode_tuple(comp["components"], val)
+
+        # 2) Possibly an array of addresses/ints, etc.
+        elif arg_type.endswith("[]"):
+            # If you need full dynamic array encoding in a struct, you'd do an offset-based approach here.
+            # We just handle the empty array case (0 length).
+            # NOTE(review): this writes a single zero word inline; the removed code
+            # wrote an offset word plus a zero length for an empty 'address[]' -
+            # confirm the changed output is intended.
+            if not val:  # empty array
+                encoded += to_hex_with_alignment(0)
+            else:
+                raise EncoderError(
+                    "encode_tuple: non-empty dynamic arrays in tuples not yet supported."
+                )
+
+        # 3) address
+        elif arg_type == "address":
+            encoded += encode_address(val)
+
+        # 4) bool
         elif arg_type == "bool":
-            encoded_offsets += to_hex_with_alignment(int(bool(arg_value)))
-        # Handle any integral type: uint, uint8..uint256, int, int8..int256
+            # Could unify with int, but let's keep it explicit for readability
+            encoded += to_hex_with_alignment(int(bool(val)))
+
+        # 5) integer types (uint, int, etc.)
         elif re.match(r"^(u?int)(\d*)$", arg_type):
-            encoded_offsets += to_hex_with_alignment(arg_value)
-        # Handle fixed-length bytes (e.g. bytes1..bytes32)
+            bits, is_signed = _parse_solidity_int_type(arg_type)
+            encoded += encode_int(int(val), bits, is_signed)
+
+        # 6) fixed-length bytes
         elif re.match(r"^bytes(\d+)$", arg_type):
             match_len = re.match(r"^bytes(\d+)$", arg_type)
             num_bytes = int(match_len.group(1))
-            encoded_offsets += encode_fixed_bytes(arg_value, num_bytes)
-        elif arg_type == "address[]" and not arg_value:
-            encoded_data += to_hex_with_alignment(0)
-            offset = to_hex_with_alignment((arg_index + args_length) * 32)
-            encoded_offsets += offset
+            encoded += encode_fixed_bytes(val, num_bytes)
+
+        # 7) dynamic bytes
+        # NOTE(review): the length word and data are written inline, with no
+        # offset word - verify this matches the ABI layout expected for the struct.
+        elif arg_type == "bytes":
+            encoded += encode_bytes(val)
+
+        # 8) string
+        elif arg_type == "string":
+            # For a struct field that is a string, you'd typically do offset-based dynamic encoding.
+            raise EncoderError(
+                "encode_tuple: 'string' inside tuple not fully implemented."
+            )
+
         else:
             raise EncoderError(
-                f"Unknown constructor argument type '{arg_type}' in tuple"
+                f"Unknown or unhandled type '{arg_type}' in encode_tuple"
             )
-    return encoded_offsets + encoded_data
+
+    return encoded
 
 
 def encode_dynamic_type(arg_value: str, argument_index: int):
+    """
+    Builds the two pieces for a top-level dynamic argument given as hex:
+    the head word holding the offset (argument_index + 1) * 32, and the tail
+    produced by encode_bytes (length word followed by padded data).
+    Array elements are not encoded individually here.
+    """
-    offset_to_start_of_data_part = to_hex_with_alignment((argument_index + 1) * 32)
-    encoded_value = encode_bytes(arg_value)
-    return offset_to_start_of_data_part, encoded_value
+    head_word = to_hex_with_alignment(32 * (argument_index + 1))
+    tail = encode_bytes(arg_value)
+    return head_word, tail
 
 
 def encode_string(arg_length: int, compl_data: list, arg_value: str):
+    """
+    Encodes a top-level string argument as (offset word, length word, padded
+    UTF-8 contents as hex). The caller puts the offset in the head and appends
+    the other two parts to 'compl_data'.
+    The offset is the head size plus the bytes already queued in 'compl_data';
+    queued parts are hex strings of any length (possibly empty), so they are
+    measured rather than counted as one 32-byte word each.
+    NOTE(review): the head size is taken as arg_length * 32, i.e. one word per
+    argument - confirm for constructors with inline tuples.
+    """
-    argument_index = arg_length + len(compl_data)
-    encoded_value = arg_value.encode("utf-8")
-    offset_to_start_of_data_part = to_hex_with_alignment(argument_index * 32)
-    encoded_value_length = to_hex_with_alignment(len(encoded_value))
+    queued_tail_bytes = sum(len(part) for part in compl_data) // 2
+    offset_to_start_of_data_part = to_hex_with_alignment(
+        arg_length * 32 + queued_tail_bytes
+    )
+    encoded_value_bytes = arg_value.encode("utf-8")
+    encoded_value_length = to_hex_with_alignment(len(encoded_value_bytes))
+    # Pad the contents to a multiple of 32 bytes (64 hex chars); empty stays empty.
+    hex_str = encoded_value_bytes.hex()
+    hex_str += "0" * (-len(hex_str) % 64)
+
     return (
         offset_to_start_of_data_part,
         encoded_value_length,
-        encoded_value.hex().ljust(64, "0"),
+        hex_str,
     )
 
 
 def encode_constructor_arguments(constructor_abi: list, constructor_config_args: list):
-    # see https://docs.soliditylang.org/en/develop/abi-spec.html#contract-abi-specification
-    # transferred from here:
-    # https://github.com/lidofinance/lido-dao/blob/master/bytecode-verificator/bytecode_verificator.sh#L369-L405
+    """
+    Encodes the constructor arguments as one hex string (no '0x' prefix).
+    Static values are appended to the head in ABI order; for 'bytes', '...[]' and
+    'string' arguments an offset word goes into the head and the data is queued
+    in 'compl_data', which is appended after the loop.
+    Any exception raised while encoding is re-raised as EncoderError.
+    """
     arg_length = len(constructor_abi)
 
     constructor_calldata = ""
     compl_data = []
+
     try:
         for argument_index in range(arg_length):
             arg_type = constructor_abi[argument_index]["type"]
             arg_value = constructor_config_args[argument_index]
 
             if arg_type == "address":
                 constructor_calldata += encode_address(arg_value)
+
             elif arg_type == "bool":
                 constructor_calldata += to_hex_with_alignment(int(bool(arg_value)))
-            # Handle any integral type: uint, uint8..uint256, int, int8..int256
+
             elif re.match(r"^(u?int)(\d*)$", arg_type):
-                constructor_calldata += to_hex_with_alignment(arg_value)
-            # Handle fixed-length bytes (e.g. bytes1..bytes32)
+                # parse bits + sign
+                bits, is_signed = _parse_solidity_int_type(arg_type)
+                constructor_calldata += encode_int(int(arg_value), bits, is_signed)
+
             elif re.match(r"^bytes(\d+)$", arg_type):
+                # fixed-length bytes
                 match_len = re.match(r"^bytes(\d+)$", arg_type)
                 num_bytes = int(match_len.group(1))
                 constructor_calldata += encode_fixed_bytes(arg_value, num_bytes)
+
             elif arg_type == "bytes" or arg_type.endswith("[]"):
-                offset_to_start_of_data_part, encoded_value = encode_dynamic_type(
-                    arg_value, argument_index
-                )
-                constructor_calldata += offset_to_start_of_data_part
+                # top-level dynamic array or raw bytes
+                # NOTE(review): arrays also go through encode_bytes here, so
+                # arg_value must already be a hex string - confirm against the config.
+                offset, encoded_value = encode_dynamic_type(arg_value, argument_index)
+                constructor_calldata += offset
                 compl_data.append(encoded_value)
+
             elif arg_type == "string":
-                offset_to_start_of_data_part, encoded_value_length, encoded_value = (
-                    encode_string(arg_length, compl_data, arg_value)
+                offset, length_hex, contents_hex = encode_string(
+                    arg_length, compl_data, arg_value
                 )
-                constructor_calldata += offset_to_start_of_data_part
-                compl_data.append(encoded_value_length)
-                compl_data.append(encoded_value)
+                constructor_calldata += offset
+                compl_data.append(length_hex)
+                compl_data.append(contents_hex)
+
             elif arg_type == "tuple":
-                args_tuple_types = [
-                    component["type"]
-                    for component in constructor_abi[argument_index]["components"]
-                ]
-                if all(arg == "address[]" for arg in args_tuple_types):
-                    argument_index = len(constructor_calldata) // 64
-                    offset_to_start_of_data_part = to_hex_with_alignment(
-                        (argument_index + 1) * 32
-                    )
-                    constructor_calldata += offset_to_start_of_data_part
-                    compl_data.append(encode_tuple(args_tuple_types, arg_value))
-                else:
-                    constructor_calldata += encode_tuple(args_tuple_types, arg_value)
+                # NOTE(review): the tuple is always written inline into the head; the
+                # removed special case emitted an offset word for tuples made only of
+                # 'address[]' - confirm dropping it is intended.
+                tuple_abi = constructor_abi[argument_index]["components"]
+                constructor_calldata += encode_tuple(tuple_abi, arg_value)
+
             else:
-                raise EncoderError(f"Unknown constructor argument type: {arg_type}")
+                raise EncoderError(
+                    f"Unknown or unhandled constructor argument type: {arg_type}"
+                )
+
     except Exception as e:
-        raise EncoderError(e) from None
-    for offset_to_start_of_data_part in compl_data:
-        constructor_calldata += offset_to_start_of_data_part
+        # 'from None' hides the original traceback; only its message is kept.
+        raise EncoderError(f"Failed to encode calldata arguments: {e}") from None
+
+    # Append any "completion" data (the actual dynamic data or string contents)
+    for data_part in compl_data:
+        constructor_calldata += data_part
 
     return constructor_calldata
@@ -8,7 +8,7 @@
     "@commitlint/config-conventional": "^19.5.0"
   },
   "dependencies": {
-    "hardhat": "^2.22.17",
+    "hardhat": "^2.22.19",
     "kill-port": "^2.0.1"
   },
   "packageManager": "npm@10.8.2",