Skip to content

Commit 6c4a813

Browse files
committed Mar 10, 2025
fix: improve bytecode comparison
1 parent 54a7009 commit 6c4a813

File tree

5 files changed

+379
-227
lines changed

5 files changed

+379
-227
lines changed
 

‎diffyscan/diffyscan.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ def run_bytecode_diff(
6262
is_fully_matched = local_compiled_bytecode == remote_deployed_bytecode
6363

6464
if is_fully_matched:
65-
logger.okay(f"Bytecodes are fully matched")
65+
logger.okay(f"Bytecodes fully match")
6666
return
6767

6868
logger.info(f"Automated match hasn't worked out")
@@ -86,7 +86,7 @@ def run_bytecode_diff(
8686
is_fully_matched = local_deployed_bytecode == remote_deployed_bytecode
8787

8888
if is_fully_matched:
89-
logger.okay(f"Bytecodes are fully matched")
89+
logger.okay(f"Bytecodes fully match")
9090
return
9191

9292
deep_match_bytecode(

‎diffyscan/utils/binary_verifier.py

+145-109
Original file line number | Diff line number | Diff line change
@@ -5,82 +5,154 @@
55
from .custom_exceptions import BinVerifierError
66

77

8-
def format_bytecode(bytecode):
8+
def format_bytecode(bytecode: str) -> str:
9+
"""Converts raw hex for an instruction into a '0x' prefixed string, or empty if none."""
910
return "0x" + bytecode[2:] if len(bytecode) > 2 else ""
1011

1112

1213
def trim_solidity_meta(bytecode: str) -> dict:
14+
"""
15+
Strips Solidity metadata from the end of the bytecode, if present.
16+
Solidity appends a CBOR metadata section at the end, indicated by
17+
the last 2 bytes in big-endian (multiplied by 2 for hex, plus 4).
18+
"""
1319
meta_size = int(bytecode[-4:], 16) * 2 + 4
14-
1520
if meta_size > len(bytecode):
1621
return {"bytecode": bytecode, "metadata": ""}
17-
1822
return {
1923
"bytecode": bytecode[:-meta_size],
2024
"metadata": bytecode[-meta_size:],
2125
}
2226

2327

28+
def parse(bytecode: str):
29+
"""
30+
Parses raw hex EVM bytecode into a list of instructions:
31+
[ { 'start': offset, 'length': N, 'op': {...}, 'bytecode': '...' }, ... ]
32+
"""
33+
buffer = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode)
34+
instructions = []
35+
i = 0
36+
unknown_opcodes = set()
37+
38+
while i < len(buffer):
39+
opcode = buffer[i]
40+
if opcode not in OPCODES:
41+
unknown_opcodes.add(hex(opcode))
42+
43+
# For PUSH1..PUSH32, the length is 1 + (opcode - PUSH0)
44+
length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0)
45+
46+
instr_hex = buffer[i : i + length].hex()
47+
instructions.append(
48+
{
49+
"start": i,
50+
"length": length,
51+
"op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode},
52+
"bytecode": instr_hex,
53+
}
54+
)
55+
56+
i += length
57+
58+
return instructions, unknown_opcodes
59+
60+
61+
def regions_overlap(a_start: int, a_len: int, b_start: int, b_len: int) -> bool:
62+
"""
63+
Return True if [a_start, a_start+a_len) overlaps with [b_start, b_start+b_len).
64+
"""
65+
a_end = a_start + a_len
66+
b_end = b_start + b_len
67+
# intervals do NOT overlap if one is entirely to the left of the other
68+
if a_end <= b_start or b_end <= a_start:
69+
return False
70+
return True
71+
72+
73+
def overlaps_any_immutable(
74+
immutables: dict[int, int], instr_start: int, instr_len: int
75+
) -> bool:
76+
"""
77+
Checks if the instruction byte range [instr_start.. instr_start+instr_len)
78+
overlaps with ANY known immutable region [start.. start+length) from 'immutables'.
79+
"""
80+
for imm_start, imm_len in immutables.items():
81+
if regions_overlap(instr_start, instr_len, imm_start, imm_len):
82+
return True
83+
return False
84+
85+
2486
def deep_match_bytecode(
2587
actual_bytecode: str, expected_bytecode: str, immutables: dict
2688
) -> None:
89+
"""
90+
Compare two chunks of bytecode instruction-by-instruction, ignoring differences
91+
that appear within known 'immutable' regions.
92+
93+
If:
94+
- No differences => "Bytecodes fully match."
95+
- Differences only in immutables => "Bytecodes have differences only on the immutable reference position."
96+
- Differences outside immutables => raises BinVerifierError.
97+
"""
2798
logger.info("Comparing actual code with the expected one...")
2899

29-
actual_trimmed_bytecode = trim_solidity_meta(actual_bytecode)
30-
expected_trimmed_bytecode = trim_solidity_meta(expected_bytecode)
100+
# Possibly strip out metadata from both
101+
actual_trimmed = trim_solidity_meta(actual_bytecode)
102+
expected_trimmed = trim_solidity_meta(expected_bytecode)
31103

32-
if actual_trimmed_bytecode["metadata"] or expected_trimmed_bytecode["metadata"]:
104+
if actual_trimmed["metadata"] or expected_trimmed["metadata"]:
33105
logger.info("Metadata has been detected and trimmed")
34106

35-
actual_instructions, unknown_opcodes_first_half = parse(
36-
actual_trimmed_bytecode["bytecode"]
37-
)
38-
expected_instructions, unknown_opcodes_second_half = parse(
39-
expected_trimmed_bytecode["bytecode"]
40-
)
107+
# Parse instructions
108+
actual_instructions, unknown_opcodes_a = parse(actual_trimmed["bytecode"])
109+
expected_instructions, unknown_opcodes_b = parse(expected_trimmed["bytecode"])
41110

42-
unknown_opcodes = (
43-
unknown_opcodes_first_half or set() | unknown_opcodes_second_half or set()
44-
)
111+
# Check for unknown opcodes
112+
unknown_opcodes = unknown_opcodes_a | unknown_opcodes_b
45113
if unknown_opcodes:
46114
logger.warn(f"Detected unknown opcodes: {unknown_opcodes}")
47115

116+
# If they differ in length, we still attempt to compare
48117
if len(actual_instructions) != len(expected_instructions):
49-
logger.warn(f"Codes have a different length")
118+
logger.warn("Codes have a different length")
50119

51-
zipped_instructions = list(
52-
itertools.zip_longest(actual_instructions, expected_instructions)
53-
)
120+
# Pair them up by index
121+
zipped_instructions = list(zip(actual_instructions, expected_instructions))
122+
123+
# Identify mismatch indexes
124+
def is_mismatch(pair) -> bool:
125+
return pair[0]["bytecode"] != pair[1]["bytecode"]
54126

55-
is_mismatch = (
56-
lambda pair: pair[0] is None
57-
or pair[1] is None
58-
or pair[0].get("bytecode") != pair[1].get("bytecode")
59-
)
60127
mismatches = [
61-
index for index, pair in enumerate(zipped_instructions) if is_mismatch(pair)
128+
idx for idx, pair in enumerate(zipped_instructions) if is_mismatch(pair)
62129
]
63130

64-
near_lines_count = 3 # context depth, i.e., the number of lines above and \below to be displayed for each diff
131+
# If no mismatches at all => fully match
132+
if not mismatches and len(actual_instructions) == len(expected_instructions):
133+
logger.okay("Bytecodes fully match")
134+
return
65135

136+
# We'll show a few lines around each mismatch for context
137+
near_lines_count = 3
66138
checkpoints = {0, *mismatches}
67-
139+
# handle last line if instructions differ in count
68140
if actual_instructions:
69141
checkpoints.add(len(actual_instructions) - 1)
70-
71142
if expected_instructions:
72143
checkpoints.add(len(expected_instructions) - 1)
73144

145+
# Expand around mismatches
74146
for ind in list(checkpoints):
75-
start_index = max(0, ind - near_lines_count)
76-
end_index = min(ind + near_lines_count, len(zipped_instructions) - 1)
77-
78-
checkpoints.update(range(start_index, end_index + 1))
147+
start_idx = max(0, ind - near_lines_count)
148+
end_idx = min(ind + near_lines_count, len(zipped_instructions) - 1)
149+
checkpoints.update(range(start_idx, end_idx + 1))
79150

80151
checkpoints = sorted(checkpoints)
81152

153+
# Print a small legend
82154
logger.divider()
83-
logger.info(f"0000 00 STOP - both expected and actual bytecode instructions match")
155+
logger.info("0000 00 STOP - both expected and actual bytecode instructions match")
84156
logger.info(f'{bgRed("0x0002")} - the actual bytecode differs')
85157
logger.info(
86158
f'{bgYellow("0x0001")} - the actual bytecode differs on the immutable reference position'
@@ -97,94 +169,58 @@ def deep_match_bytecode(
97169
logger.divider()
98170

99171
is_matched_with_excluded_immutables = True
100-
for previous_index, current_index in zip(checkpoints, checkpoints[1:]):
101-
if previous_index != current_index - 1:
172+
173+
# Print the diff lines
174+
# note: for shortness, we won't handle "None" instructions here,
175+
# since we used zip() not zip_longest(). Adjust if needed.
176+
for prev_idx, cur_idx in zip(checkpoints, checkpoints[1:]):
177+
if prev_idx != cur_idx - 1:
102178
print("...")
103179

104-
actual = (
105-
actual_instructions[current_index]
106-
if current_index < len(actual_instructions)
107-
else None
108-
)
109-
expected = (
110-
expected_instructions[current_index]
111-
if current_index < len(expected_instructions)
112-
else None
113-
)
180+
actual = zipped_instructions[cur_idx][0]
181+
expected = zipped_instructions[cur_idx][1]
114182

115-
if not actual and expected:
116-
params = "0x" + expected["bytecode"][2:]
117-
print(
118-
red(
119-
f'{to_hex(current_index, 4)} {to_hex(expected["op"]["code"])} {expected["op"]["name"]} {params}'
120-
)
121-
)
122-
elif actual and not expected:
123-
params = "0x" + actual["bytecode"][2:]
124-
print(
125-
green(
126-
f'{to_hex(current_index, 4)} {to_hex(actual["op"]["code"])} {actual["op"]["name"]} {params}'
127-
)
128-
)
129-
elif actual and expected:
183+
# Compare opcodes
184+
same_opcode = actual["op"]["code"] == expected["op"]["code"]
185+
if same_opcode:
186+
opcode = to_hex(actual["op"]["code"])
187+
opname = actual["op"]["name"]
188+
else:
130189
opcode = (
131-
to_hex(actual["op"]["code"])
132-
if actual["op"]["code"] == expected["op"]["code"]
133-
else bgRed(to_hex(actual["op"]["code"]))
190+
bgRed(to_hex(actual["op"]["code"]))
134191
+ " "
135192
+ bgGreen(to_hex(expected["op"]["code"]))
136193
)
137-
opname = (
138-
actual["op"]["name"]
139-
if actual["op"]["name"] == expected["op"]["name"]
140-
else bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])
141-
)
194+
opname = bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])
142195

143-
actual_params = format_bytecode(actual["bytecode"])
144-
expected_params = format_bytecode(expected["bytecode"])
196+
actual_params = format_bytecode(actual["bytecode"])
197+
expected_params = format_bytecode(expected["bytecode"])
145198

146-
params_length = len(expected["bytecode"]) // 2 - 1
147-
is_immutable = immutables.get(expected["start"] + 1) == params_length
148-
if actual_params != expected_params and not is_immutable:
149-
is_matched_with_excluded_immutables = False
150-
params = (
151-
actual_params
152-
if actual_params == expected_params
153-
else (
154-
bgYellow(actual_params) + " " + bgGreen(expected_params)
155-
if is_immutable
156-
else bgRed(actual_params) + " " + bgGreen(expected_params)
157-
)
158-
)
159-
print(f"{to_hex(current_index, 4)} {opcode} {opname} {params}")
199+
# Check partial overlap with immutables
200+
instr_start = expected["start"]
201+
instr_len = expected["length"]
202+
within_immutable_region = overlaps_any_immutable(
203+
immutables, instr_start, instr_len
204+
)
205+
206+
if actual_params == expected_params:
207+
# Perfect match => no highlight
208+
params = actual_params
160209
else:
161-
raise BinVerifierError("Invalid bytecode difference data")
210+
# There's a difference
211+
if within_immutable_region:
212+
params = bgYellow(actual_params) + " " + bgGreen(expected_params)
213+
else:
214+
params = bgRed(actual_params) + " " + bgGreen(expected_params)
215+
is_matched_with_excluded_immutables = False
216+
217+
print(f"{to_hex(cur_idx, 4)} {opcode} {opname} {params}")
162218

219+
# If we found any mismatch outside immutables => fail
163220
if not is_matched_with_excluded_immutables:
164221
raise BinVerifierError(
165-
f"Bytecodes have differences not on the immutable reference position"
222+
"Bytecodes have differences not on the immutable reference position"
166223
)
167224

168-
logger.okay(f"Bytecodes have differences only on the immutable reference position")
169-
170-
171-
def parse(bytecode):
172-
buffer = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode)
173-
instructions = []
174-
i = 0
175-
unknown_opcodes = set()
176-
while i < len(buffer):
177-
opcode = buffer[i]
178-
if opcode not in OPCODES:
179-
unknown_opcodes.add(hex(opcode))
180-
length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0)
181-
instructions.append(
182-
{
183-
"start": i,
184-
"length": length,
185-
"op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode},
186-
"bytecode": buffer[i : i + length].hex(),
187-
}
188-
)
189-
i += length
190-
return instructions, unknown_opcodes
225+
# Otherwise, differences exist but only in immutables
226+
logger.okay("Bytecodes have differences only on the immutable reference position")

‎diffyscan/utils/encoder.py

+195-70
Original file line number | Diff line number | Diff line change
@@ -3,147 +3,272 @@
33
from .custom_exceptions import EncoderError
44

55

6+
def _parse_solidity_int_type(arg_type: str) -> tuple[int, bool]:
7+
"""
8+
Given a Solidity int/uint type (e.g. 'uint256', 'int128', 'uint', 'int'),
9+
returns (bits, is_signed).
10+
- bits = 256 if no explicit size is specified.
11+
- is_signed = True if it starts with 'int', False if 'uint'.
12+
"""
13+
match = re.match(r"^(u?int)(\d*)$", arg_type)
14+
if not match:
15+
raise EncoderError(f"Invalid integer type format '{arg_type}'.")
16+
is_signed = not match.group(1).startswith("u") # 'uint' => False, 'int' => True
17+
bits_str = match.group(2)
18+
bits = int(bits_str) if bits_str else 256
19+
return (bits, is_signed)
20+
21+
622
def to_hex_with_alignment(value: int) -> str:
23+
"""
24+
Encodes `value` (non-negative integer) as a 32-byte hex string.
25+
For negative values, you must first apply two's complement.
26+
"""
727
return format(value, "064x")
828

929

30+
def encode_int(value: int, bits: int, is_signed: bool) -> str:
31+
"""
32+
Encodes an integer value (possibly negative if signed) into 32 bytes
33+
using two's complement for negative values.
34+
"""
35+
# Convert bool to int if needed (though typically you'd handle bool in a separate branch).
36+
if isinstance(value, bool):
37+
value = 1 if value else 0
38+
39+
# Python's 'format' doesn't automatically do two's-complement for negative integers.
40+
# So if it's signed and value is negative, convert by adding 2^bits.
41+
if is_signed and value < 0:
42+
# e.g. for int256, 2^256 + value
43+
value = (1 << bits) + value
44+
45+
# Now ensure it fits within 'bits'
46+
# (if bits=8, max = 2^7 - 1 for signed or 2^8-1 for unsigned).
47+
# We'll skip a strict bounds check for brevity, but you could raise an error
48+
# if abs(value) >= 2^(bits-1) for signed or value >= 2^bits for unsigned.
49+
50+
return to_hex_with_alignment(value)
51+
52+
1053
def encode_address(address: str) -> str:
11-
number = int(address, 16)
54+
"""
55+
Encodes an address as a 32-byte hex string.
56+
Assumes 'address' is already a hex string (with '0x' or without).
57+
"""
58+
address_no_0x = address.lower().replace("0x", "")
59+
# Convert to int
60+
number = int(address_no_0x, 16)
1261
return to_hex_with_alignment(number)
1362

1463

1564
def encode_fixed_bytes(value: str, length: int) -> str:
65+
"""
66+
Encodes fixed-length bytes (e.g., bytes1..bytes32) into 32 bytes.
67+
"""
1668
raw_hex = value.lower().replace("0x", "")
17-
max_hex_len = length * 2 # each byte is 2 hex chars
69+
max_hex_len = length * 2 # each byte => 2 hex chars
1870
if len(raw_hex) > max_hex_len:
1971
raise EncoderError(
2072
f"Provided bytes length exceeds {length} bytes (max {max_hex_len} hex chars)."
2173
)
22-
# Right-pad with zeros up to fixed length, then left-pad to 64 hex chars total
23-
raw_hex = raw_hex.ljust(max_hex_len, "0") # fill the fixed bytes
24-
return raw_hex.ljust(64, "0") # fill up to 32 bytes in total
74+
# Right-pad the actual bytes to `length`, then pad to 32 bytes total
75+
raw_hex = raw_hex.ljust(max_hex_len, "0")
76+
return raw_hex.ljust(64, "0")
2577

2678

2779
def encode_bytes(data: str) -> str:
28-
bytes_str = data.lstrip("0x")
80+
"""
81+
Encodes a dynamic `bytes` value as:
82+
[ 32-byte length, (N + padded to multiple of 32) bytes data ]
83+
Naive approach: `data` is a hex string (with or without 0x).
84+
"""
85+
bytes_str = data.lower().lstrip("0x")
2986
if not bytes_str:
87+
# length = 0
3088
return to_hex_with_alignment(0)
3189

32-
# Calculate the length of the hex-encoded 32-bytes padded data
33-
# since EVM uses 32-byte (256-bit) words
3490
count_of_bytes_from_hex = len(bytes_str) // 2
35-
encoded_length = 0
36-
if count_of_bytes_from_hex > 0:
37-
encoded_length = ((len(bytes_str) - 1) // 64 + 1) * 64
38-
bytes_str += "0" * (encoded_length - len(bytes_str))
39-
return to_hex_with_alignment(count_of_bytes_from_hex) + bytes_str
40-
41-
42-
def encode_tuple(types: list, args: list):
43-
args_length = len(types)
44-
encoded_offsets = ""
45-
encoded_data = ""
46-
for arg_index in range(args_length):
47-
arg_type = types[arg_index]
48-
arg_value = args[arg_index]
49-
if arg_type == "address":
50-
encoded_offsets += encode_address(arg_value)
91+
# how many hex chars needed to pad to next 32-byte boundary:
92+
remainder = len(bytes_str) % 64
93+
if remainder != 0:
94+
padding_needed = 64 - remainder
95+
else:
96+
padding_needed = 0
97+
98+
padded_bytes_str = bytes_str + ("0" * padding_needed)
99+
100+
# first 32 bytes = length, then the data
101+
return to_hex_with_alignment(count_of_bytes_from_hex) + padded_bytes_str
102+
103+
104+
def encode_tuple(components_abi: list, values: list) -> str:
105+
"""
106+
Recursively encodes a tuple (struct).
107+
If a component is itself 'tuple', we recurse.
108+
If a component is an array type, we only allow empty arrays in this snippet.
109+
For full dynamic-array encoding, you'd need offset-based logic + element encoding.
110+
"""
111+
if len(components_abi) != len(values):
112+
raise EncoderError(
113+
f"encode_tuple: mismatch in component count: {len(components_abi)} vs values: {len(values)}"
114+
)
115+
116+
encoded = ""
117+
for comp, val in zip(components_abi, values):
118+
arg_type = comp["type"]
119+
120+
# 1) Possibly a nested tuple
121+
if arg_type == "tuple":
122+
if "components" not in comp:
123+
raise EncoderError("Tuple type missing 'components' in ABI data.")
124+
encoded += encode_tuple(comp["components"], val)
125+
126+
# 2) Possibly an array of addresses/ints, etc.
127+
elif arg_type.endswith("[]"):
128+
# If you need full dynamic array encoding in a struct, you'd do an offset-based approach here.
129+
# We just handle the empty array case (0 length).
130+
if not val: # empty array
131+
encoded += to_hex_with_alignment(0)
132+
else:
133+
raise EncoderError(
134+
"encode_tuple: non-empty dynamic arrays in tuples not yet supported."
135+
)
136+
137+
# 3) address
138+
elif arg_type == "address":
139+
encoded += encode_address(val)
140+
141+
# 4) bool
51142
elif arg_type == "bool":
52-
encoded_offsets += to_hex_with_alignment(int(bool(arg_value)))
53-
# Handle any integral type: uint, uint8..uint256, int, int8..int256
143+
# Could unify with int, but let's keep it explicit for readability
144+
encoded += to_hex_with_alignment(int(bool(val)))
145+
146+
# 5) integer types (uint, int, etc.)
54147
elif re.match(r"^(u?int)(\d*)$", arg_type):
55-
encoded_offsets += to_hex_with_alignment(arg_value)
56-
# Handle fixed-length bytes (e.g. bytes1..bytes32)
148+
bits, is_signed = _parse_solidity_int_type(arg_type)
149+
encoded += encode_int(int(val), bits, is_signed)
150+
151+
# 6) fixed-length bytes
57152
elif re.match(r"^bytes(\d+)$", arg_type):
58153
match_len = re.match(r"^bytes(\d+)$", arg_type)
59154
num_bytes = int(match_len.group(1))
60-
encoded_offsets += encode_fixed_bytes(arg_value, num_bytes)
61-
elif arg_type == "address[]" and not arg_value:
62-
encoded_data += to_hex_with_alignment(0)
63-
offset = to_hex_with_alignment((arg_index + args_length) * 32)
64-
encoded_offsets += offset
155+
encoded += encode_fixed_bytes(val, num_bytes)
156+
157+
# 7) dynamic bytes
158+
elif arg_type == "bytes":
159+
encoded += encode_bytes(val)
160+
161+
# 8) string
162+
elif arg_type == "string":
163+
# For a struct field that is a string, you'd typically do offset-based dynamic encoding.
164+
raise EncoderError(
165+
"encode_tuple: 'string' inside tuple not fully implemented."
166+
)
167+
65168
else:
66169
raise EncoderError(
67-
f"Unknown constructor argument type '{arg_type}' in tuple"
170+
f"Unknown or unhandled type '{arg_type}' in encode_tuple"
68171
)
69-
return encoded_offsets + encoded_data
172+
173+
return encoded
70174

71175

72176
def encode_dynamic_type(arg_value: str, argument_index: int):
177+
"""
178+
Encodes a top-level dynamic `bytes` or array argument as:
179+
[ offset, ... data in the 'compl_data' section ... ]
180+
This snippet is naive: for a real array, you'd handle array length + each element.
181+
"""
182+
# For now, we just handle a raw bytes value in hex form:
73183
offset_to_start_of_data_part = to_hex_with_alignment((argument_index + 1) * 32)
74184
encoded_value = encode_bytes(arg_value)
75185
return offset_to_start_of_data_part, encoded_value
76186

77187

78188
def encode_string(arg_length: int, compl_data: list, arg_value: str):
189+
"""
190+
Encodes a top-level string argument in the same offset + data approach
191+
used by 'encode_dynamic_type'. We do:
192+
[ offset, ... then length + contents in 'compl_data' ... ]
193+
"""
79194
argument_index = arg_length + len(compl_data)
80-
encoded_value = arg_value.encode("utf-8")
195+
encoded_value_bytes = arg_value.encode("utf-8")
81196
offset_to_start_of_data_part = to_hex_with_alignment(argument_index * 32)
82-
encoded_value_length = to_hex_with_alignment(len(encoded_value))
197+
encoded_value_length = to_hex_with_alignment(len(encoded_value_bytes))
198+
# We'll pad the actual string data to a multiple of 32
199+
hex_str = encoded_value_bytes.hex()
200+
remainder = len(hex_str) % 64
201+
if remainder != 0:
202+
padding_needed = 64 - remainder
203+
hex_str += "0" * padding_needed
204+
83205
return (
84206
offset_to_start_of_data_part,
85207
encoded_value_length,
86-
encoded_value.hex().ljust(64, "0"),
208+
hex_str,
87209
)
88210

89211

90212
def encode_constructor_arguments(constructor_abi: list, constructor_config_args: list):
91-
# see https://docs.soliditylang.org/en/develop/abi-spec.html#contract-abi-specification
92-
# transferred from here:
93-
# https://github.com/lidofinance/lido-dao/blob/master/bytecode-verificator/bytecode_verificator.sh#L369-L405
213+
"""
214+
Encodes each constructor argument in order, concatenating the result.
215+
Appends any 'compl_data' (dynamic offsets, etc.) at the end.
216+
"""
94217
arg_length = len(constructor_abi)
95218

96219
constructor_calldata = ""
97220
compl_data = []
221+
98222
try:
99223
for argument_index in range(arg_length):
100224
arg_type = constructor_abi[argument_index]["type"]
101225
arg_value = constructor_config_args[argument_index]
102226

103227
if arg_type == "address":
104228
constructor_calldata += encode_address(arg_value)
229+
105230
elif arg_type == "bool":
106231
constructor_calldata += to_hex_with_alignment(int(bool(arg_value)))
107-
# Handle any integral type: uint, uint8..uint256, int, int8..int256
232+
108233
elif re.match(r"^(u?int)(\d*)$", arg_type):
109-
constructor_calldata += to_hex_with_alignment(arg_value)
110-
# Handle fixed-length bytes (e.g. bytes1..bytes32)
234+
# parse bits + sign
235+
bits, is_signed = _parse_solidity_int_type(arg_type)
236+
constructor_calldata += encode_int(int(arg_value), bits, is_signed)
237+
111238
elif re.match(r"^bytes(\d+)$", arg_type):
239+
# fixed-length bytes
112240
match_len = re.match(r"^bytes(\d+)$", arg_type)
113241
num_bytes = int(match_len.group(1))
114242
constructor_calldata += encode_fixed_bytes(arg_value, num_bytes)
243+
115244
elif arg_type == "bytes" or arg_type.endswith("[]"):
116-
offset_to_start_of_data_part, encoded_value = encode_dynamic_type(
117-
arg_value, argument_index
118-
)
119-
constructor_calldata += offset_to_start_of_data_part
245+
# top-level dynamic array or raw bytes
246+
offset, encoded_value = encode_dynamic_type(arg_value, argument_index)
247+
constructor_calldata += offset
120248
compl_data.append(encoded_value)
249+
121250
elif arg_type == "string":
122-
offset_to_start_of_data_part, encoded_value_length, encoded_value = (
123-
encode_string(arg_length, compl_data, arg_value)
251+
offset, length_hex, contents_hex = encode_string(
252+
arg_length, compl_data, arg_value
124253
)
125-
constructor_calldata += offset_to_start_of_data_part
126-
compl_data.append(encoded_value_length)
127-
compl_data.append(encoded_value)
254+
constructor_calldata += offset
255+
compl_data.append(length_hex)
256+
compl_data.append(contents_hex)
257+
128258
elif arg_type == "tuple":
129-
args_tuple_types = [
130-
component["type"]
131-
for component in constructor_abi[argument_index]["components"]
132-
]
133-
if all(arg == "address[]" for arg in args_tuple_types):
134-
argument_index = len(constructor_calldata) // 64
135-
offset_to_start_of_data_part = to_hex_with_alignment(
136-
(argument_index + 1) * 32
137-
)
138-
constructor_calldata += offset_to_start_of_data_part
139-
compl_data.append(encode_tuple(args_tuple_types, arg_value))
140-
else:
141-
constructor_calldata += encode_tuple(args_tuple_types, arg_value)
259+
tuple_abi = constructor_abi[argument_index]["components"]
260+
constructor_calldata += encode_tuple(tuple_abi, arg_value)
261+
142262
else:
143-
raise EncoderError(f"Unknown constructor argument type: {arg_type}")
263+
raise EncoderError(
264+
f"Unknown or unhandled constructor argument type: {arg_type}"
265+
)
266+
144267
except Exception as e:
145-
raise EncoderError(e) from None
146-
for offset_to_start_of_data_part in compl_data:
147-
constructor_calldata += offset_to_start_of_data_part
268+
raise EncoderError(f"Failed to encode calldata arguments: {e}") from None
269+
270+
# Append any "completion" data (the actual dynamic data or string contents)
271+
for data_part in compl_data:
272+
constructor_calldata += data_part
148273

149274
return constructor_calldata

‎package-lock.json

+36-45
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎package.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
"@commitlint/config-conventional": "^19.5.0"
99
},
1010
"dependencies": {
11-
"hardhat": "^2.22.17",
11+
"hardhat": "^2.22.19",
1212
"kill-port": "^2.0.1"
1313
},
1414
"packageManager": "npm@10.8.2",

0 commit comments

Comments
 (0)
Please sign in to comment.