Skip to content

Commit e198baa

Browse files
authored Mar 11, 2025
Merge pull request #77 from lidofinance/fix/abi-encode
Fix: DG-related fixes
2 parents 439c56a + 92eb392 commit e198baa

7 files changed

+467
-241
lines changed
 

‎diffyscan/diffyscan.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,12 @@ def run_bytecode_diff(
6262
is_fully_matched = local_compiled_bytecode == remote_deployed_bytecode
6363

6464
if is_fully_matched:
65-
logger.okay(f"Bytecodes are fully matched")
65+
logger.okay(f"Bytecodes fully match")
6666
return
6767

68-
logger.info(f"Automated match hasn't worked out")
68+
logger.info(
69+
f"Static bytecodes not match, trying local deployment to bind immutables"
70+
)
6971

7072
calldata = get_calldata(
7173
contract_address_from_config,
@@ -86,7 +88,7 @@ def run_bytecode_diff(
8688
is_fully_matched = local_deployed_bytecode == remote_deployed_bytecode
8789

8890
if is_fully_matched:
89-
logger.okay(f"Bytecodes are fully matched")
91+
logger.okay(f"Bytecodes fully match")
9092
return
9193

9294
deep_match_bytecode(

‎diffyscan/utils/binary_verifier.py

+145-109
Original file line number | Diff line number | Diff line change
@@ -5,82 +5,154 @@
55
from .custom_exceptions import BinVerifierError
66

77

8-
def format_bytecode(bytecode: str) -> str:
    """Return the operand part of an instruction's hex as a '0x'-prefixed string.

    The first two hex characters (one byte) of ``bytecode`` are dropped and the
    remainder is returned with a ``0x`` prefix. When nothing remains after the
    first byte, an empty string is returned.
    """
    operand_hex = bytecode[2:]
    if not operand_hex:
        # Nothing follows the leading byte, so there is no operand to show.
        return ""
    return "0x" + operand_hex
1011

1112

1213
def trim_solidity_meta(bytecode: str) -> dict:
    """
    Split a hex bytecode string into code and trailing Solidity metadata.

    Solidity appends a CBOR metadata section at the end of the bytecode. The
    last 2 bytes (4 hex characters) are read as the big-endian size of that
    section in bytes; the trimmed tail is that size times 2 (hex characters)
    plus the 4 hex characters of the length field itself.

    Returns a dict with two string entries:
      - "bytecode": the input without the metadata tail
      - "metadata": the removed tail, or "" when nothing was trimmed

    The input is returned untouched (with empty metadata) when it is too short
    to contain the length field, or when the declared size exceeds the input.

    NOTE: the tail is not validated as CBOR; the final two bytes are always
    interpreted as a length.
    """
    length_field_chars = 4  # 2 bytes, hex-encoded

    # Guard: "" or a bare "0x" cannot hold a length field, and int(..., 16)
    # would raise ValueError on them.
    if len(bytecode) < length_field_chars:
        return {"bytecode": bytecode, "metadata": ""}

    meta_size = int(bytecode[-length_field_chars:], 16) * 2 + length_field_chars
    if meta_size > len(bytecode):
        # Declared size does not fit, so treat the tail as regular code.
        return {"bytecode": bytecode, "metadata": ""}

    return {
        "bytecode": bytecode[:-meta_size],
        "metadata": bytecode[-meta_size:],
    }
2226

2327

28+
def parse(bytecode: str):
    """
    Decode raw hex EVM bytecode into a flat list of instructions.

    ``bytecode`` may carry an optional ``0x`` prefix. Returns a tuple
    ``(instructions, unknown_opcodes)`` where:

      - ``instructions`` is a list of dicts of the form
        ``{'start': offset, 'length': N, 'op': {'name': ..., 'code': ...},
        'bytecode': '<hex of the instruction bytes>'}``
      - ``unknown_opcodes`` is a set of ``hex()`` strings for every opcode byte
        not found in ``OPCODES`` (such entries are named "INVALID").
    """
    code = bytes.fromhex(bytecode.removeprefix("0x"))
    code_size = len(code)

    parsed = []
    unrecognized = set()
    offset = 0

    while offset < code_size:
        op_byte = code[offset]
        if op_byte not in OPCODES:
            unrecognized.add(hex(op_byte))

        # Opcodes in the PUSH0..PUSH32 range carry (opcode - PUSH0) operand
        # bytes inline (so PUSH0 itself carries none); everything else is a
        # single byte.
        operand_size = op_byte - PUSH0 if PUSH0 <= op_byte <= PUSH32 else 0
        size = 1 + operand_size
        next_offset = offset + size

        parsed.append(
            {
                "start": offset,
                "length": size,
                "op": {"name": OPCODES.get(op_byte, "INVALID"), "code": op_byte},
                # The slice may be shorter than `size` if the code ends mid-operand.
                "bytecode": code[offset:next_offset].hex(),
            }
        )

        offset = next_offset

    return parsed, unrecognized
59+
60+
61+
def regions_overlap(a_start: int, a_len: int, b_start: int, b_len: int) -> bool:
    """
    Return True if [a_start, a_start+a_len) overlaps with [b_start, b_start+b_len).

    Both ranges are half-open, so ranges that merely touch (one ends exactly
    where the other begins) do not overlap. A range with a non-positive length
    contains no bytes and therefore never overlaps anything.
    """
    # An empty region must not match: otherwise a zero-length entry sitting
    # strictly inside the other range would be reported as overlapping.
    if a_len <= 0 or b_len <= 0:
        return False

    a_end = a_start + a_len
    b_end = b_start + b_len
    # Two non-empty intervals overlap iff each one starts before the other ends.
    return a_start < b_end and b_start < a_end
71+
72+
73+
def overlaps_any_immutable(
    immutables: dict[int, int], instr_start: int, instr_len: int
) -> bool:
    """
    Tell whether an instruction's byte range touches any known immutable region.

    ``immutables`` maps a region's start offset to its length. The instruction
    range [instr_start, instr_start+instr_len) is tested against each region
    [start, start+length) with ``regions_overlap``; the result is True as soon
    as one of them overlaps, and False when none do (including when
    ``immutables`` is empty).
    """
    return any(
        regions_overlap(instr_start, instr_len, region_start, region_len)
        for region_start, region_len in immutables.items()
    )
84+
85+
2486
def deep_match_bytecode(
2587
actual_bytecode: str, expected_bytecode: str, immutables: dict
2688
) -> None:
89+
"""
90+
Compare two chunks of bytecode instruction-by-instruction, ignoring differences
91+
that appear within known 'immutable' regions.
92+
93+
If:
94+
- No differences => "Bytecodes fully match."
95+
- Differences only in immutables => "Bytecodes have differences only on the immutable reference position."
96+
- Differences outside immutables => raises BinVerifierError.
97+
"""
2798
logger.info("Comparing actual code with the expected one...")
2899

29-
actual_trimmed_bytecode = trim_solidity_meta(actual_bytecode)
30-
expected_trimmed_bytecode = trim_solidity_meta(expected_bytecode)
100+
# Possibly strip out metadata from both
101+
actual_trimmed = trim_solidity_meta(actual_bytecode)
102+
expected_trimmed = trim_solidity_meta(expected_bytecode)
31103

32-
if actual_trimmed_bytecode["metadata"] or expected_trimmed_bytecode["metadata"]:
104+
if actual_trimmed["metadata"] or expected_trimmed["metadata"]:
33105
logger.info("Metadata has been detected and trimmed")
34106

35-
actual_instructions, unknown_opcodes_first_half = parse(
36-
actual_trimmed_bytecode["bytecode"]
37-
)
38-
expected_instructions, unknown_opcodes_second_half = parse(
39-
expected_trimmed_bytecode["bytecode"]
40-
)
107+
# Parse instructions
108+
actual_instructions, unknown_opcodes_a = parse(actual_trimmed["bytecode"])
109+
expected_instructions, unknown_opcodes_b = parse(expected_trimmed["bytecode"])
41110

42-
unknown_opcodes = (
43-
unknown_opcodes_first_half or set() | unknown_opcodes_second_half or set()
44-
)
111+
# Check for unknown opcodes
112+
unknown_opcodes = unknown_opcodes_a | unknown_opcodes_b
45113
if unknown_opcodes:
46114
logger.warn(f"Detected unknown opcodes: {unknown_opcodes}")
47115

116+
# If they differ in length, we still attempt to compare
48117
if len(actual_instructions) != len(expected_instructions):
49-
logger.warn(f"Codes have a different length")
118+
logger.warn("Codes have a different length")
50119

51-
zipped_instructions = list(
52-
itertools.zip_longest(actual_instructions, expected_instructions)
53-
)
120+
# Pair them up by index
121+
zipped_instructions = list(zip(actual_instructions, expected_instructions))
122+
123+
# Identify mismatch indexes
124+
def is_mismatch(pair) -> bool:
125+
return pair[0]["bytecode"] != pair[1]["bytecode"]
54126

55-
is_mismatch = (
56-
lambda pair: pair[0] is None
57-
or pair[1] is None
58-
or pair[0].get("bytecode") != pair[1].get("bytecode")
59-
)
60127
mismatches = [
61-
index for index, pair in enumerate(zipped_instructions) if is_mismatch(pair)
128+
idx for idx, pair in enumerate(zipped_instructions) if is_mismatch(pair)
62129
]
63130

64-
near_lines_count = 3 # context depth, i.e., the number of lines above and \below to be displayed for each diff
131+
# If no mismatches at all => fully match
132+
if not mismatches and len(actual_instructions) == len(expected_instructions):
133+
logger.okay("Bytecodes fully match")
134+
return
65135

136+
# We'll show a few lines around each mismatch for context
137+
near_lines_count = 3
66138
checkpoints = {0, *mismatches}
67-
139+
# handle last line if instructions differ in count
68140
if actual_instructions:
69141
checkpoints.add(len(actual_instructions) - 1)
70-
71142
if expected_instructions:
72143
checkpoints.add(len(expected_instructions) - 1)
73144

145+
# Expand around mismatches
74146
for ind in list(checkpoints):
75-
start_index = max(0, ind - near_lines_count)
76-
end_index = min(ind + near_lines_count, len(zipped_instructions) - 1)
77-
78-
checkpoints.update(range(start_index, end_index + 1))
147+
start_idx = max(0, ind - near_lines_count)
148+
end_idx = min(ind + near_lines_count, len(zipped_instructions) - 1)
149+
checkpoints.update(range(start_idx, end_idx + 1))
79150

80151
checkpoints = sorted(checkpoints)
81152

153+
# Print a small legend
82154
logger.divider()
83-
logger.info(f"0000 00 STOP - both expected and actual bytecode instructions match")
155+
logger.info("0000 00 STOP - both expected and actual bytecode instructions match")
84156
logger.info(f'{bgRed("0x0002")} - the actual bytecode differs')
85157
logger.info(
86158
f'{bgYellow("0x0001")} - the actual bytecode differs on the immutable reference position'
@@ -97,94 +169,58 @@ def deep_match_bytecode(
97169
logger.divider()
98170

99171
is_matched_with_excluded_immutables = True
100-
for previous_index, current_index in zip(checkpoints, checkpoints[1:]):
101-
if previous_index != current_index - 1:
172+
173+
# Print the diff lines
174+
# note: for shortness, we won't handle "None" instructions here,
175+
# since we used zip() not zip_longest(). Adjust if needed.
176+
for prev_idx, cur_idx in zip(checkpoints, checkpoints[1:]):
177+
if prev_idx != cur_idx - 1:
102178
print("...")
103179

104-
actual = (
105-
actual_instructions[current_index]
106-
if current_index < len(actual_instructions)
107-
else None
108-
)
109-
expected = (
110-
expected_instructions[current_index]
111-
if current_index < len(expected_instructions)
112-
else None
113-
)
180+
actual = zipped_instructions[cur_idx][0]
181+
expected = zipped_instructions[cur_idx][1]
114182

115-
if not actual and expected:
116-
params = "0x" + expected["bytecode"][2:]
117-
print(
118-
red(
119-
f'{to_hex(current_index, 4)} {to_hex(expected["op"]["code"])} {expected["op"]["name"]} {params}'
120-
)
121-
)
122-
elif actual and not expected:
123-
params = "0x" + actual["bytecode"][2:]
124-
print(
125-
green(
126-
f'{to_hex(current_index, 4)} {to_hex(actual["op"]["code"])} {actual["op"]["name"]} {params}'
127-
)
128-
)
129-
elif actual and expected:
183+
# Compare opcodes
184+
same_opcode = actual["op"]["code"] == expected["op"]["code"]
185+
if same_opcode:
186+
opcode = to_hex(actual["op"]["code"])
187+
opname = actual["op"]["name"]
188+
else:
130189
opcode = (
131-
to_hex(actual["op"]["code"])
132-
if actual["op"]["code"] == expected["op"]["code"]
133-
else bgRed(to_hex(actual["op"]["code"]))
190+
bgRed(to_hex(actual["op"]["code"]))
134191
+ " "
135192
+ bgGreen(to_hex(expected["op"]["code"]))
136193
)
137-
opname = (
138-
actual["op"]["name"]
139-
if actual["op"]["name"] == expected["op"]["name"]
140-
else bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])
141-
)
194+
opname = bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])
142195

143-
actual_params = format_bytecode(actual["bytecode"])
144-
expected_params = format_bytecode(expected["bytecode"])
196+
actual_params = format_bytecode(actual["bytecode"])
197+
expected_params = format_bytecode(expected["bytecode"])
145198

146-
params_length = len(expected["bytecode"]) // 2 - 1
147-
is_immutable = immutables.get(expected["start"] + 1) == params_length
148-
if actual_params != expected_params and not is_immutable:
149-
is_matched_with_excluded_immutables = False
150-
params = (
151-
actual_params
152-
if actual_params == expected_params
153-
else (
154-
bgYellow(actual_params) + " " + bgGreen(expected_params)
155-
if is_immutable
156-
else bgRed(actual_params) + " " + bgGreen(expected_params)
157-
)
158-
)
159-
print(f"{to_hex(current_index, 4)} {opcode} {opname} {params}")
199+
# Check partial overlap with immutables
200+
instr_start = expected["start"]
201+
instr_len = expected["length"]
202+
within_immutable_region = overlaps_any_immutable(
203+
immutables, instr_start, instr_len
204+
)
205+
206+
if actual_params == expected_params:
207+
# Perfect match => no highlight
208+
params = actual_params
160209
else:
161-
raise BinVerifierError("Invalid bytecode difference data")
210+
# There's a difference
211+
if within_immutable_region:
212+
params = bgYellow(actual_params) + " " + bgGreen(expected_params)
213+
else:
214+
params = bgRed(actual_params) + " " + bgGreen(expected_params)
215+
is_matched_with_excluded_immutables = False
216+
217+
print(f"{to_hex(cur_idx, 4)} {opcode} {opname} {params}")
162218

219+
# If we found any mismatch outside immutables => fail
163220
if not is_matched_with_excluded_immutables:
164221
raise BinVerifierError(
165-
f"Bytecodes have differences not on the immutable reference position"
222+
"Bytecodes have differences not on the immutable reference position"
166223
)
167224

168-
logger.okay(f"Bytecodes have differences only on the immutable reference position")
169-
170-
171-
def parse(bytecode):
172-
buffer = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode)
173-
instructions = []
174-
i = 0
175-
unknown_opcodes = set()
176-
while i < len(buffer):
177-
opcode = buffer[i]
178-
if opcode not in OPCODES:
179-
unknown_opcodes.add(hex(opcode))
180-
length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0)
181-
instructions.append(
182-
{
183-
"start": i,
184-
"length": length,
185-
"op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode},
186-
"bytecode": buffer[i : i + length].hex(),
187-
}
188-
)
189-
i += length
190-
return instructions, unknown_opcodes
225+
# Otherwise, differences exist but only in immutables
226+
logger.okay("Bytecodes have differences only on the immutable reference position")

‎diffyscan/utils/custom_exceptions.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ def __init__(self, reason: str):
3636

3737
class ExplorerError(BaseCustomException):
    """Error raised when communicating with a remote resource fails."""

    def __init__(self, reason: str):
        # `reason` is appended verbatim to the fixed message prefix.
        message = f"Failed to communicate with a remote resource: {reason}"
        super().__init__(message)
4040

4141

4242
class BinVerifierError(BaseCustomException):

0 commit comments

Comments
 (0)
Please sign in to comment.