Skip to content

Fix: DG-related fixes #77

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions diffyscan/diffyscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@ def run_bytecode_diff(
is_fully_matched = local_compiled_bytecode == remote_deployed_bytecode

if is_fully_matched:
logger.okay(f"Bytecodes are fully matched")
logger.okay(f"Bytecodes fully match")
return

logger.info(f"Automated match hasn't worked out")
logger.info(
f"Static bytecodes not match, trying local deployment to bind immutables"
)

calldata = get_calldata(
contract_address_from_config,
Expand All @@ -86,7 +88,7 @@ def run_bytecode_diff(
is_fully_matched = local_deployed_bytecode == remote_deployed_bytecode

if is_fully_matched:
logger.okay(f"Bytecodes are fully matched")
logger.okay(f"Bytecodes fully match")
return

deep_match_bytecode(
Expand Down
254 changes: 145 additions & 109 deletions diffyscan/utils/binary_verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,82 +5,154 @@
from .custom_exceptions import BinVerifierError


def format_bytecode(bytecode: str) -> str:
    """Return the operand part of one instruction as a '0x'-prefixed hex string.

    ``bytecode`` is the un-prefixed hex of a single instruction. Its first
    two hex characters (one byte, the opcode) are dropped and the rest is
    returned with a "0x" prefix. An instruction with nothing after the
    opcode byte yields an empty string.
    """
    operand_hex = bytecode[2:]
    return "0x" + operand_hex if operand_hex else ""


def trim_solidity_meta(bytecode: str) -> dict:
    """
    Split a hex bytecode string into code and trailing Solidity metadata.

    The final 2 bytes (4 hex characters) are read as a big-endian length of
    the metadata payload in bytes. The metadata section in hex characters is
    therefore ``length * 2`` plus the 4 characters of the length field itself.

    Returns a dict with two string entries:
      - "bytecode": the input without the metadata section
      - "metadata": the removed tail, or "" when nothing was removed

    The input is returned untouched (with empty metadata) when it is too
    short to hold the length field or when the computed size exceeds the
    input length. This is a size-based heuristic only: the tail is not
    checked to be valid CBOR.
    """
    # Fewer than 4 hex chars cannot contain the 2-byte length field; without
    # this guard int("", 16) / int("0x", 16) would raise ValueError.
    if len(bytecode) < 4:
        return {"bytecode": bytecode, "metadata": ""}

    meta_size = int(bytecode[-4:], 16) * 2 + 4

    # A declared size larger than the whole string means the tail is not a
    # metadata length field, so leave the input as is.
    if meta_size > len(bytecode):
        return {"bytecode": bytecode, "metadata": ""}

    return {
        "bytecode": bytecode[:-meta_size],
        "metadata": bytecode[-meta_size:],
    }


def parse(bytecode: str):
    """
    Disassemble hex-encoded EVM bytecode into a flat list of instructions.

    The hex string may carry an optional "0x" prefix. Returns a tuple
    ``(instructions, unknown_opcodes)``:
      - instructions: one dict per instruction with keys
          "start"    - byte offset of the opcode in the decoded buffer
          "length"   - opcode byte plus any immediate operand bytes
          "op"       - {"name": ..., "code": ...}; the name is "INVALID"
                       when the opcode byte is not in OPCODES
          "bytecode" - hex of the instruction's bytes
      - unknown_opcodes: set of ``hex()`` strings for every opcode byte
        that is not present in OPCODES
    """
    hex_body = bytecode[2:] if bytecode.startswith("0x") else bytecode
    raw = bytes.fromhex(hex_body)
    total = len(raw)

    parsed = []
    unrecognized = set()
    offset = 0

    while offset < total:
        code = raw[offset]
        if code not in OPCODES:
            unrecognized.add(hex(code))

        # Only opcodes in the PUSH0..PUSH32 range carry immediate bytes;
        # their count is the opcode's distance from PUSH0.
        operand_size = code - PUSH0 if PUSH0 <= code <= PUSH32 else 0
        size = 1 + operand_size

        # Slicing clamps at the end of the buffer, so a truncated final
        # instruction yields fewer bytes than "length" reports.
        chunk_hex = raw[offset : offset + size].hex()
        parsed.append(
            {
                "start": offset,
                "length": size,
                "op": {"name": OPCODES.get(code, "INVALID"), "code": code},
                "bytecode": chunk_hex,
            }
        )

        offset += size

    return parsed, unrecognized


def regions_overlap(a_start: int, a_len: int, b_start: int, b_len: int) -> bool:
    """
    Return True if [a_start, a_start+a_len) overlaps with [b_start, b_start+b_len).

    Both ranges are half-open, so ranges that merely touch end-to-start do
    not overlap. A range with a non-positive length covers no bytes and
    therefore never overlaps anything.
    """
    # An empty range strictly inside the other one would otherwise pass the
    # endpoint comparison below and be reported as overlapping.
    if a_len <= 0 or b_len <= 0:
        return False

    # Two non-empty half-open ranges intersect exactly when each one starts
    # before the other one ends.
    return a_start < b_start + b_len and b_start < a_start + a_len


def overlaps_any_immutable(
    immutables: dict[int, int], instr_start: int, instr_len: int
) -> bool:
    """
    Tell whether an instruction's byte range touches any immutable region.

    'immutables' maps a region's start offset to its length. The instruction
    range [instr_start, instr_start+instr_len) is tested against each region
    with regions_overlap, stopping at the first hit. Returns False when
    'immutables' is empty.
    """
    return any(
        regions_overlap(instr_start, instr_len, region_start, region_len)
        for region_start, region_len in immutables.items()
    )


def deep_match_bytecode(
actual_bytecode: str, expected_bytecode: str, immutables: dict
) -> None:
"""
Compare two chunks of bytecode instruction-by-instruction, ignoring differences
that appear within known 'immutable' regions.

If:
- No differences => "Bytecodes fully match."
- Differences only in immutables => "Bytecodes have differences only on the immutable reference position."
- Differences outside immutables => raises BinVerifierError.
"""
logger.info("Comparing actual code with the expected one...")

actual_trimmed_bytecode = trim_solidity_meta(actual_bytecode)
expected_trimmed_bytecode = trim_solidity_meta(expected_bytecode)
# Possibly strip out metadata from both
actual_trimmed = trim_solidity_meta(actual_bytecode)
expected_trimmed = trim_solidity_meta(expected_bytecode)

if actual_trimmed_bytecode["metadata"] or expected_trimmed_bytecode["metadata"]:
if actual_trimmed["metadata"] or expected_trimmed["metadata"]:
logger.info("Metadata has been detected and trimmed")

actual_instructions, unknown_opcodes_first_half = parse(
actual_trimmed_bytecode["bytecode"]
)
expected_instructions, unknown_opcodes_second_half = parse(
expected_trimmed_bytecode["bytecode"]
)
# Parse instructions
actual_instructions, unknown_opcodes_a = parse(actual_trimmed["bytecode"])
expected_instructions, unknown_opcodes_b = parse(expected_trimmed["bytecode"])

unknown_opcodes = (
unknown_opcodes_first_half or set() | unknown_opcodes_second_half or set()
)
# Check for unknown opcodes
unknown_opcodes = unknown_opcodes_a | unknown_opcodes_b
if unknown_opcodes:
logger.warn(f"Detected unknown opcodes: {unknown_opcodes}")

# If they differ in length, we still attempt to compare
if len(actual_instructions) != len(expected_instructions):
logger.warn(f"Codes have a different length")
logger.warn("Codes have a different length")

zipped_instructions = list(
itertools.zip_longest(actual_instructions, expected_instructions)
)
# Pair them up by index
zipped_instructions = list(zip(actual_instructions, expected_instructions))

# Identify mismatch indexes
def is_mismatch(pair) -> bool:
return pair[0]["bytecode"] != pair[1]["bytecode"]

is_mismatch = (
lambda pair: pair[0] is None
or pair[1] is None
or pair[0].get("bytecode") != pair[1].get("bytecode")
)
mismatches = [
index for index, pair in enumerate(zipped_instructions) if is_mismatch(pair)
idx for idx, pair in enumerate(zipped_instructions) if is_mismatch(pair)
]

near_lines_count = 3 # context depth, i.e., the number of lines above and \below to be displayed for each diff
# If no mismatches at all => fully match
if not mismatches and len(actual_instructions) == len(expected_instructions):
logger.okay("Bytecodes fully match")
return

# We'll show a few lines around each mismatch for context
near_lines_count = 3
checkpoints = {0, *mismatches}

# handle last line if instructions differ in count
if actual_instructions:
checkpoints.add(len(actual_instructions) - 1)

if expected_instructions:
checkpoints.add(len(expected_instructions) - 1)

# Expand around mismatches
for ind in list(checkpoints):
start_index = max(0, ind - near_lines_count)
end_index = min(ind + near_lines_count, len(zipped_instructions) - 1)

checkpoints.update(range(start_index, end_index + 1))
start_idx = max(0, ind - near_lines_count)
end_idx = min(ind + near_lines_count, len(zipped_instructions) - 1)
checkpoints.update(range(start_idx, end_idx + 1))

checkpoints = sorted(checkpoints)

# Print a small legend
logger.divider()
logger.info(f"0000 00 STOP - both expected and actual bytecode instructions match")
logger.info("0000 00 STOP - both expected and actual bytecode instructions match")
logger.info(f'{bgRed("0x0002")} - the actual bytecode differs')
logger.info(
f'{bgYellow("0x0001")} - the actual bytecode differs on the immutable reference position'
Expand All @@ -97,94 +169,58 @@ def deep_match_bytecode(
logger.divider()

is_matched_with_excluded_immutables = True
for previous_index, current_index in zip(checkpoints, checkpoints[1:]):
if previous_index != current_index - 1:

# Print the diff lines
# note: for shortness, we won't handle "None" instructions here,
# since we used zip() not zip_longest(). Adjust if needed.
for prev_idx, cur_idx in zip(checkpoints, checkpoints[1:]):
if prev_idx != cur_idx - 1:
print("...")

actual = (
actual_instructions[current_index]
if current_index < len(actual_instructions)
else None
)
expected = (
expected_instructions[current_index]
if current_index < len(expected_instructions)
else None
)
actual = zipped_instructions[cur_idx][0]
expected = zipped_instructions[cur_idx][1]

if not actual and expected:
params = "0x" + expected["bytecode"][2:]
print(
red(
f'{to_hex(current_index, 4)} {to_hex(expected["op"]["code"])} {expected["op"]["name"]} {params}'
)
)
elif actual and not expected:
params = "0x" + actual["bytecode"][2:]
print(
green(
f'{to_hex(current_index, 4)} {to_hex(actual["op"]["code"])} {actual["op"]["name"]} {params}'
)
)
elif actual and expected:
# Compare opcodes
same_opcode = actual["op"]["code"] == expected["op"]["code"]
if same_opcode:
opcode = to_hex(actual["op"]["code"])
opname = actual["op"]["name"]
else:
opcode = (
to_hex(actual["op"]["code"])
if actual["op"]["code"] == expected["op"]["code"]
else bgRed(to_hex(actual["op"]["code"]))
bgRed(to_hex(actual["op"]["code"]))
+ " "
+ bgGreen(to_hex(expected["op"]["code"]))
)
opname = (
actual["op"]["name"]
if actual["op"]["name"] == expected["op"]["name"]
else bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])
)
opname = bgRed(actual["op"]["name"]) + " " + bgGreen(expected["op"]["name"])

actual_params = format_bytecode(actual["bytecode"])
expected_params = format_bytecode(expected["bytecode"])
actual_params = format_bytecode(actual["bytecode"])
expected_params = format_bytecode(expected["bytecode"])

params_length = len(expected["bytecode"]) // 2 - 1
is_immutable = immutables.get(expected["start"] + 1) == params_length
if actual_params != expected_params and not is_immutable:
is_matched_with_excluded_immutables = False
params = (
actual_params
if actual_params == expected_params
else (
bgYellow(actual_params) + " " + bgGreen(expected_params)
if is_immutable
else bgRed(actual_params) + " " + bgGreen(expected_params)
)
)
print(f"{to_hex(current_index, 4)} {opcode} {opname} {params}")
# Check partial overlap with immutables
instr_start = expected["start"]
instr_len = expected["length"]
within_immutable_region = overlaps_any_immutable(
immutables, instr_start, instr_len
)

if actual_params == expected_params:
# Perfect match => no highlight
params = actual_params
else:
raise BinVerifierError("Invalid bytecode difference data")
# There's a difference
if within_immutable_region:
params = bgYellow(actual_params) + " " + bgGreen(expected_params)
else:
params = bgRed(actual_params) + " " + bgGreen(expected_params)
is_matched_with_excluded_immutables = False

print(f"{to_hex(cur_idx, 4)} {opcode} {opname} {params}")

# If we found any mismatch outside immutables => fail
if not is_matched_with_excluded_immutables:
raise BinVerifierError(
f"Bytecodes have differences not on the immutable reference position"
"Bytecodes have differences not on the immutable reference position"
)

logger.okay(f"Bytecodes have differences only on the immutable reference position")


def parse(bytecode):
buffer = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode)
instructions = []
i = 0
unknown_opcodes = set()
while i < len(buffer):
opcode = buffer[i]
if opcode not in OPCODES:
unknown_opcodes.add(hex(opcode))
length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0)
instructions.append(
{
"start": i,
"length": length,
"op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode},
"bytecode": buffer[i : i + length].hex(),
}
)
i += length
return instructions, unknown_opcodes
# Otherwise, differences exist but only in immutables
logger.okay("Bytecodes have differences only on the immutable reference position")
2 changes: 1 addition & 1 deletion diffyscan/utils/custom_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, reason: str):

class ExplorerError(BaseCustomException):
def __init__(self, reason: str):
super().__init__(f"Failed to communicate with Blockchain explorer: {reason}")
super().__init__(f"Failed to communicate with a remote resource: {reason}")


class BinVerifierError(BaseCustomException):
Expand Down
Loading