5
5
from .custom_exceptions import BinVerifierError
6
6
7
7
8
def format_bytecode(bytecode: str) -> str:
    """Return the operand part of one instruction's hex as a '0x'-prefixed string.

    The input is the hex of a single instruction without a '0x' prefix (as
    stored under the "bytecode" key by ``parse``), so its first two hex
    characters are the opcode byte. That byte is dropped and the remaining
    hex is returned with a '0x' prefix.

    Returns an empty string when there is nothing after the first byte
    (input of two characters or fewer, including the empty string).
    """
    # Two hex characters == one byte: an opcode with no operand bytes.
    if len(bytecode) <= 2:
        return ""
    return "0x" + bytecode[2:]
10
11
11
12
12
13
def trim_solidity_meta(bytecode: str) -> dict:
    """Split a trailing Solidity metadata section off hex-encoded bytecode.

    The last two bytes (four hex characters) are read as a big-endian length
    in bytes. The metadata size in hex characters is ``length * 2 + 4``: the
    payload, plus the four characters of the length field itself.

    Returns a dict with two string entries:
      - "bytecode": the input without the metadata section
      - "metadata": the trimmed section, or "" when nothing was trimmed

    The input is returned untouched (with empty metadata) when it is too
    short to hold a length field, or when the computed size exceeds the
    input length, i.e. the tail cannot be a valid metadata section.

    Raises ValueError if the last four characters are not valid hex.
    """
    untrimmed = {"bytecode": bytecode, "metadata": ""}

    # Fewer than four hex characters cannot contain the 2-byte length field;
    # this also keeps int() from failing on an empty string.
    if len(bytecode) < 4:
        return untrimmed

    meta_size = int(bytecode[-4:], 16) * 2 + 4
    if meta_size > len(bytecode):
        return untrimmed

    return {
        "bytecode": bytecode[:-meta_size],
        "metadata": bytecode[-meta_size:],
    }
22
26
23
27
28
def parse(bytecode: str) -> tuple[list[dict], set[str]]:
    """Split hex-encoded EVM bytecode into a flat list of instructions.

    ``bytecode`` is a hex string, with or without a leading "0x".

    Returns a ``(instructions, unknown_opcodes)`` tuple:
      - ``instructions`` is a list of dicts shaped like
        ``{"start": offset, "length": N, "op": {"name": ..., "code": ...},
        "bytecode": "<hex of the instruction>"}``, where ``start`` is the
        byte offset of the opcode in the decoded buffer;
      - ``unknown_opcodes`` is the set of ``hex(opcode)`` strings for every
        opcode byte that is not a key of ``OPCODES``. Such instructions are
        still emitted, with the name "INVALID".

    Raises ValueError (from ``bytes.fromhex``) if the input is not valid hex.
    """
    raw = bytes.fromhex(bytecode[2:] if bytecode.startswith("0x") else bytecode)
    instructions = []
    unknown_opcodes = set()

    offset = 0
    while offset < len(raw):
        opcode = raw[offset]
        if opcode not in OPCODES:
            unknown_opcodes.add(hex(opcode))

        # Opcodes in PUSH0..PUSH32 carry (opcode - PUSH0) operand bytes, so
        # PUSH0 itself is one byte long; every other opcode is one byte.
        length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0)

        instructions.append(
            {
                "start": offset,
                "length": length,
                "op": {"name": OPCODES.get(opcode, "INVALID"), "code": opcode},
                # Slicing clamps at the end of the buffer, so a PUSH that is
                # cut off by the end of the code yields fewer bytes here
                # than "length" reports.
                "bytecode": raw[offset : offset + length].hex(),
            }
        )
        offset += length

    return instructions, unknown_opcodes
59
+
60
+
61
def regions_overlap(a_start: int, a_len: int, b_start: int, b_len: int) -> bool:
    """Return True if two half-open ranges share at least one position.

    The ranges are ``[a_start, a_start + a_len)`` and
    ``[b_start, b_start + b_len)``. Ranges that merely touch (one ends
    exactly where the other starts) do not overlap.
    """
    a_end = a_start + a_len
    b_end = b_start + b_len
    # Two ranges overlap exactly when each one starts before the other ends.
    return a_start < b_end and b_start < a_end
71
+
72
+
73
def overlaps_any_immutable(
    immutables: dict[int, int], instr_start: int, instr_len: int
) -> bool:
    """Return True if an instruction's byte range touches any immutable region.

    ``immutables`` maps the start offset of each immutable region to its
    length. The instruction occupies ``[instr_start, instr_start + instr_len)``
    and each region ``[start, start + length)``; a partial overlap with any
    region is enough. An empty mapping yields False.
    """
    return any(
        regions_overlap(instr_start, instr_len, imm_start, imm_len)
        for imm_start, imm_len in immutables.items()
    )
84
+
85
+
24
86
def deep_match_bytecode (
25
87
actual_bytecode : str , expected_bytecode : str , immutables : dict
26
88
) -> None :
89
+ """
90
+ Compare two chunks of bytecode instruction-by-instruction, ignoring differences
91
+ that appear within known 'immutable' regions.
92
+
93
+ If:
94
+ - No differences => "Bytecodes fully match."
95
+ - Differences only in immutables => "Bytecodes have differences only on the immutable reference position."
96
+ - Differences outside immutables => raises BinVerifierError.
97
+ """
27
98
logger .info ("Comparing actual code with the expected one..." )
28
99
29
- actual_trimmed_bytecode = trim_solidity_meta (actual_bytecode )
30
- expected_trimmed_bytecode = trim_solidity_meta (expected_bytecode )
100
+ # Possibly strip out metadata from both
101
+ actual_trimmed = trim_solidity_meta (actual_bytecode )
102
+ expected_trimmed = trim_solidity_meta (expected_bytecode )
31
103
32
- if actual_trimmed_bytecode ["metadata" ] or expected_trimmed_bytecode ["metadata" ]:
104
+ if actual_trimmed ["metadata" ] or expected_trimmed ["metadata" ]:
33
105
logger .info ("Metadata has been detected and trimmed" )
34
106
35
- actual_instructions , unknown_opcodes_first_half = parse (
36
- actual_trimmed_bytecode ["bytecode" ]
37
- )
38
- expected_instructions , unknown_opcodes_second_half = parse (
39
- expected_trimmed_bytecode ["bytecode" ]
40
- )
107
+ # Parse instructions
108
+ actual_instructions , unknown_opcodes_a = parse (actual_trimmed ["bytecode" ])
109
+ expected_instructions , unknown_opcodes_b = parse (expected_trimmed ["bytecode" ])
41
110
42
- unknown_opcodes = (
43
- unknown_opcodes_first_half or set () | unknown_opcodes_second_half or set ()
44
- )
111
+ # Check for unknown opcodes
112
+ unknown_opcodes = unknown_opcodes_a | unknown_opcodes_b
45
113
if unknown_opcodes :
46
114
logger .warn (f"Detected unknown opcodes: { unknown_opcodes } " )
47
115
116
+ # If they differ in length, we still attempt to compare
48
117
if len (actual_instructions ) != len (expected_instructions ):
49
- logger .warn (f "Codes have a different length" )
118
+ logger .warn ("Codes have a different length" )
50
119
51
- zipped_instructions = list (
52
- itertools .zip_longest (actual_instructions , expected_instructions )
53
- )
120
+ # Pair them up by index
121
+ zipped_instructions = list (zip (actual_instructions , expected_instructions ))
122
+
123
+ # Identify mismatch indexes
124
+ def is_mismatch (pair ) -> bool :
125
+ return pair [0 ]["bytecode" ] != pair [1 ]["bytecode" ]
54
126
55
- is_mismatch = (
56
- lambda pair : pair [0 ] is None
57
- or pair [1 ] is None
58
- or pair [0 ].get ("bytecode" ) != pair [1 ].get ("bytecode" )
59
- )
60
127
mismatches = [
61
- index for index , pair in enumerate (zipped_instructions ) if is_mismatch (pair )
128
+ idx for idx , pair in enumerate (zipped_instructions ) if is_mismatch (pair )
62
129
]
63
130
64
- near_lines_count = 3 # context depth, i.e., the number of lines above and \below to be displayed for each diff
131
+ # If no mismatches at all => fully match
132
+ if not mismatches and len (actual_instructions ) == len (expected_instructions ):
133
+ logger .okay ("Bytecodes fully match" )
134
+ return
65
135
136
+ # We'll show a few lines around each mismatch for context
137
+ near_lines_count = 3
66
138
checkpoints = {0 , * mismatches }
67
-
139
+ # handle last line if instructions differ in count
68
140
if actual_instructions :
69
141
checkpoints .add (len (actual_instructions ) - 1 )
70
-
71
142
if expected_instructions :
72
143
checkpoints .add (len (expected_instructions ) - 1 )
73
144
145
+ # Expand around mismatches
74
146
for ind in list (checkpoints ):
75
- start_index = max (0 , ind - near_lines_count )
76
- end_index = min (ind + near_lines_count , len (zipped_instructions ) - 1 )
77
-
78
- checkpoints .update (range (start_index , end_index + 1 ))
147
+ start_idx = max (0 , ind - near_lines_count )
148
+ end_idx = min (ind + near_lines_count , len (zipped_instructions ) - 1 )
149
+ checkpoints .update (range (start_idx , end_idx + 1 ))
79
150
80
151
checkpoints = sorted (checkpoints )
81
152
153
+ # Print a small legend
82
154
logger .divider ()
83
- logger .info (f "0000 00 STOP - both expected and actual bytecode instructions match" )
155
+ logger .info ("0000 00 STOP - both expected and actual bytecode instructions match" )
84
156
logger .info (f'{ bgRed ("0x0002" )} - the actual bytecode differs' )
85
157
logger .info (
86
158
f'{ bgYellow ("0x0001" )} - the actual bytecode differs on the immutable reference position'
@@ -97,94 +169,58 @@ def deep_match_bytecode(
97
169
logger .divider ()
98
170
99
171
is_matched_with_excluded_immutables = True
100
- for previous_index , current_index in zip (checkpoints , checkpoints [1 :]):
101
- if previous_index != current_index - 1 :
172
+
173
+ # Print the diff lines
174
+ # NOTE: pairs come from zip(), not zip_longest(), so there are no None entries here and
175
+ # instructions beyond the shorter of the two sequences are not compared. Adjust if needed.
176
+ for prev_idx , cur_idx in zip (checkpoints , checkpoints [1 :]):
177
+ if prev_idx != cur_idx - 1 :
102
178
print ("..." )
103
179
104
- actual = (
105
- actual_instructions [current_index ]
106
- if current_index < len (actual_instructions )
107
- else None
108
- )
109
- expected = (
110
- expected_instructions [current_index ]
111
- if current_index < len (expected_instructions )
112
- else None
113
- )
180
+ actual = zipped_instructions [cur_idx ][0 ]
181
+ expected = zipped_instructions [cur_idx ][1 ]
114
182
115
- if not actual and expected :
116
- params = "0x" + expected ["bytecode" ][2 :]
117
- print (
118
- red (
119
- f'{ to_hex (current_index , 4 )} { to_hex (expected ["op" ]["code" ])} { expected ["op" ]["name" ]} { params } '
120
- )
121
- )
122
- elif actual and not expected :
123
- params = "0x" + actual ["bytecode" ][2 :]
124
- print (
125
- green (
126
- f'{ to_hex (current_index , 4 )} { to_hex (actual ["op" ]["code" ])} { actual ["op" ]["name" ]} { params } '
127
- )
128
- )
129
- elif actual and expected :
183
+ # Compare opcodes
184
+ same_opcode = actual ["op" ]["code" ] == expected ["op" ]["code" ]
185
+ if same_opcode :
186
+ opcode = to_hex (actual ["op" ]["code" ])
187
+ opname = actual ["op" ]["name" ]
188
+ else :
130
189
opcode = (
131
- to_hex (actual ["op" ]["code" ])
132
- if actual ["op" ]["code" ] == expected ["op" ]["code" ]
133
- else bgRed (to_hex (actual ["op" ]["code" ]))
190
+ bgRed (to_hex (actual ["op" ]["code" ]))
134
191
+ " "
135
192
+ bgGreen (to_hex (expected ["op" ]["code" ]))
136
193
)
137
- opname = (
138
- actual ["op" ]["name" ]
139
- if actual ["op" ]["name" ] == expected ["op" ]["name" ]
140
- else bgRed (actual ["op" ]["name" ]) + " " + bgGreen (expected ["op" ]["name" ])
141
- )
194
+ opname = bgRed (actual ["op" ]["name" ]) + " " + bgGreen (expected ["op" ]["name" ])
142
195
143
- actual_params = format_bytecode (actual ["bytecode" ])
144
- expected_params = format_bytecode (expected ["bytecode" ])
196
+ actual_params = format_bytecode (actual ["bytecode" ])
197
+ expected_params = format_bytecode (expected ["bytecode" ])
145
198
146
- params_length = len (expected ["bytecode" ]) // 2 - 1
147
- is_immutable = immutables .get (expected ["start" ] + 1 ) == params_length
148
- if actual_params != expected_params and not is_immutable :
149
- is_matched_with_excluded_immutables = False
150
- params = (
151
- actual_params
152
- if actual_params == expected_params
153
- else (
154
- bgYellow (actual_params ) + " " + bgGreen (expected_params )
155
- if is_immutable
156
- else bgRed (actual_params ) + " " + bgGreen (expected_params )
157
- )
158
- )
159
- print (f"{ to_hex (current_index , 4 )} { opcode } { opname } { params } " )
199
+ # Check partial overlap with immutables
200
+ instr_start = expected ["start" ]
201
+ instr_len = expected ["length" ]
202
+ within_immutable_region = overlaps_any_immutable (
203
+ immutables , instr_start , instr_len
204
+ )
205
+
206
+ if actual_params == expected_params :
207
+ # Perfect match => no highlight
208
+ params = actual_params
160
209
else :
161
- raise BinVerifierError ("Invalid bytecode difference data" )
210
+ # There's a difference
211
+ if within_immutable_region :
212
+ params = bgYellow (actual_params ) + " " + bgGreen (expected_params )
213
+ else :
214
+ params = bgRed (actual_params ) + " " + bgGreen (expected_params )
215
+ is_matched_with_excluded_immutables = False
216
+
217
+ print (f"{ to_hex (cur_idx , 4 )} { opcode } { opname } { params } " )
162
218
219
+ # If we found any mismatch outside immutables => fail
163
220
if not is_matched_with_excluded_immutables :
164
221
raise BinVerifierError (
165
- f "Bytecodes have differences not on the immutable reference position"
222
+ "Bytecodes have differences not on the immutable reference position"
166
223
)
167
224
168
- logger .okay (f"Bytecodes have differences only on the immutable reference position" )
169
-
170
-
171
- def parse (bytecode ):
172
- buffer = bytes .fromhex (bytecode [2 :] if bytecode .startswith ("0x" ) else bytecode )
173
- instructions = []
174
- i = 0
175
- unknown_opcodes = set ()
176
- while i < len (buffer ):
177
- opcode = buffer [i ]
178
- if opcode not in OPCODES :
179
- unknown_opcodes .add (hex (opcode ))
180
- length = 1 + (opcode - PUSH0 if PUSH0 <= opcode <= PUSH32 else 0 )
181
- instructions .append (
182
- {
183
- "start" : i ,
184
- "length" : length ,
185
- "op" : {"name" : OPCODES .get (opcode , "INVALID" ), "code" : opcode },
186
- "bytecode" : buffer [i : i + length ].hex (),
187
- }
188
- )
189
- i += length
190
- return instructions , unknown_opcodes
225
+ # Otherwise, differences exist but only in immutables
226
+ logger .okay ("Bytecodes have differences only on the immutable reference position" )
0 commit comments