|
3 | 3 | from .custom_exceptions import EncoderError
|
4 | 4 |
|
5 | 5 |
|
| 6 | +def _parse_solidity_int_type(arg_type: str) -> tuple[int, bool]: |
| 7 | + """ |
| 8 | + Given a Solidity int/uint type (e.g. 'uint256', 'int128', 'uint', 'int'), |
| 9 | + returns (bits, is_signed). |
| 10 | + - bits = 256 if no explicit size is specified. |
| 11 | + - is_signed = True if it starts with 'int', False if 'uint'. |
| 12 | + """ |
| 13 | + match = re.match(r"^(u?int)(\d*)$", arg_type) |
| 14 | + if not match: |
| 15 | + raise EncoderError(f"Invalid integer type format '{arg_type}'.") |
| 16 | + is_signed = not match.group(1).startswith("u") # 'uint' => False, 'int' => True |
| 17 | + bits_str = match.group(2) |
| 18 | + bits = int(bits_str) if bits_str else 256 |
| 19 | + return (bits, is_signed) |
| 20 | + |
| 21 | + |
def to_hex_with_alignment(value: int) -> str:
    """
    Encode a non-negative integer as one 32-byte word (64 lowercase hex chars).

    Negative numbers must be converted to their two's-complement form by the
    caller before being passed in.

    Args:
        value: Integer in the range 0 .. 2**256 - 1.

    Returns:
        The zero-padded, 64-character hexadecimal representation.

    Raises:
        EncoderError: If the value is negative or does not fit into 256 bits.
    """
    word = format(value, "064x")
    # format() keeps a leading '-' for negative input and grows beyond 64
    # characters for values >= 2**256; either would shift every following
    # word in the calldata, so refuse instead of emitting a malformed word.
    if word.startswith("-") or len(word) > 64:
        raise EncoderError(
            f"Value {value} does not fit into an unsigned 32-byte word."
        )
    return word
|
8 | 28 |
|
9 | 29 |
|
def encode_int(value: int, bits: int, is_signed: bool) -> str:
    """
    Encode a Solidity int<bits>/uint<bits> value as one 32-byte hex word.

    Args:
        value: The integer to encode (a bool is treated as 0/1).
        bits: Declared bit width of the Solidity type (8..256).
        is_signed: True for 'int...' types, False for 'uint...' types.

    Returns:
        64 hex characters. Negative values are encoded in two's complement
        over the full 256-bit word, i.e. sign-extended with 0xff bytes.

    Raises:
        EncoderError: If ``bits`` is outside 1..256 or ``value`` is outside
            the range representable by the declared type.
    """
    if isinstance(value, bool):
        value = int(value)

    if not 1 <= bits <= 256:
        raise EncoderError(f"Invalid integer bit width: {bits}.")

    if is_signed:
        lowest = -(1 << (bits - 1))
        highest = (1 << (bits - 1)) - 1
    else:
        lowest = 0
        highest = (1 << bits) - 1

    if not lowest <= value <= highest:
        type_name = f"{'int' if is_signed else 'uint'}{bits}"
        raise EncoderError(f"Value {value} is out of range for {type_name}.")

    if value < 0:
        # The ABI pads negative numbers on the left with 0xff up to 32 bytes,
        # so the complement is taken modulo 2**256 regardless of `bits`.
        value += 1 << 256

    return to_hex_with_alignment(value)
| 51 | + |
| 52 | + |
def encode_address(address: str) -> str:
    """
    Encode an address as one 32-byte hex word (left-padded with zeros).

    Args:
        address: Hexadecimal string, with or without a leading '0x'.

    Returns:
        64 hex characters representing the address.

    Raises:
        ValueError: If the string is not valid hexadecimal.
        EncoderError: If the value does not fit into 160 bits (20 bytes).
    """
    hex_digits = address.lower()
    # Strip the marker only at the very start; removing "0x" anywhere would
    # silently turn malformed input like "120x34" into a valid number.
    if hex_digits.startswith("0x"):
        hex_digits = hex_digits[2:]

    number = int(hex_digits, 16)
    if not 0 <= number < (1 << 160):
        raise EncoderError(f"Address '{address}' does not fit into 20 bytes.")

    return to_hex_with_alignment(number)
|
13 | 62 |
|
14 | 63 |
|
def encode_fixed_bytes(value: str, length: int) -> str:
    """
    Encode a fixed-size bytes value (bytes1..bytes32) as one 32-byte hex word.

    Args:
        value: Hexadecimal string, with or without a leading '0x'.
        length: Declared size N of the bytesN type, in bytes (1..32).

    Returns:
        64 hex characters: the value followed by zero padding on the right.

    Raises:
        EncoderError: If ``length`` is outside 1..32, the value contains
            non-hex characters, or it is longer than ``length`` bytes.
    """
    if not 1 <= length <= 32:
        raise EncoderError(
            f"Invalid fixed bytes size {length}: expected a value between 1 and 32."
        )

    raw_hex = value.lower()
    # Only a leading "0x" is a prefix; do not touch the rest of the string.
    if raw_hex.startswith("0x"):
        raw_hex = raw_hex[2:]

    if any(char not in "0123456789abcdef" for char in raw_hex):
        raise EncoderError(f"Invalid hex value for bytes{length}: '{value}'.")

    max_hex_len = length * 2  # each byte => 2 hex chars
    if len(raw_hex) > max_hex_len:
        raise EncoderError(
            f"Provided bytes length exceeds {length} bytes (max {max_hex_len} hex chars)."
        )

    # bytesN values are left-aligned in the word, so pad on the right.
    # Since length <= 32, padding straight to 64 chars covers both steps.
    return raw_hex.ljust(64, "0")
25 | 77 |
|
26 | 78 |
|
def encode_bytes(data: str) -> str:
    """
    Encode a dynamic `bytes` value as a length word followed by padded data.

    Layout: [ 32-byte length, data right-padded with zeros to a multiple of
    32 bytes ]. Empty input yields only the zero length word.

    Args:
        data: Hexadecimal string, with or without a leading '0x'.

    Returns:
        The hex encoding described above.

    Raises:
        EncoderError: If the value has an odd number of hex digits or
            contains non-hex characters.
    """
    bytes_str = data.lower()
    # str.lstrip("0x") removes any run of '0' and 'x' characters, which drops
    # genuine leading zero bytes ("0x00ab" -> "ab"); strip the prefix only.
    if bytes_str.startswith("0x"):
        bytes_str = bytes_str[2:]

    if any(char not in "0123456789abcdef" for char in bytes_str):
        raise EncoderError(f"Invalid hex value for bytes: '{data}'.")
    if len(bytes_str) % 2 != 0:
        # Half a byte cannot be counted in the length word.
        raise EncoderError(
            f"Hex value for bytes must have an even number of digits: '{data}'."
        )

    if not bytes_str:
        # length = 0, no data words follow
        return to_hex_with_alignment(0)

    count_of_bytes_from_hex = len(bytes_str) // 2

    # Number of hex chars needed to reach the next 32-byte (64-char) boundary.
    padding_needed = -len(bytes_str) % 64

    # first 32 bytes = length, then the data
    return (
        to_hex_with_alignment(count_of_bytes_from_hex)
        + bytes_str
        + "0" * padding_needed
    )
| 102 | + |
| 103 | + |
def _encode_tuple_component(component: dict, item) -> str:
    """
    Encode a single tuple member according to its ABI component description.
    """
    kind = component["type"]

    # Nested struct: recurse with the nested component list.
    if kind == "tuple":
        if "components" not in component:
            raise EncoderError("Tuple type missing 'components' in ABI data.")
        return encode_tuple(component["components"], item)

    # Dynamic arrays: only the empty case is handled, as a single zero word.
    if kind.endswith("[]"):
        if item:
            raise EncoderError(
                "encode_tuple: non-empty dynamic arrays in tuples not yet supported."
            )
        return to_hex_with_alignment(0)

    if kind == "address":
        return encode_address(item)

    if kind == "bool":
        return to_hex_with_alignment(int(bool(item)))

    # uint, int, uint8..uint256, int8..int256
    if re.match(r"^(u?int)(\d*)$", kind):
        width, signed = _parse_solidity_int_type(kind)
        return encode_int(int(item), width, signed)

    # bytes1..bytes32
    fixed_bytes = re.match(r"^bytes(\d+)$", kind)
    if fixed_bytes:
        return encode_fixed_bytes(item, int(fixed_bytes.group(1)))

    if kind == "bytes":
        return encode_bytes(item)

    if kind == "string":
        raise EncoderError(
            "encode_tuple: 'string' inside tuple not fully implemented."
        )

    raise EncoderError(f"Unknown or unhandled type '{kind}' in encode_tuple")


def encode_tuple(components_abi: list, values: list) -> str:
    """
    Encode a tuple (struct) by concatenating the encodings of its members.

    Each entry of `components_abi` is an ABI component dict with a 'type'
    key (and 'components' for nested tuples); `values` holds the matching
    member values in the same order. Nested tuples are encoded recursively.
    Array members are accepted only when empty; 'string' members are
    rejected.

    NOTE(review): dynamic members (empty arrays, `bytes`) are written inline
    rather than through offsets into a separate data section - confirm this
    matches the layout expected for the contracts being encoded.

    Raises:
        EncoderError: On a component/value count mismatch or an unsupported
            member type.
    """
    if len(components_abi) != len(values):
        raise EncoderError(
            f"encode_tuple: mismatch in component count: {len(components_abi)} vs values: {len(values)}"
        )

    return "".join(
        _encode_tuple_component(component, item)
        for component, item in zip(components_abi, values)
    )
70 | 174 |
|
71 | 175 |
|
def encode_dynamic_type(arg_value: str, argument_index: int):
    """
    Build the offset word and the data section for a top-level dynamic value.

    The value is passed to `encode_bytes`, so it is treated as a hex string
    of raw bytes. Returns `(offset_word, encoded_data)`: the offset word is
    meant for the argument's own slot, the encoded data for the trailing
    data section.

    NOTE(review): the offset is derived from the argument position alone,
    as (argument_index + 1) * 32 - verify against the caller that this is
    where the data actually ends up.
    """
    data_offset_in_bytes = 32 * (argument_index + 1)
    return to_hex_with_alignment(data_offset_in_bytes), encode_bytes(arg_value)
|
76 | 186 |
|
77 | 187 |
|
def encode_string(arg_length: int, compl_data: list, arg_value: str):
    """
    Encode a top-level string argument as offset word, length word and data.

    Args:
        arg_length: Number of constructor arguments; the offset assumes each
            occupies one 32-byte word before the data section.
        compl_data: Hex chunks already queued for the trailing data section;
            the string's data will be placed after them.
        arg_value: The string to encode (stored as UTF-8).

    Returns:
        `(offset_word, length_word, contents_hex)` where `contents_hex` is
        the UTF-8 data right-padded with zeros to a multiple of 32 bytes
        (empty for an empty string).
    """
    utf8_bytes = arg_value.encode("utf-8")

    # Queued chunks are hex strings of varying size (a padded string may span
    # several words, an empty one none), so measure them in bytes instead of
    # assuming one 32-byte word per list entry.
    queued_bytes = sum(len(chunk) for chunk in compl_data) // 2
    offset_to_start_of_data_part = to_hex_with_alignment(
        arg_length * 32 + queued_bytes
    )

    encoded_value_length = to_hex_with_alignment(len(utf8_bytes))

    # Pad the string data to a multiple of 32 bytes (64 hex chars).
    hex_str = utf8_bytes.hex()
    hex_str += "0" * (-len(hex_str) % 64)

    return (
        offset_to_start_of_data_part,
        encoded_value_length,
        hex_str,
    )
|
88 | 210 |
|
89 | 211 |
|
def encode_constructor_arguments(constructor_abi: list, constructor_config_args: list):
    """
    Encode constructor arguments into one hex calldata string.

    Walks the ABI inputs in order, encoding the value at the same position
    of `constructor_config_args` by its declared 'type'. Static values are
    written directly; `bytes`, `...[]` and `string` arguments contribute an
    offset word in place plus chunks that are appended after all arguments.
    Tuples are encoded in place via `encode_tuple`.

    Raises:
        EncoderError: For an unsupported type, or wrapping any other error
            raised while encoding an argument.
    """
    arg_length = len(constructor_abi)

    head_parts = []  # in-order encodings / offset words
    compl_data = []  # deferred data for dynamic arguments

    try:
        for argument_index, abi_entry in enumerate(constructor_abi):
            arg_type = abi_entry["type"]
            arg_value = constructor_config_args[argument_index]

            if arg_type == "address":
                head_parts.append(encode_address(arg_value))
                continue

            if arg_type == "bool":
                head_parts.append(to_hex_with_alignment(int(bool(arg_value))))
                continue

            # uint, int, uint8..uint256, int8..int256
            if re.match(r"^(u?int)(\d*)$", arg_type):
                bits, is_signed = _parse_solidity_int_type(arg_type)
                head_parts.append(encode_int(int(arg_value), bits, is_signed))
                continue

            # bytes1..bytes32
            fixed_bytes = re.match(r"^bytes(\d+)$", arg_type)
            if fixed_bytes:
                num_bytes = int(fixed_bytes.group(1))
                head_parts.append(encode_fixed_bytes(arg_value, num_bytes))
                continue

            # top-level dynamic array or raw bytes
            if arg_type == "bytes" or arg_type.endswith("[]"):
                offset, encoded_value = encode_dynamic_type(arg_value, argument_index)
                head_parts.append(offset)
                compl_data.append(encoded_value)
                continue

            if arg_type == "string":
                offset, length_hex, contents_hex = encode_string(
                    arg_length, compl_data, arg_value
                )
                head_parts.append(offset)
                compl_data.extend((length_hex, contents_hex))
                continue

            if arg_type == "tuple":
                head_parts.append(encode_tuple(abi_entry["components"], arg_value))
                continue

            raise EncoderError(
                f"Unknown or unhandled constructor argument type: {arg_type}"
            )

    except Exception as e:
        raise EncoderError(f"Failed to encode calldata arguments: {e}") from None

    # Dynamic data (bytes / string contents) follows all in-place words.
    return "".join(head_parts) + "".join(compl_data)
|
0 commit comments