|
205 | 205 | CACHEFLUSH((char *)fn, CODESIZE); |
206 | 206 | #define REPLACE_NEAR(t, fn, fn_stub) REPLACE_FAR(t, fn, fn_stub) |
207 | 207 | #elif defined(__powerpc64__) && __LITTLE_ENDIAN__ |
208 | | - // For ppc64le (ELFv2 ABI with function descriptors) |
| 208 | + // For ppc64le (Little Endian, ELFv2 ABI) |
| 209 | + // https://maskray.me/blog/2023-02-26-linker-notes-on-power-isa |
209 | 210 | // https://llvm.org/devmtg/2014-04/PDFs/Talks/Euro-LLVM-2014-Weigand.pdf |
210 | | - #define CODESIZE 36U |
211 | | - #define CODESIZE_MIN 36U |
| 211 | + // PowerPC64 LE (ELFv2 ABI) - Direct jump; ELFv2 has no function descriptors |
| 212 | + #define CODESIZE 28U |
| 213 | + #define CODESIZE_MIN 28U |
212 | 214 | #define CODESIZE_MAX CODESIZE |
213 | | - /* |
214 | | - * This hook loads the address of the stub function's descriptor, |
215 | | - * then loads the code entry point and TOC address from the descriptor, |
216 | | - * and finally performs an indirect branch. |
217 | | - * |
218 | | - * Assembly equivalence: |
219 | | - * // Step 1: Load the 64-bit address of the function descriptor for `fn_stub` into r11 |
220 | | - * lis r11, fn_stub_desc@highest |
221 | | - * ori r11, r11, fn_stub_desc@higher |
222 | | - * rldicr r11, r11, 32, 31 |
223 | | - * ori r11, r11, fn_stub_desc@h |
224 | | - * ori r11, r11, fn_stub_desc@l |
225 | | - * |
226 | | - * // Step 2: Load values from the function descriptor |
227 | | - * ld r12, 0(r11) // Load code address into r12 from descriptor |
228 | | - * ld r2, 8(r11) // Load TOC address into r2 from descriptor |
229 | | - * |
230 | | - * // Step 3: Branch |
231 | | - * mtctr r12 // Move code address to Count Register |
232 | | - * bctr // Branch to the address in CTR |
233 | | - */ |
234 | | - #define REPLACE_FAR(t, fn, fn_stub)\ |
235 | | - do {\ |
236 | | - uint64_t desc_addr = (uint64_t)fn_stub;\ |
237 | | - uint32_t* p = (uint32_t*)fn;\ |
238 | | - /* Load 64-bit descriptor address into r11 */ \ |
239 | | - p[0] = 0x3d600000 | (uint32_t)(desc_addr >> 48);\ |
240 | | - p[1] = 0x616b0000 | (uint32_t)((desc_addr >> 32) & 0xFFFF);\ |
241 | | - /* rldicr r11, r11, 32, 31 (Rotate Left Doubleword Immediate then Clear Right) */ \ |
242 | | - p[2] = 0x796b07c6; \ |
243 | | - p[3] = 0x616b0000 | (uint32_t)((desc_addr >> 16) & 0xFFFF);\ |
244 | | - p[4] = 0x616b0000 | (uint32_t)(desc_addr & 0xFFFF);\ |
245 | | - /* ld r12, 0(r11) */ \ |
246 | | - p[5] = 0xe98b0000;\ |
247 | | - /* ld r2, 8(r11) */ \ |
248 | | - p[6] = 0xe84b0008;\ |
249 | | - /* mtctr r12 */ \ |
250 | | - p[7] = 0x7d8903a6;\ |
251 | | - /* bctr */ \ |
252 | | - p[8] = 0x4e800420;\ |
253 | | - CACHEFLUSH((char *)fn, CODESIZE);\ |
254 | | - } while(0) |
/*
 * REPLACE_FAR(t, fn, fn_stub) -- ppc64le (ELFv2) variant.
 *
 * Overwrites the first CODESIZE bytes (7 instructions) at `fn` with an
 * absolute jump to `fn_stub`, then calls CACHEFLUSH on the patched range.
 *
 *   t        unused by this variant (kept for a uniform macro signature)
 *   fn       address to write the instructions at; must be writable
 *   fn_stub  jump target; converted to a 64-bit integer
 *
 * Emitted sequence:
 *   lis    r12, target[63:48]
 *   ori    r12, r12, target[47:32]
 *   rldicr r12, r12, 32, 31
 *   oris   r12, r12, target[31:16]
 *   ori    r12, r12, target[15:0]
 *   mtctr  r12
 *   bctr
 *
 * The target address is built in r12 and left there at the branch; the
 * ELFv2 ABI has callees derive their TOC pointer from r12 at the global
 * entry point.
 *
 * Locals carry an rf_..._ affix so they cannot capture a caller variable
 * passed as an argument (e.g. a caller pointer named `p`), and the flush
 * uses the already-cast pointer so expression arguments such as `buf + 1`
 * are flushed at the address that was actually written.
 */
#define REPLACE_FAR(t, fn, fn_stub) \
    do { \
        const uint64_t rf_target_ = (uint64_t)(fn_stub); \
        uint32_t *const rf_insn_ = (uint32_t *)(fn); \
        rf_insn_[0] = 0x3d800000u | (uint32_t)((rf_target_ >> 48) & 0xFFFFu); /* lis    r12, hi16          */ \
        rf_insn_[1] = 0x618c0000u | (uint32_t)((rf_target_ >> 32) & 0xFFFFu); /* ori    r12, r12, mid16    */ \
        rf_insn_[2] = 0x798c07c6u;                                            /* rldicr r12, r12, 32, 31   */ \
        rf_insn_[3] = 0x658c0000u | (uint32_t)((rf_target_ >> 16) & 0xFFFFu); /* oris   r12, r12, midlow16 */ \
        rf_insn_[4] = 0x618c0000u | (uint32_t)(rf_target_ & 0xFFFFu);         /* ori    r12, r12, low16    */ \
        rf_insn_[5] = 0x7d8903a6u;                                            /* mtctr  r12                */ \
        rf_insn_[6] = 0x4e800420u;                                            /* bctr                      */ \
        CACHEFLUSH((char *)rf_insn_, CODESIZE); \
    } while (0)
255 | 228 | #define REPLACE_NEAR(t, fn, fn_stub) REPLACE_FAR(t, fn, fn_stub) |
256 | 229 | #elif defined(__powerpc64__) |
257 | | - // For ppc64be (Big Endian, ELFv1 ABI) |
258 | | - #define CODESIZE 28U |
259 | | - #define CODESIZE_MIN 28U |
| 230 | + // PowerPC64 BE (ELFv1 ABI) - Use function descriptor to load code + TOC |
| 231 | + #define CODESIZE 36U |
| 232 | + #define CODESIZE_MIN 36U |
260 | 233 | #define CODESIZE_MAX CODESIZE |
261 | | - // lis r12, fn_stub@highest |
262 | | - // ori r12, r12, fn_stub@higher |
263 | | - // rldicr r12, r12, 32, 31 |
264 | | - // ori r12, r12, fn_stub@high |
265 | | - // ori r12, r12, fn_stub@l |
266 | | - // mtctr r12 |
267 | | - // bctr |
268 | | - #define REPLACE_FAR(t, fn, fn_stub)\ |
269 | | - ((uint32_t*)fn)[0] = 0x3c000000 | (((uintptr_t)fn_stub >> 48) & 0xffff);\ |
270 | | - ((uint32_t*)fn)[1] = 0x60000000 | (((uintptr_t)fn_stub >> 32) & 0xffff);\ |
271 | | - ((uint32_t*)fn)[2] = 0x78000000 | ((((uintptr_t)fn_stub >> 32) & 0xffff) << 16);\ |
272 | | - ((uint32_t*)fn)[3] = 0x60000000 | (((uintptr_t)fn_stub >> 16) & 0xffff);\ |
273 | | - ((uint32_t*)fn)[4] = 0x60000000 | ((uintptr_t)fn_stub & 0xffff);\ |
274 | | - ((uint32_t*)fn)[5] = 0x7d8903a6;\ |
275 | | - ((uint32_t*)fn)[6] = 0x4e800420;\ |
276 | | - CACHEFLUSH((char *)fn, CODESIZE); |
/*
 * REPLACE_FAR(t, fn, fn_stub) -- ppc64 big-endian (ELFv1) variant.
 *
 * Overwrites the first CODESIZE bytes (9 instructions) at `fn` with a jump
 * that goes through the function descriptor addressed by `fn_stub`, then
 * calls CACHEFLUSH on the patched range.
 *
 *   t        unused by this variant (kept for a uniform macro signature)
 *   fn       address to write the instructions at; must be writable
 *   fn_stub  address of the target's function descriptor
 *
 * Emitted sequence:
 *   lis    r11, desc[63:48]
 *   ori    r11, r11, desc[47:32]
 *   rldicr r11, r11, 32, 31
 *   oris   r11, r11, desc[31:16]
 *   ori    r11, r11, desc[15:0]
 *   ld     r12, 0(r11)        // code entry point from the descriptor
 *   ld     r2,  8(r11)        // TOC pointer from the descriptor
 *   mtctr  r12
 *   bctr
 *
 * The fourth instruction must be `oris` (OR immediate shifted), not `ori`:
 * it supplies bits 31:16 of the address. With `ori` those bits would be
 * OR-ed into the low halfword and the descriptor address would be wrong
 * whenever bits 31:16 are non-zero.
 *
 * NOTE(review): the instructions are written at `fn` exactly as given. On
 * ELFv1 a C function pointer normally designates a descriptor rather than
 * code -- confirm that callers pass the code entry address here.
 *
 * Locals carry an rf_..._ affix so they cannot capture a caller variable
 * passed as an argument, and the flush uses the already-cast pointer so
 * expression arguments are flushed at the address that was written.
 */
#define REPLACE_FAR(t, fn, fn_stub) \
    do { \
        const uint64_t rf_desc_ = (uint64_t)(fn_stub); \
        uint32_t *const rf_insn_ = (uint32_t *)(fn); \
        rf_insn_[0] = 0x3d600000u | (uint32_t)((rf_desc_ >> 48) & 0xFFFFu); /* lis    r11, hi16          */ \
        rf_insn_[1] = 0x616b0000u | (uint32_t)((rf_desc_ >> 32) & 0xFFFFu); /* ori    r11, r11, mid16    */ \
        rf_insn_[2] = 0x796b07c6u;                                          /* rldicr r11, r11, 32, 31   */ \
        rf_insn_[3] = 0x656b0000u | (uint32_t)((rf_desc_ >> 16) & 0xFFFFu); /* oris   r11, r11, midlow16 */ \
        rf_insn_[4] = 0x616b0000u | (uint32_t)(rf_desc_ & 0xFFFFu);         /* ori    r11, r11, low16    */ \
        rf_insn_[5] = 0xe98b0000u;                                          /* ld     r12, 0(r11) - code */ \
        rf_insn_[6] = 0xe84b0008u;                                          /* ld     r2,  8(r11) - toc  */ \
        rf_insn_[7] = 0x7d8903a6u;                                          /* mtctr  r12                */ \
        rf_insn_[8] = 0x4e800420u;                                          /* bctr                      */ \
        CACHEFLUSH((char *)rf_insn_, CODESIZE); \
    } while (0)
277 | 249 | #define REPLACE_NEAR(t, fn, fn_stub) REPLACE_FAR(t, fn, fn_stub) |
278 | 250 | #elif defined(__alpha__) |
279 | 251 | #define CODESIZE 16U |
|
0 commit comments