|
221 | 221 | entry3: (movq (imm64 :iarg2) %r8) |
222 | 222 | entry2: (movq (imm64 :iarg1) %rdx) |
223 | 223 | entry1: (movq (imm64 :iarg0) %rcx) |
224 | | - entry0: (addq -40 %rsp) ; %rcx-%r9 save area + 8byte align |
| 224 | + entry0: (push %rbx) ; save rbx; aligns rsp to 16 |
| 225 | + (addq -32 %rsp) ; shadow space (shared with helper calls) |
225 | 226 | (call (func:)) |
226 | | - (addq 40 %rsp) |
| 227 | + ;; %rax or %xmm0 may have the return value at this point. |
| 228 | + ;; retkind encoding matches amd64-call-reg. |
| 229 | + (movb (imm8 :retkind) %bl) |
| 230 | + (decb %bl) |
| 231 | + (jsl epilog:) ; retkind=0: return %rax as-is |
| 232 | + (jz fixnum:) ; retkind=1 |
| 233 | + (decb %bl) |
| 234 | + (jz integer:) ; retkind=2 |
| 235 | + (decb %bl) |
| 236 | + (jz double:) ; retkind=3 |
| 237 | + (decb %bl) |
| 238 | + (jz float:) ; retkind=4 |
| 239 | + (decb %bl) |
| 240 | + (jz void:) ; retkind=5 |
| 241 | + (decb %bl) |
| 242 | + (jz cstring:) ; retkind=6 |
| 243 | + pointer: (movq %rax %rcx) ; rcx=ptr |
| 244 | + (movq (imm64 :rettype) %rdx) ; rdx=type |
| 245 | + (call (fn-handle:)) |
| 246 | + (jmp epilog:) |
| 247 | + fixnum: (shl 2 %rax) |
| 248 | + (incq %rax) |
| 249 | + (jmp epilog:) |
| 250 | + integer: (movq %rax %rcx) ; rcx=intptr_t |
| 251 | + (call (fn-int:)) |
| 252 | + (jmp epilog:) |
| 253 | + float: (cvtss2sd %xmm0 %xmm0) |
| 254 | + double: (call (fn-flonum:)) |
| 255 | + (jmp epilog:) |
| 256 | + void: (movq (imm64 :SCM_UNDEFINED) %rax) |
| 257 | + (jmp epilog:) |
| 258 | + cstring: (movq %rax %rcx) ; rcx=cstr |
| 259 | + (movq -1 %rdx) ; rdx=size |
| 260 | + (movq -1 %r8) ; r8=len |
| 261 | + (movq (imm32 :SCM_STRING_COPYING) %r9) ; r9=flags |
| 262 | + (call (fn-string:)) |
| 263 | + epilog: (addq 32 %rsp) |
| 264 | + (pop %rbx) |
227 | 265 | (ret) |
228 | 266 | (.endsection text) |
229 | 267 |
|
230 | 268 | (.section data) |
231 | | - func: (.dataq :func) |
| 269 | + func: (.dataq :func) |
| 270 | + fn-flonum: (.dataq :Scm_MakeFlonum) |
| 271 | + fn-string: (.dataq :Scm_MakeString) |
| 272 | + fn-handle: (.dataq :Scm_MakeNativeHandleSimple) |
| 273 | + fn-int: (.dataq :Scm_IntptrToInteger) |
232 | 274 | farg0: (.dataq :farg0) |
233 | 275 | farg1: (.dataq :farg1) |
234 | 276 | farg2: (.dataq :farg2) |
|
253 | 295 | entry3: (movq (imm64 :iarg2) %r8) |
254 | 296 | entry2: (movq (imm64 :iarg1) %rdx) |
255 | 297 | entry1: (movq (imm64 :iarg0) %rcx) |
256 | | - entry0: (addq -32 %rsp) ; %rcx-%r9 save area |
| 298 | + entry0: (push %rbx) ; save rbx |
| 299 | + (addq -32 %rsp) ; shadow space (shared with helper calls) |
257 | 300 | (call (func:)) |
258 | | - (addq 32 %rsp) |
| 301 | + ;; boxing dispatch (Windows x64 calling convention for helpers) |
| 302 | + (movb (imm8 :retkind) %bl) |
| 303 | + (decb %bl) |
| 304 | + (jsl epilog:) |
| 305 | + (jz fixnum:) |
| 306 | + (decb %bl) |
| 307 | + (jz integer:) |
| 308 | + (decb %bl) |
| 309 | + (jz double:) |
| 310 | + (decb %bl) |
| 311 | + (jz float:) |
| 312 | + (decb %bl) |
| 313 | + (jz void:) |
| 314 | + (decb %bl) |
| 315 | + (jz cstring:) |
| 316 | + pointer: (movq %rax %rcx) |
| 317 | + (movq (imm64 :rettype) %rdx) |
| 318 | + (call (fn-handle:)) |
| 319 | + (jmp epilog:) |
| 320 | + fixnum: (shl 2 %rax) |
| 321 | + (incq %rax) |
| 322 | + (jmp epilog:) |
| 323 | + integer: (movq %rax %rcx) |
| 324 | + (call (fn-int:)) |
| 325 | + (jmp epilog:) |
| 326 | + float: (cvtss2sd %xmm0 %xmm0) |
| 327 | + double: (call (fn-flonum:)) |
| 328 | + (jmp epilog:) |
| 329 | + void: (movq (imm64 :SCM_UNDEFINED) %rax) |
| 330 | + (jmp epilog:) |
| 331 | + cstring: (movq %rax %rcx) |
| 332 | + (movq -1 %rdx) |
| 333 | + (movq -1 %r8) |
| 334 | + (movq (imm32 :SCM_STRING_COPYING) %r9) |
| 335 | + (call (fn-string:)) |
| 336 | + epilog: (addq 32 %rsp) |
| 337 | + (pop %rbx) |
259 | 338 | epilogue:(addq (imm32 :epilogue-spill-size) %rsp) |
260 | 339 | (ret) |
261 | 340 | (.endsection text) |
262 | 341 |
|
263 | 342 | (.section data) |
264 | | - func: (.dataq :func) |
| 343 | + func: (.dataq :func) |
| 344 | + fn-flonum: (.dataq :Scm_MakeFlonum) |
| 345 | + fn-string: (.dataq :Scm_MakeString) |
| 346 | + fn-handle: (.dataq :Scm_MakeNativeHandleSimple) |
| 347 | + fn-int: (.dataq :Scm_IntptrToInteger) |
265 | 348 | farg0: (.dataq :farg0) |
266 | 349 | farg1: (.dataq :farg1) |
267 | 350 | farg2: (.dataq :farg2) |
|
541 | 624 |
|
542 | 625 | (Ps |
543 | 626 | `(define call-winx64-regs |
544 | | - (let ([% (%%make-bootstrap-function-table '(%%call-native))] |
545 | | - [link-tmpl #f] [lbl-off #f]) |
| 627 | + (let ([% (%%make-bootstrap-function-table '(%%call-native |
| 628 | + %%get-entry-address))] |
| 629 | + [link-tmpl #f] [lbl-off #f] [prelinked-tmpl #f]) |
546 | 630 | (define (init!) |
547 | 631 | (set! link-tmpl (module-binding-ref 'lang.asm.linker 'link-templates)) |
548 | | - (set! lbl-off (module-binding-ref 'lang.asm.linker 'linked-label-offset))) |
| 632 | + (set! lbl-off (module-binding-ref 'lang.asm.linker 'linked-label-offset)) |
| 633 | + (let ([prelink (module-binding-ref 'lang.asm.linker 'prelink-template)] |
| 634 | + [gea (% '%%get-entry-address)]) |
| 635 | + (set! prelinked-tmpl |
| 636 | + (prelink (winx64-call-reg-tmpl) |
| 637 | + `((:Scm_MakeFlonum ,<intptr_t> |
| 638 | + ,(gea "_Scm_MakeFlonum")) |
| 639 | + (:Scm_MakeString ,<intptr_t> |
| 640 | + ,(gea "_Scm_MakeString")) |
| 641 | + (:Scm_MakeNativeHandleSimple ,<intptr_t> |
| 642 | + ,(gea "_Scm_MakeNativeHandleSimple")) |
| 643 | + (:Scm_IntptrToInteger ,<intptr_t> |
| 644 | + ,(gea "_Scm_IntptrToInteger")) |
| 645 | + (:SCM_STRING_COPYING ,<int32> |
| 646 | + ,SCM_STRING_COPYING) |
| 647 | + (:SCM_UNDEFINED ,<top> |
| 648 | + ,(undefined))))))) |
549 | 649 | (^[ptr args num-args num-fargs rettype] |
550 | 650 | (when (not link-tmpl) (init!)) |
551 | 651 | (let* (;; for effective-nargs calculation, we need to consider |
|
558 | 658 | [entry-label (~ '#(entry0: entry1: entry2: entry3: entry4: |
559 | 659 | entry4f0: entry4f1: entry4f2: entry4f3:) |
560 | 660 | effective-nargs)] |
| 661 | + [retkind (%asm-retkind rettype)] |
561 | 662 | [params |
562 | 663 | (let loop ([args args] [count 0] [r '()]) |
563 | 664 | (cond [(null? args) r] |
|
584 | 685 | `(,ikey ,@(car args)) r))))] |
585 | 686 | [else (error "bad arg entry:" (car args))]))]) |
586 | 687 | (receive [bytes lbs] |
587 | | - (link-tmpl (list (winx64-call-reg-tmpl)) |
| 688 | + (link-tmpl (list prelinked-tmpl) |
588 | 689 | (list* `(:func ,<void*> ,ptr) |
| 690 | + `(:retkind ,<integer> ,retkind) |
| 691 | + `(:rettype ,<top> ,rettype) |
589 | 692 | params)) |
590 | | - ;; win-frame-size=40: shadow space (32) + 8-byte alignment |
591 | | - ;; Prolog ends after the 4-byte "addq -40 %rsp" at entry0: |
| 693 | + ;; win-frame-size=40: push %rbx (8) + shadow space (32) |
| 694 | + ;; Prolog ends after "push %rbx" (1 byte) + "addq -32 %rsp" (4 bytes) |
592 | 695 | ((% '%%call-native) 0 0 bytes 0 |
593 | 696 | (lbl-off lbs 'end:) |
594 | 697 | (lbl-off lbs entry-label) |
595 | 698 | rettype |
596 | | - (+ (lbl-off lbs 'entry0:) 4) |
597 | | - 40 0))))))) |
| 699 | + (+ (lbl-off lbs 'entry0:) 5) |
| 700 | + 40 1))))))) |
598 | 701 |
|
599 | 702 | (Ps |
600 | 703 | `(define call-winx64-spill |
601 | | - (let ([% (%%make-bootstrap-function-table '(%%call-native))] |
602 | | - [link-tmpl #f] [lbl-off #f]) |
| 704 | + (let ([% (%%make-bootstrap-function-table '(%%call-native |
| 705 | + %%get-entry-address))] |
| 706 | + [link-tmpl #f] [lbl-off #f] [prelinked-tmpl #f]) |
603 | 707 | (define (init!) |
604 | 708 | (set! link-tmpl (module-binding-ref 'lang.asm.linker 'link-templates)) |
605 | | - (set! lbl-off (module-binding-ref 'lang.asm.linker 'linked-label-offset))) |
| 709 | + (set! lbl-off (module-binding-ref 'lang.asm.linker 'linked-label-offset)) |
| 710 | + (let ([prelink (module-binding-ref 'lang.asm.linker 'prelink-template)] |
| 711 | + [gea (% '%%get-entry-address)]) |
| 712 | + (set! prelinked-tmpl |
| 713 | + (prelink (winx64-call-spill-tmpl) |
| 714 | + `((:Scm_MakeFlonum ,<intptr_t> |
| 715 | + ,(gea "_Scm_MakeFlonum")) |
| 716 | + (:Scm_MakeString ,<intptr_t> |
| 717 | + ,(gea "_Scm_MakeString")) |
| 718 | + (:Scm_MakeNativeHandleSimple ,<intptr_t> |
| 719 | + ,(gea "_Scm_MakeNativeHandleSimple")) |
| 720 | + (:Scm_IntptrToInteger ,<intptr_t> |
| 721 | + ,(gea "_Scm_IntptrToInteger")) |
| 722 | + (:SCM_STRING_COPYING ,<int32> |
| 723 | + ,SCM_STRING_COPYING) |
| 724 | + (:SCM_UNDEFINED ,<top> |
| 725 | + ,(undefined))))))) |
606 | 726 | (^[ptr args num-args num-fargs num-spills rettype] |
607 | 727 | (when (not link-tmpl) (init!)) |
608 | 728 | (let loop ([args args] [count 0] [scount 0] [named '()] [spill-params '()]) |
609 | 729 | (if (null? args) |
610 | | - (let* ([align-pad (if (even? num-spills) 8 0)] |
611 | | - [spill-area-bytes (* 8 num-spills)]) |
| 730 | + ;; With push %rbx at entry0, alignment parity is flipped. |
| 731 | + ;; We need rsp%16==0 before "call func:", which requires |
| 732 | + ;; align-pad=(if (even? num-spills) 0 8). |
| 733 | + (let* ([align-pad (if (even? num-spills) 0 8)] |
| 734 | + [spill-area-bytes (* 8 num-spills)] |
| 735 | + [retkind (%asm-retkind rettype)]) |
612 | 736 | (receive [bytes lbs] |
613 | | - (link-tmpl (list (winx64-call-spill-tmpl)) |
| 737 | + (link-tmpl (list prelinked-tmpl) |
614 | 738 | `((:func ,<void*> ,ptr) |
615 | 739 | (:init-spill-size |
616 | 740 | ,<int32> |
|
619 | 743 | ,<int32> |
620 | 744 | ,(+ spill-area-bytes align-pad)) |
621 | 745 | (:align-pad ,<int8> ,align-pad) |
| 746 | + (:retkind ,<integer> ,retkind) |
| 747 | + (:rettype ,<top> ,rettype) |
622 | 748 | ,@named |
623 | 749 | ,@spill-params) |
624 | 750 | :postamble spill-area-bytes) |
625 | | - ;; win-frame-size = shadow space (32) + spill args (8*N) |
626 | | - ;; Prolog ends after the 4-byte "addq -32 %rsp" at entry0: |
| 751 | + ;; win-frame-size = push %rbx (8) + shadow (32) + spill+pad |
| 752 | + ;; Prolog ends after "push %rbx" (1) + "addq -32 %rsp" (4) |
627 | 753 | ((% '%%call-native) 0 ;tstart |
628 | 754 | 0 ;tend (no zero fill needed) |
629 | 755 | bytes ;code |
|
632 | 758 | spill-area-bytes) ;end |
633 | 759 | (lbl-off lbs 'entry:) ;entry |
634 | 760 | rettype |
635 | | - (+ (lbl-off lbs 'entry0:) 4) |
636 | | - (+ spill-area-bytes align-pad 32) |
637 | | - 0))) |
| 761 | + (+ (lbl-off lbs 'entry0:) 5) |
| 762 | + (+ spill-area-bytes align-pad 40) |
| 763 | + 1))) |
638 | 764 | (cond [(%iarg-type? (caar args)) |
639 | 765 | (if (< count 4) |
640 | 766 | (loop (cdr args) (+ count 1) scount |
|
0 commit comments