From 9588640b83ec7895246c5123919adcab4f9096be Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Mon, 25 Aug 2025 11:59:15 +0200 Subject: [PATCH 1/9] 8366041: Shenandoah: Late Barrier Expansion --- src/hotspot/cpu/aarch64/aarch64.ad | 6 - .../shenandoahBarrierSetAssembler_aarch64.cpp | 128 +++++++++ .../shenandoahBarrierSetAssembler_aarch64.hpp | 8 + .../gc/shenandoah/shenandoah_aarch64.ad | 200 +++++++------- src/hotspot/share/adlc/formssel.cpp | 7 - .../share/gc/shared/c2/barrierSetC2.hpp | 4 - .../shenandoah/c2/shenandoahBarrierSetC2.cpp | 252 ++++++++---------- .../shenandoah/c2/shenandoahBarrierSetC2.hpp | 77 +++++- .../gc/shenandoah/c2/shenandoahSupport.cpp | 4 - .../gc/shenandoah/c2/shenandoahSupport.hpp | 90 ------- src/hotspot/share/opto/classes.hpp | 6 - src/hotspot/share/opto/escape.cpp | 6 +- src/hotspot/share/opto/loopTransform.cpp | 8 +- src/hotspot/share/opto/matcher.cpp | 9 +- 14 files changed, 419 insertions(+), 386 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 9697ac31350ec..7a3fc930fc504 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1512,8 +1512,6 @@ source %{ case Op_CompareAndSwapL: case Op_CompareAndSwapP: case Op_CompareAndSwapN: - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: case Op_CompareAndSwapB: case Op_CompareAndSwapS: case Op_GetAndSetI: @@ -1535,10 +1533,6 @@ source %{ case Op_WeakCompareAndSwapL: case Op_WeakCompareAndSwapP: case Op_WeakCompareAndSwapN: - case Op_ShenandoahWeakCompareAndSwapP: - case Op_ShenandoahWeakCompareAndSwapN: - case Op_ShenandoahCompareAndExchangeP: - case Op_ShenandoahCompareAndExchangeN: return maybe_volatile; default: return false; diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 200c42171348a..f58536d747121 100644 --- 
a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -41,6 +41,9 @@ #include "c1/c1_MacroAssembler.hpp" #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #endif +#ifdef COMPILER2 +#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" +#endif #define __ masm-> @@ -606,6 +609,131 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } } +#ifdef COMPILER2 +void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, + MacroAssembler* masm, + Register addr, + Register expected, + Register new_val, + Register result, + bool acquire, bool release, bool weak, + bool is_cae, bool narrow) { + Register tmp = rscratch2; + Assembler::operand_size size = narrow ? Assembler::word : Assembler::xword; + + assert_different_registers(addr, expected, result, tmp); + assert_different_registers(addr, new_val, result, tmp); + + ShenandoahCASBarrierSlowStub* const slow_stub = ShenandoahCASBarrierSlowStub::create(node, addr, expected, new_val, result, tmp, is_cae, narrow, acquire, release, weak); + ShenandoahCASBarrierMidStub* const mid_stub = ShenandoahCASBarrierMidStub::create(node, slow_stub, result, tmp, is_cae); + + // Step 1. Fast-path. + // + // Try to CAS with given arguments. If successful, then we are done. + __ cmpxchg(addr, expected, new_val, size, acquire, release, weak, result); + // EQ flag set iff success. result holds value fetched. + + __ br(Assembler::NE, *mid_stub->entry()); + + // Slow-stub re-enters with condition flags according to CAS, we may need to + // set result accordingly. + __ bind(*slow_stub->continuation()); + if (!is_cae) { + __ cset(result, Assembler::EQ); + } + + // Mid-stub re-enters with result set correctly. + __ bind(*mid_stub->continuation()); +} + +#undef __ +#define __ masm. 
+ +void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { + __ bind(*entry()); + + Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ ldrb(_tmp, gc_state); + __ tstw(_tmp, ShenandoahHeap::HAS_FORWARDED); + __ br(Assembler::NE, *_slow_stub->entry()); + if (!_cae) { + __ mov(_result, 0); // result = false + } + __ b(*continuation()); +} + +void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { + __ bind(*entry()); + Assembler::operand_size size = _narrow ? Assembler::word : Assembler::xword; + + // Step 2. CAS has failed because the value held at addr does not + // match expected. This may be a false negative because the value fetched + // from addr (now held in result) may be a from-space pointer to the + // original copy of same object referenced by to-space pointer expected. + // + // To resolve this, it suffices to find the forward pointer associated + // with fetched value. If this matches expected, retry CAS with new + // parameters. If this mismatches, then we have a legitimate + // failure, and we're done. + + // overwrite tmp with from-space pointer fetched from memory + __ mov(_tmp, _result); + + if (_narrow) { + // Decode tmp in order to resolve its forward pointer + __ decode_heap_oop(_tmp, _tmp); + } + + ShenandoahBarrierSet::assembler()->resolve_forward_pointer(&masm, _tmp); + + if (_narrow) { + // Encode tmp to compare against expected. + __ encode_heap_oop(_tmp, _tmp); + } + + // Does forwarded value of fetched from-space pointer match original + // value of expected? If result holds null, this comparison will fail + // because we know from step1 that expected is not null. There is + // no need for a separate test for result (the value originally held + // in memory) equal to null. + __ cmp(_tmp, _expected); + + // If not, then the failure was legitimate and we're done. + // Branching to continuation with NE condition denotes failure. 
+ __ br(Assembler::NE, *continuation()); + + // Fall through to step 3. + + // Step 3. We've confirmed that the value originally held in memory + // (now held in result) pointed to from-space version of original + // expected value. Try the CAS again with the from-space expected + // value. If it now succeeds, we're good. + // + // Note: result holds encoded from-space pointer that matches to-space + // object residing at expected. result is the new "expected". + + // Note that macro implementation of __cmpxchg cannot use same register + // tmp2 for result and expected since it overwrites result before it + // compares result with expected. + __ mov(_tmp, _result); + __ cmpxchg(_addr, _tmp, _new_val, size, _acquire, _release, _weak, _result); + // EQ flag set iff success. result holds value fetched, rscratch1 clobbered. + + // If fetched value did not equal the new expected, this could + // still be a false negative because some other thread may have + // newly overwritten the memory value with its to-space equivalent. + __ br(Assembler::EQ, *continuation()); + + // Step 4. 
+ // The from-space CAS failed; another thread may have healed the field to the to-space copy, so retry against the original _expected. Never retry against the freshly fetched _result: that CAS would succeed for any unrelated value. + __ cmpxchg(_addr, _expected, _new_val, size, _acquire, _release, _weak, _result); + + __ b(*continuation()); +} +#undef __ +#define __ masm-> +#endif // COMPILER2 + void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register start, Register count, Register scratch, RegSet saved_regs) { assert(ShenandoahCardBarrier, "Should have been checked by caller"); diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index a12d4e2beec40..31d18ced6fe91 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -35,9 +35,13 @@ class ShenandoahPreBarrierStub; class ShenandoahLoadReferenceBarrierStub; class StubAssembler; #endif +#ifdef COMPILER2 +class MachNode; +#endif // COMPILER2 class StubCodeGenerator; class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + friend class ShenandoahCASBarrierSlowStub; private: void satb_write_barrier_pre(MacroAssembler* masm, @@ -88,6 +92,10 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { Register obj, Register tmp, Label& slowpath); void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, bool acquire, bool release, bool is_cae, Register result); +#ifdef COMPILER2 + void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register expected, Register new_val, Register result, + bool acquire, bool release, bool weak, bool is_cae, bool narrow); +#endif }; #endif // CPU_AARCH64_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad index d5dcf7f953448..d930277830e32 100644 --- 
a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad @@ -27,233 +27,223 @@ source_hpp %{ #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" %} -encode %{ - enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{ - guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register); - %} - - enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{ - guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register); - %} -%} +instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - - match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ - "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" %} - ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, 
newval, tmp, res)); + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ false); + %} ins_pipe(pipe_slow); %} -instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ - match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ - "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ true); %} ins_pipe(pipe_slow); %} -instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); - predicate(needs_acquiring_load_exclusive(n)); - match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ - "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" %} - ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, res)); + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ false); + %} ins_pipe(pipe_slow); %} -instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + 
match(Set res (CompareAndSwapN mem (Binary oldval newval))); - predicate(needs_acquiring_load_exclusive(n)); - match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ - "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cmpxchgw_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ true); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ - match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); +instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ true); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); +instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ - "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register); - %} + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ false); + %} ins_pipe(pipe_slow); %} -instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); - predicate(needs_acquiring_load_exclusive(n)); - match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ true); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - predicate(needs_acquiring_load_exclusive(n)); - match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ - "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ false); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ - match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); +instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ true); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); +instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ false); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); - predicate(needs_acquiring_load_exclusive(n)); - match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ true); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - predicate(needs_acquiring_load_exclusive(n)); - match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP_DEF res, KILL cr); format %{ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" %} ins_encode %{ - Register tmp = $tmp$$Register; - __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, + /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ false); %} ins_pipe(pipe_slow); %} diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index b938d5b75608d..b5ba395e0f5b4 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -790,10 +790,6 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const { !strcmp(_matrule->_rChild->_opType,"GetAndSetN") || !strcmp(_matrule->_rChild->_opType,"RotateLeft") || !strcmp(_matrule->_rChild->_opType,"RotateRight") || -#if INCLUDE_SHENANDOAHGC - !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") || - !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") || -#endif !strcmp(_matrule->_rChild->_opType,"StrInflatedCopy") || !strcmp(_matrule->_rChild->_opType,"VectorCmpMasked")|| !strcmp(_matrule->_rChild->_opType,"VectorMaskGen")|| @@ -3653,9 +3649,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { "CompareAndSwapB", "CompareAndSwapS", "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN", "WeakCompareAndSwapB", "WeakCompareAndSwapS", "WeakCompareAndSwapI", "WeakCompareAndSwapL", "WeakCompareAndSwapP", "WeakCompareAndSwapN", "CompareAndExchangeB", "CompareAndExchangeS", "CompareAndExchangeI", "CompareAndExchangeL", "CompareAndExchangeP", "CompareAndExchangeN", -#if INCLUDE_SHENANDOAHGC - "ShenandoahCompareAndSwapN", "ShenandoahCompareAndSwapP", "ShenandoahWeakCompareAndSwapP", 
"ShenandoahWeakCompareAndSwapN", "ShenandoahCompareAndExchangeP", "ShenandoahCompareAndExchangeN", -#endif "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP", "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN", "ClearArray" diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp index 7b9cb985cff6e..b167b39474bd1 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp @@ -359,10 +359,6 @@ class BarrierSetC2: public CHeapObj { virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const { return false; } virtual bool escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const { return false; } - virtual bool escape_has_out_with_unsafe_object(Node* n) const { return false; } - - virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const { return false; }; - virtual bool matcher_is_store_load_barrier(Node* x, uint xop) const { return false; } // Whether the given phi node joins OOPs from fast and slow allocation paths. 
static bool is_allocation(const Node* node); diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index fdfde866cd725..ebdbeff862f87 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -41,6 +41,7 @@ #include "opto/macro.hpp" #include "opto/movenode.hpp" #include "opto/narrowptrnode.hpp" +#include "opto/output.hpp" #include "opto/rootnode.hpp" #include "opto/runtime.hpp" @@ -48,8 +49,11 @@ ShenandoahBarrierSetC2* ShenandoahBarrierSetC2::bsc2() { return reinterpret_cast(BarrierSet::barrier_set()->barrier_set_c2()); } -ShenandoahBarrierSetC2State::ShenandoahBarrierSetC2State(Arena* comp_arena) - : _load_reference_barriers(new (comp_arena) GrowableArray(comp_arena, 8, 0, nullptr)) { +ShenandoahBarrierSetC2State::ShenandoahBarrierSetC2State(Arena* comp_arena) : + BarrierSetC2State(comp_arena), + _load_reference_barriers(new (comp_arena) GrowableArray(comp_arena, 8, 0, nullptr)), + _stubs(new (comp_arena) GrowableArray(comp_arena, 8, 0, nullptr)), + _stubs_start_offset(0) { } int ShenandoahBarrierSetC2State::load_reference_barriers_count() const { @@ -668,113 +672,80 @@ Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val return load; } +static void set_barrier_data(C2Access& access) { + if (!access.is_oop()) { + return; + } + + if (access.decorators() & C2_TIGHTLY_COUPLED_ALLOC) { + access.set_barrier_data(ShenandoahBarrierElided); + return; + } + + uint8_t barrier_data = 0; + + if (access.decorators() & ON_PHANTOM_OOP_REF) { + barrier_data |= ShenandoahBarrierPhantom; + } else if (access.decorators() & ON_WEAK_OOP_REF) { + barrier_data |= ShenandoahBarrierWeak; + } else { + barrier_data |= ShenandoahBarrierStrong; + } + + if (access.decorators() & IN_NATIVE) { + barrier_data |= ShenandoahBarrierNative; + } + + if (access.decorators() & AS_NO_KEEPALIVE) { + 
barrier_data |= ShenandoahBarrierNoKeepAlive; + } + + access.set_barrier_data(barrier_data); +} + Node* ShenandoahBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const { + if (ShenandoahCASBarrier) { + set_barrier_data(access); + } + GraphKit* kit = access.kit(); + // TODO: Implement late SATB barriers. if (access.is_oop()) { - shenandoah_write_barrier_pre(kit, false /* do_load */, - nullptr, nullptr, max_juint, nullptr, nullptr, - expected_val /* pre_val */, T_OBJECT); - - MemNode::MemOrd mo = access.mem_node_mo(); - Node* mem = access.memory(); - Node* adr = access.addr().node(); - const TypePtr* adr_type = access.addr().type(); - Node* load_store = nullptr; - -#ifdef _LP64 - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); - Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); - if (ShenandoahCASBarrier) { - load_store = kit->gvn().transform(new ShenandoahCompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); - } else { - load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); - } - } else -#endif - { - if (ShenandoahCASBarrier) { - load_store = kit->gvn().transform(new ShenandoahCompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo)); - } else { - load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo)); - } - } - - access.set_raw_access(load_store); - pin_atomic_op(access); + shenandoah_write_barrier_pre(kit, false /* do_load */, nullptr, nullptr, max_juint, nullptr, nullptr, expected_val /* 
pre_val */, T_OBJECT); + } + Node* load_store = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); -#ifdef _LP64 - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - load_store = kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type())); - } -#endif + // TODO: Implement late barriers for LRB and Card-Table. + if (access.is_oop()) { load_store = kit->gvn().transform(new ShenandoahLoadReferenceBarrierNode(nullptr, load_store, access.decorators())); if (ShenandoahCardBarrier) { post_barrier(kit, kit->control(), access.raw_access(), access.base(), access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true); } - return load_store; } - return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); + return load_store; } Node* ShenandoahBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const { + if (ShenandoahCASBarrier) { + set_barrier_data(access); + } GraphKit* kit = access.kit(); if (access.is_oop()) { shenandoah_write_barrier_pre(kit, false /* do_load */, nullptr, nullptr, max_juint, nullptr, nullptr, expected_val /* pre_val */, T_OBJECT); - DecoratorSet decorators = access.decorators(); - MemNode::MemOrd mo = access.mem_node_mo(); - Node* mem = access.memory(); - bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0; - Node* load_store = nullptr; - Node* adr = access.addr().node(); -#ifdef _LP64 - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); - Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); - if (ShenandoahCASBarrier) { - if (is_weak_cas) { - load_store = kit->gvn().transform(new ShenandoahWeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); - } else { - load_store = 
kit->gvn().transform(new ShenandoahCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); - } - } else { - if (is_weak_cas) { - load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); - } else { - load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); - } - } - } else -#endif - { - if (ShenandoahCASBarrier) { - if (is_weak_cas) { - load_store = kit->gvn().transform(new ShenandoahWeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); - } else { - load_store = kit->gvn().transform(new ShenandoahCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); - } - } else { - if (is_weak_cas) { - load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); - } else { - load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); - } - } - } - access.set_raw_access(load_store); - pin_atomic_op(access); + } + Node* load_store = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); + if (access.is_oop()) { if (ShenandoahCardBarrier) { post_barrier(kit, kit->control(), access.raw_access(), access.base(), access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true); } - return load_store; } - return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); + return load_store; } Node* ShenandoahBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* val, const Type* value_type) const { @@ -1202,13 +1173,6 @@ bool ShenandoahBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, ui } return false; } - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: - case Op_ShenandoahWeakCompareAndSwapN: - case Op_ShenandoahWeakCompareAndSwapP: - case Op_ShenandoahCompareAndExchangeP: - case 
Op_ShenandoahCompareAndExchangeN: - return true; case Op_ShenandoahLoadReferenceBarrier: assert(false, "should have been expanded already"); return true; @@ -1219,16 +1183,6 @@ bool ShenandoahBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, ui bool ShenandoahBarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const { switch (opcode) { - case Op_ShenandoahCompareAndExchangeP: - case Op_ShenandoahCompareAndExchangeN: - conn_graph->add_objload_to_connection_graph(n, delayed_worklist); - // fallthrough - case Op_ShenandoahWeakCompareAndSwapP: - case Op_ShenandoahWeakCompareAndSwapN: - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: - conn_graph->add_to_congraph_unsafe_access(n, opcode, delayed_worklist); - return true; case Op_StoreP: { Node* adr = n->in(MemNode::Address); const Type* adr_type = gvn->type(adr); @@ -1264,17 +1218,6 @@ bool ShenandoahBarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph bool ShenandoahBarrierSetC2::escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const { switch (opcode) { - case Op_ShenandoahCompareAndExchangeP: - case Op_ShenandoahCompareAndExchangeN: { - Node *adr = n->in(MemNode::Address); - conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, adr, nullptr); - // fallthrough - } - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: - case Op_ShenandoahWeakCompareAndSwapP: - case Op_ShenandoahWeakCompareAndSwapN: - return conn_graph->add_final_edges_unsafe_access(n, opcode); case Op_ShenandoahLoadReferenceBarrier: conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(ShenandoahLoadReferenceBarrierNode::ValueIn), nullptr); return true; @@ -1285,38 +1228,69 @@ bool ShenandoahBarrierSetC2::escape_add_final_edges(ConnectionGraph* conn_graph, return false; } -bool 
ShenandoahBarrierSetC2::escape_has_out_with_unsafe_object(Node* n) const { - return n->has_out_with(Op_ShenandoahCompareAndExchangeP) || n->has_out_with(Op_ShenandoahCompareAndExchangeN) || - n->has_out_with(Op_ShenandoahCompareAndSwapP, Op_ShenandoahCompareAndSwapN, Op_ShenandoahWeakCompareAndSwapP, Op_ShenandoahWeakCompareAndSwapN); +static ShenandoahBarrierSetC2State* barrier_set_state() { + return reinterpret_cast(Compile::current()->barrier_set_state()); +} + +int ShenandoahBarrierSetC2::estimate_stub_size() const { + Compile* const C = Compile::current(); + BufferBlob* const blob = C->output()->scratch_buffer_blob(); + GrowableArray* const stubs = barrier_set_state()->stubs(); + int size = 0; + + for (int i = 0; i < stubs->length(); i++) { + CodeBuffer cb(blob->content_begin(), checked_cast((address)C->output()->scratch_locs_memory() - blob->content_begin())); + MacroAssembler masm(&cb); + stubs->at(i)->emit_code(masm); + size += cb.insts_size(); + } + return size; } -bool ShenandoahBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const { - switch (opcode) { - case Op_ShenandoahCompareAndExchangeP: - case Op_ShenandoahCompareAndExchangeN: - case Op_ShenandoahWeakCompareAndSwapP: - case Op_ShenandoahWeakCompareAndSwapN: - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: { // Convert trinary to binary-tree - Node* newval = n->in(MemNode::ValueIn); - Node* oldval = n->in(LoadStoreConditionalNode::ExpectedIn); - Node* pair = new BinaryNode(oldval, newval); - n->set_req(MemNode::ValueIn,pair); - n->del_req(LoadStoreConditionalNode::ExpectedIn); - return true; +void ShenandoahBarrierSetC2::emit_stubs(CodeBuffer& cb) const { + MacroAssembler masm(&cb); + GrowableArray* const stubs = barrier_set_state()->stubs(); + barrier_set_state()->set_stubs_start_offset(masm.offset()); + + for (int i = 0; i < stubs->length(); i++) { + // Make sure there is enough space in the code buffer + if 
(cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) { + ciEnv::current()->record_failure("CodeCache is full"); + return; } - default: - break; + + stubs->at(i)->emit_code(masm); } - return false; + + masm.flush(); + } -bool ShenandoahBarrierSetC2::matcher_is_store_load_barrier(Node* x, uint xop) const { - return xop == Op_ShenandoahCompareAndExchangeP || - xop == Op_ShenandoahCompareAndExchangeN || - xop == Op_ShenandoahWeakCompareAndSwapP || - xop == Op_ShenandoahWeakCompareAndSwapN || - xop == Op_ShenandoahCompareAndSwapN || - xop == Op_ShenandoahCompareAndSwapP; +void ShenandoahBarrierStub::register_stub() { + if (!Compile::current()->output()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(this); + } +} + +ShenandoahCASBarrierSlowStub* ShenandoahCASBarrierSlowStub::create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool narrow, bool acquire, bool release, bool weak) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStub(node, addr, expected, new_val, result, tmp, cae, narrow, acquire, release, weak); + stub->register_stub(); + return stub; +} + +ShenandoahCASBarrierMidStub* ShenandoahCASBarrierMidStub::create(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register result, Register tmp, bool cae) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierMidStub(node, slow_stub, result, tmp, cae); + stub->register_stub(); + return stub; } + +bool ShenandoahBarrierSetC2State::needs_liveness_data(const MachNode* mach) const { + assert(mach->barrier_data() != 0, "what else?"); + return mach->barrier_data() != 0; +; +} + +bool ShenandoahBarrierSetC2State::needs_livein_data() const { + return true; +} \ No newline at end of file diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index 
dd9e9bcc1a5fa..3b8a0518e4b9b 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -29,18 +29,42 @@ #include "gc/shenandoah/c2/shenandoahSupport.hpp" #include "utilities/growableArray.hpp" -class ShenandoahBarrierSetC2State : public ArenaObj { -private: +static const uint8_t ShenandoahBarrierStrong = 1 << 0; +static const uint8_t ShenandoahBarrierWeak = 1 << 1; +static const uint8_t ShenandoahBarrierPhantom = 1 << 2; +static const uint8_t ShenandoahBarrierNoKeepAlive = 1 << 3; +static const uint8_t ShenandoahBarrierNative = 1 << 4; +static const uint8_t ShenandoahBarrierElided = 1 << 5; + +class ShenandoahBarrierStub; + +class ShenandoahBarrierSetC2State : public BarrierSetC2State { GrowableArray* _load_reference_barriers; + GrowableArray* _stubs; + int _stubs_start_offset; public: - ShenandoahBarrierSetC2State(Arena* comp_arena); + explicit ShenandoahBarrierSetC2State(Arena* comp_arena); + + bool needs_liveness_data(const MachNode* mach) const override; + bool needs_livein_data() const override; int load_reference_barriers_count() const; ShenandoahLoadReferenceBarrierNode* load_reference_barrier(int idx) const; void add_load_reference_barrier(ShenandoahLoadReferenceBarrierNode* n); void remove_load_reference_barrier(ShenandoahLoadReferenceBarrierNode * n); -}; + + GrowableArray* stubs() { + return _stubs; + } + + void set_stubs_start_offset(int offset) { + _stubs_start_offset = offset; + } + + int stubs_start_offset() { + return _stubs_start_offset; + }}; class ShenandoahBarrierSetC2 : public BarrierSetC2 { private: @@ -146,10 +170,49 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const; virtual bool escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const; - virtual bool 
escape_has_out_with_unsafe_object(Node* n) const; - virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const; - virtual bool matcher_is_store_load_barrier(Node* x, uint xop) const; + int estimate_stub_size() const /* override */; + void emit_stubs(CodeBuffer& cb) const /* override */; +}; + +class ShenandoahBarrierStub : public BarrierStubC2 { +protected: + explicit ShenandoahBarrierStub(const MachNode* node) : BarrierStubC2(node) {} + void register_stub(); +public: + virtual void emit_code(MacroAssembler& masm) = 0; }; +class ShenandoahCASBarrierSlowStub : public ShenandoahBarrierStub { + Register _addr; + Register _expected; + Register _new_val; + Register _result; + Register _tmp; + bool _cae; + bool _narrow; + bool _acquire; + bool _release; + bool _weak; + + explicit ShenandoahCASBarrierSlowStub(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool narrow, bool acquire, bool release, bool weak) : + ShenandoahBarrierStub(node), + _addr(addr), _expected(expected), _new_val(new_val), _result(result), _tmp(tmp), _cae(cae), _narrow(narrow), _acquire(acquire), _release(release), _weak(weak) {} + +public: + static ShenandoahCASBarrierSlowStub* create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool narrow, bool acquire, bool release, bool weak); + void emit_code(MacroAssembler& masm) override; +}; + +class ShenandoahCASBarrierMidStub : public ShenandoahBarrierStub { + ShenandoahCASBarrierSlowStub* _slow_stub; + Register _result; + Register _tmp; + bool _cae; + ShenandoahCASBarrierMidStub(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register result, Register tmp, bool cae) : + ShenandoahBarrierStub(node), _slow_stub(slow_stub), _result(result), _tmp(tmp), _cae(cae) {} +public: + static ShenandoahCASBarrierMidStub* create(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, 
Register result, Register tmp, bool cae); + void emit_code(MacroAssembler& masm) override;}; + #endif // SHARE_GC_SHENANDOAH_C2_SHENANDOAHBARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp index 8210718126bc2..e16e0cdf0b676 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp @@ -1465,8 +1465,6 @@ Node* ShenandoahBarrierC2Support::get_load_addr(PhaseIdealLoop* phase, VectorSet case Op_CompareAndExchangeP: case Op_GetAndSetN: case Op_GetAndSetP: - case Op_ShenandoahCompareAndExchangeP: - case Op_ShenandoahCompareAndExchangeN: // Those instructions would just have stored a different // value into the field. No use to attempt to fix it at this point. return phase->igvn().zerocon(T_OBJECT); @@ -2463,8 +2461,6 @@ bool ShenandoahLoadReferenceBarrierNode::needs_barrier_impl(PhaseGVN* phase, Nod case Op_AddP: return true; // TODO: Can refine? 
case Op_LoadP: - case Op_ShenandoahCompareAndExchangeN: - case Op_ShenandoahCompareAndExchangeP: case Op_CompareAndExchangeN: case Op_CompareAndExchangeP: case Op_GetAndSetN: diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp index 63e8412a30731..c1209f0875a92 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp @@ -123,96 +123,6 @@ class MemoryGraphFixer : public ResourceObj { void record_new_ctrl(Node* ctrl, Node* region, Node* mem, Node* mem_for_ctrl); }; -class ShenandoahCompareAndSwapPNode : public CompareAndSwapPNode { -public: - ShenandoahCompareAndSwapPNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) - : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypePtr::NULL_PTR) { - return new CompareAndSwapPNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order()); - } - return nullptr; - } - - virtual int Opcode() const; -}; - -class ShenandoahCompareAndSwapNNode : public CompareAndSwapNNode { -public: - ShenandoahCompareAndSwapNNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) - : CompareAndSwapNNode(c, mem, adr, val, ex, mem_ord) { } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypeNarrowOop::NULL_PTR) { - return new CompareAndSwapNNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order()); - } - return nullptr; - } - - virtual int Opcode() const; -}; - -class ShenandoahWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode { -public: - ShenandoahWeakCompareAndSwapPNode(Node *c, Node *mem, Node *adr, Node *val, 
Node *ex, MemNode::MemOrd mem_ord) - : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypePtr::NULL_PTR) { - return new WeakCompareAndSwapPNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order()); - } - return nullptr; - } - - virtual int Opcode() const; -}; - -class ShenandoahWeakCompareAndSwapNNode : public WeakCompareAndSwapNNode { -public: - ShenandoahWeakCompareAndSwapNNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) - : WeakCompareAndSwapNNode(c, mem, adr, val, ex, mem_ord) { } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypeNarrowOop::NULL_PTR) { - return new WeakCompareAndSwapNNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order()); - } - return nullptr; - } - - virtual int Opcode() const; -}; - -class ShenandoahCompareAndExchangePNode : public CompareAndExchangePNode { -public: - ShenandoahCompareAndExchangePNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) - : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypePtr::NULL_PTR) { - return new CompareAndExchangePNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), adr_type(), bottom_type(), order()); - } - return nullptr; - } - - virtual int Opcode() const; -}; - -class ShenandoahCompareAndExchangeNNode : public CompareAndExchangeNNode { -public: - ShenandoahCompareAndExchangeNNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, 
MemNode::MemOrd mem_ord) - : CompareAndExchangeNNode(c, mem, adr, val, ex, at, t, mem_ord) { } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypeNarrowOop::NULL_PTR) { - return new CompareAndExchangeNNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), adr_type(), bottom_type(), order()); - } - return nullptr; - } - - virtual int Opcode() const; -}; - class ShenandoahLoadReferenceBarrierNode : public Node { public: enum { diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index 587d5fad8f29e..bd3947ab2bb7b 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -328,12 +328,6 @@ macro(SafePointScalarMerge) #else #define shmacro(x) optionalmacro(x) #endif -shmacro(ShenandoahCompareAndExchangeP) -shmacro(ShenandoahCompareAndExchangeN) -shmacro(ShenandoahCompareAndSwapN) -shmacro(ShenandoahCompareAndSwapP) -shmacro(ShenandoahWeakCompareAndSwapN) -shmacro(ShenandoahWeakCompareAndSwapP) shmacro(ShenandoahLoadReferenceBarrier) macro(SCMemProj) macro(CopySignD) diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp index 1a5bddd332ead..70c2551cde96e 100644 --- a/src/hotspot/share/opto/escape.cpp +++ b/src/hotspot/share/opto/escape.cpp @@ -3449,8 +3449,7 @@ bool ConnectionGraph::is_oop_field(Node* n, int offset, bool* unsafe) { // Check for unsafe oop field access if (n->has_out_with(Op_StoreP, Op_LoadP, Op_StoreN, Op_LoadN) || n->has_out_with(Op_GetAndSetP, Op_GetAndSetN, Op_CompareAndExchangeP, Op_CompareAndExchangeN) || - n->has_out_with(Op_CompareAndSwapP, Op_CompareAndSwapN, Op_WeakCompareAndSwapP, Op_WeakCompareAndSwapN) || - BarrierSet::barrier_set()->barrier_set_c2()->escape_has_out_with_unsafe_object(n)) { + n->has_out_with(Op_CompareAndSwapP, Op_CompareAndSwapN, Op_WeakCompareAndSwapP, Op_WeakCompareAndSwapN)) { bt = T_OBJECT; 
(*unsafe) = true; } @@ -3468,8 +3467,7 @@ bool ConnectionGraph::is_oop_field(Node* n, int offset, bool* unsafe) { // Allocation initialization, ThreadLocal field access, unsafe access if (n->has_out_with(Op_StoreP, Op_LoadP, Op_StoreN, Op_LoadN) || n->has_out_with(Op_GetAndSetP, Op_GetAndSetN, Op_CompareAndExchangeP, Op_CompareAndExchangeN) || - n->has_out_with(Op_CompareAndSwapP, Op_CompareAndSwapN, Op_WeakCompareAndSwapP, Op_WeakCompareAndSwapN) || - BarrierSet::barrier_set()->barrier_set_c2()->escape_has_out_with_unsafe_object(n)) { + n->has_out_with(Op_CompareAndSwapP, Op_CompareAndSwapN, Op_WeakCompareAndSwapP, Op_WeakCompareAndSwapN)) { bt = T_OBJECT; } } diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index 5f5e0520e7eb6..4a7771135a3fb 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -3009,13 +3009,7 @@ void IdealLoopTree::adjust_loop_exit_prob(PhaseIdealLoop *phase) { (bol->in(1)->Opcode() == Op_CompareAndSwapI) || (bol->in(1)->Opcode() == Op_CompareAndSwapL) || (bol->in(1)->Opcode() == Op_CompareAndSwapP) || - (bol->in(1)->Opcode() == Op_CompareAndSwapN) || - (bol->in(1)->Opcode() == Op_ShenandoahCompareAndExchangeP) || - (bol->in(1)->Opcode() == Op_ShenandoahCompareAndExchangeN) || - (bol->in(1)->Opcode() == Op_ShenandoahWeakCompareAndSwapP) || - (bol->in(1)->Opcode() == Op_ShenandoahWeakCompareAndSwapN) || - (bol->in(1)->Opcode() == Op_ShenandoahCompareAndSwapP) || - (bol->in(1)->Opcode() == Op_ShenandoahCompareAndSwapN))) + (bol->in(1)->Opcode() == Op_CompareAndSwapN))) return; // Allocation loops RARELY take backedge // Find the OTHER exit path from the IF Node* ex = iff->proj_out(1-test_con); diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index 5cb56019bc144..e62c0437283ad 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -2230,11 +2230,7 @@ void 
Matcher::find_shared(Node* n) { mstack.pop(); // Remove node from stack // Now hack a few special opcodes - uint opcode = n->Opcode(); - bool gc_handled = BarrierSet::barrier_set()->barrier_set_c2()->matcher_find_shared_post_visit(this, n, opcode); - if (!gc_handled) { - find_shared_post_visit(n, opcode); - } + find_shared_post_visit(n, n->Opcode()); } else { ShouldNotReachHere(); @@ -2939,8 +2935,7 @@ bool Matcher::post_store_load_barrier(const Node* vmb) { xop == Op_CompareAndSwapL || xop == Op_CompareAndSwapP || xop == Op_CompareAndSwapN || - xop == Op_CompareAndSwapI || - BarrierSet::barrier_set()->barrier_set_c2()->matcher_is_store_load_barrier(x, xop)) { + xop == Op_CompareAndSwapI) { return true; } From a69a5f6756c830c9d8e6482925c9813297faa40f Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Mon, 25 Aug 2025 13:57:58 +0000 Subject: [PATCH 2/9] x86 parts --- .../shenandoahBarrierSetAssembler_x86.cpp | 145 ++++++++++++++++++ .../shenandoahBarrierSetAssembler_x86.hpp | 8 + .../x86/gc/shenandoah/shenandoah_x86_64.ad | 50 +++--- .../shenandoah/c2/shenandoahBarrierSetC2.cpp | 16 +- .../shenandoah/c2/shenandoahBarrierSetC2.hpp | 21 +-- 5 files changed, 201 insertions(+), 39 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp index 9e321391f6cc9..cee634faa4938 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -41,6 +41,9 @@ #include "c1/c1_MacroAssembler.hpp" #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #endif +#ifdef COMPILER2 +#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" +#endif #define __ masm-> @@ -774,6 +777,148 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } } +#ifdef COMPILER2 +void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, + 
Register res, Address addr, Register oldval, Register newval, Register tmp1, Register tmp2, + bool exchange) { + assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); + assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); + assert_different_registers(oldval, tmp1, tmp2); + assert_different_registers(newval, tmp1, tmp2); + + ShenandoahCASBarrierSlowStub* const slow_stub = ShenandoahCASBarrierSlowStub::create(node, addr, oldval, newval, res, tmp1, tmp2, exchange); + ShenandoahCASBarrierMidStub* const mid_stub = ShenandoahCASBarrierMidStub::create(node, slow_stub, oldval, res, tmp1, exchange); + + Label L_success, L_failure; + + // Remember oldval for retry logic below. It will be overwritten by the CAS. + __ movptr(tmp2, oldval); + + // Step 1. Fast-path. + // + // Try to CAS with given arguments. If successful, then we are done. + __ lock(); + __ cmpxchgptr(newval, addr); + __ jcc(Assembler::notEqual, *mid_stub->entry()); + + // Slow-stub re-enters with condition flags according to CAS, we may need to + // set result accordingly. + __ bind(*slow_stub->continuation()); + + // Step 5. If we need a boolean result out of CAS, set the flag appropriately. + // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS. + // Otherwise, failure witness for CAE is in oldval on all paths, and we can return. + + if (!exchange) { + assert(res != noreg, "need result register"); + __ setcc(Assembler::equal, res); + } + + // Mid-stub re-enters with result set correctly. + __ bind(*mid_stub->continuation()); +} + +#undef __ +#define __ masm. + +void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { + __ bind(*entry()); + + if (!_cae) { + // Set result to false, in case that we fail the following tests. + // Failing those tests means legitimate failures. + // Otherwise, result will be set correctly after returning from + // the slow-path. + __ movl(_result, 0); // Result = false. 
+ } + // Check if CAS result is null. If it is, then we must have a legitimate failure. + // This makes loading the fwdptr in the slow-path simpler. + __ testptr(_expected, _expected); + __ jcc(Assembler::equal, *continuation()); + + // Check if GC is in progress, otherwise we must have a legitimate failure. + Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); + __ jcc(Assembler::notZero, *_slow_stub->entry()); + __ jmp(*continuation()); +} + +void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { + __ bind(*entry()); + + assert(_expected == rax, "expected must be rax"); + + // Step 2. CAS has failed because the value held at addr does not + // match expected. This may be a false negative because the value fetched + // from addr (now held in result) may be a from-space pointer to the + // original copy of same object referenced by to-space pointer expected. + // + // To resolve this, it suffices to find the forward pointer associated + // with fetched value. If this matches expected, retry CAS with new + // parameters. If this mismatches, then we have a legitimate + // failure, and we're done. + + // overwrite tmp1 with from-space pointer fetched from memory + __ movptr(_tmp1, _expected); + + if (UseCompressedOops) { + __ decode_heap_oop_not_null(_tmp1); + } + + // Load/decode forwarding pointer. + __ movq(_tmp1, Address(_tmp1, oopDesc::mark_offset_in_bytes())); + // Negate the mark-word. This allows us to test lowest 2 bits easily while preserving the upper bits. + __ negq(_tmp1); + __ testq(_tmp1, markWord::lock_mask_in_place); + // Not forwarded, must have a legit CAS failure. + __ jcc(Assembler::notEqual, *continuation()); + // Set the lowest two bits. This is equivalent to clearing the two bits after + // the subsequent inversion. + __ orq(_tmp1, markWord::marked_value); + // And invert back to get the forwardee. 
+ __ negq(_tmp1); + + if (UseCompressedOops) { + __ encode_heap_oop_not_null(_tmp1); // encode for comparison + } + + // Now we have the forwarded offender in tmp1. + // We preserved the original expected value in tmp2 in the fast-path. + // Compare and if they don't match, we have legitimate failure + __ cmpptr(_tmp1, _tmp2); + __ jcc(Assembler::notEqual, *continuation()); + + // Fall through to step 3. + + // Step 3. We've confirmed that the value originally held in memory + // (now held in result) pointed to from-space version of original + // expected value. Try the CAS again with the from-space expected + // value. If it now succeeds, we're good. + // + // Note: expected holds encoded from-space pointer that matches to-space + // object residing at tmp1. + __ lock(); + __ cmpxchgptr(_new_val, _addr); + + // If fetched value did not equal the new expected, this could + // still be a false negative because some other (GC) thread may have + // newly overwritten the memory value with its to-space equivalent. + __ jcc(Assembler::equal, *continuation()); + + // Step 4. Try to CAS again, but with the original to-space expected. + // This should be very rare. + __ movptr(_expected, _tmp2); + __ lock(); + __ cmpxchgptr(_new_val, _addr); + + // At this point, there can no longer be false negatives. 
+ __ jmp(*continuation()); +} + +#undef __ +#define __ masm-> +#endif + #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ #else diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp index b0185f2dbffbd..1ea4edb8eb3a6 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp @@ -36,6 +36,9 @@ class ShenandoahPreBarrierStub; class ShenandoahLoadReferenceBarrierStub; class StubAssembler; #endif +#ifdef COMPILER2 +class MachNode; +#endif class StubCodeGenerator; class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { @@ -74,6 +77,11 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { void cmpxchg_oop(MacroAssembler* masm, Register res, Address addr, Register oldval, Register newval, bool exchange, Register tmp1, Register tmp2); +#ifdef COMPILER2 + void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, + Register res, Address addr, Register oldval, Register newval, Register tmp1, Register tmp2, + bool exchange); +#endif virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register src, Register dst, Register count); virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad index c580d21c9b869..adcb922711601 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad @@ -33,18 +33,17 @@ instruct compareAndSwapP_shenandoah(rRegI res, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ - match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval))); - match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval))); + match(Set res 
(CompareAndSwapP mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - false, // swap - $tmp1$$Register, $tmp2$$Register - ); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, + $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, + /*exchange*/ false); %} ins_pipe( pipe_cmpxchg ); %} @@ -54,18 +53,18 @@ instruct compareAndSwapN_shenandoah(rRegI res, rRegP tmp1, rRegP tmp2, rax_RegN oldval, rRegN newval, rFlagsReg cr) %{ - match(Set res (ShenandoahCompareAndSwapN mem_ptr (Binary oldval newval))); - match(Set res (ShenandoahWeakCompareAndSwapN mem_ptr (Binary oldval newval))); + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - false, // swap - $tmp1$$Register, $tmp2$$Register - ); + guarantee(UseCompressedOops, "must be compressed oops"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, + $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, + /*exchange*/ false); %} ins_pipe( pipe_cmpxchg ); %} @@ -74,17 +73,17 @@ instruct compareAndExchangeN_shenandoah(memory mem_ptr, rax_RegN oldval, rRegN newval, rRegP tmp1, rRegP tmp2, rFlagsReg cr) %{ - match(Set oldval 
(ShenandoahCompareAndExchangeN mem_ptr (Binary oldval newval))); + match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); effect(TEMP tmp1, TEMP tmp2, KILL cr); format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - true, // exchange - $tmp1$$Register, $tmp2$$Register - ); + guarantee(UseCompressedOops, "must be compressed oops"); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, + noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, + /*exchange*/ true); %} ins_pipe( pipe_cmpxchg ); %} @@ -94,18 +93,17 @@ instruct compareAndExchangeP_shenandoah(memory mem_ptr, rRegP tmp1, rRegP tmp2, rFlagsReg cr) %{ - match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval))); + match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); effect(KILL cr, TEMP tmp1, TEMP tmp2); ins_cost(1000); format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - true, // exchange - $tmp1$$Register, $tmp2$$Register - ); + ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, + noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, + /*exchange*/ true); %} ins_pipe( pipe_cmpxchg ); %} diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index ebdbeff862f87..36c8938e6e386 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -26,6 +26,8 @@ #include 
"classfile/javaClasses.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" + +#include #include "gc/shenandoah/c2/shenandoahSupport.hpp" #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" @@ -1273,14 +1275,20 @@ void ShenandoahBarrierStub::register_stub() { } } -ShenandoahCASBarrierSlowStub* ShenandoahCASBarrierSlowStub::create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool narrow, bool acquire, bool release, bool weak) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStub(node, addr, expected, new_val, result, tmp, cae, narrow, acquire, release, weak); +ShenandoahCASBarrierSlowStub* ShenandoahCASBarrierSlowStub::create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool acquire, bool release, bool weak) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStub(node, addr, Address(), expected, new_val, result, tmp, noreg, cae, acquire, release, weak); + stub->register_stub(); + return stub; +} + +ShenandoahCASBarrierSlowStub* ShenandoahCASBarrierSlowStub::create(const MachNode* node, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStub(node, noreg, addr, expected, new_val, result, tmp1, tmp2, cae, false, false, false); stub->register_stub(); return stub; } -ShenandoahCASBarrierMidStub* ShenandoahCASBarrierMidStub::create(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register result, Register tmp, bool cae) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierMidStub(node, slow_stub, result, tmp, cae); +ShenandoahCASBarrierMidStub* ShenandoahCASBarrierMidStub::create(const MachNode* node, 
ShenandoahCASBarrierSlowStub* slow_stub, Register expected, Register result, Register tmp, bool cae) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierMidStub(node, slow_stub, expected, result, tmp, cae); stub->register_stub(); return stub; } diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index 3b8a0518e4b9b..073fa64304ec9 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -184,35 +184,38 @@ class ShenandoahBarrierStub : public BarrierStubC2 { }; class ShenandoahCASBarrierSlowStub : public ShenandoahBarrierStub { - Register _addr; + Register _addr_reg; + Address _addr; Register _expected; Register _new_val; Register _result; - Register _tmp; + Register _tmp1; + Register _tmp2; bool _cae; - bool _narrow; bool _acquire; bool _release; bool _weak; - explicit ShenandoahCASBarrierSlowStub(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool narrow, bool acquire, bool release, bool weak) : + explicit ShenandoahCASBarrierSlowStub(const MachNode* node, Register addr_reg, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae, bool acquire, bool release, bool weak) : ShenandoahBarrierStub(node), - _addr(addr), _expected(expected), _new_val(new_val), _result(result), _tmp(tmp), _cae(cae), _narrow(narrow), _acquire(acquire), _release(release), _weak(weak) {} + _addr_reg(addr_reg), _addr(addr), _expected(expected), _new_val(new_val), _result(result), _tmp1(tmp1), _tmp2(tmp2), _cae(cae), _acquire(acquire), _release(release), _weak(weak) {} public: - static ShenandoahCASBarrierSlowStub* create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool narrow, bool acquire, bool release, bool 
weak); + static ShenandoahCASBarrierSlowStub* create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool acquire, bool release, bool weak); + static ShenandoahCASBarrierSlowStub* create(const MachNode* node, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae); void emit_code(MacroAssembler& masm) override; }; class ShenandoahCASBarrierMidStub : public ShenandoahBarrierStub { ShenandoahCASBarrierSlowStub* _slow_stub; + Register _expected; Register _result; Register _tmp; bool _cae; - ShenandoahCASBarrierMidStub(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register result, Register tmp, bool cae) : - ShenandoahBarrierStub(node), _slow_stub(slow_stub), _result(result), _tmp(tmp), _cae(cae) {} + ShenandoahCASBarrierMidStub(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register expected, Register result, Register tmp, bool cae) : + ShenandoahBarrierStub(node), _slow_stub(slow_stub), _expected(expected), _result(result), _tmp(tmp), _cae(cae) {} public: - static ShenandoahCASBarrierMidStub* create(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register result, Register tmp, bool cae); + static ShenandoahCASBarrierMidStub* create(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register expected, Register result, Register tmp, bool cae); void emit_code(MacroAssembler& masm) override;}; #endif // SHARE_GC_SHENANDOAH_C2_SHENANDOAHBARRIERSETC2_HPP From e7adb6f96f9e4e5eb8dba25d2ed7b54c57881d12 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Mon, 25 Aug 2025 17:06:16 +0200 Subject: [PATCH 3/9] Re-fit aarch64 --- .../shenandoahBarrierSetAssembler_aarch64.cpp | 53 +++++++++++++------ .../shenandoahBarrierSetAssembler_aarch64.hpp | 2 +- .../gc/shenandoah/shenandoah_aarch64.ad | 30 ++++++----- 3 files changed, 55 insertions(+), 30 deletions(-) diff --git 
a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index f58536d747121..041aaf3da883f 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -617,15 +617,15 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, Register new_val, Register result, bool acquire, bool release, bool weak, - bool is_cae, bool narrow) { + bool is_cae) { Register tmp = rscratch2; - Assembler::operand_size size = narrow ? Assembler::word : Assembler::xword; + Assembler::operand_size size = UseCompressedOops ? Assembler::word : Assembler::xword; assert_different_registers(addr, expected, result, tmp); assert_different_registers(addr, new_val, result, tmp); - ShenandoahCASBarrierSlowStub* const slow_stub = ShenandoahCASBarrierSlowStub::create(node, addr, expected, new_val, result, tmp, is_cae, narrow, acquire, release, weak); - ShenandoahCASBarrierMidStub* const mid_stub = ShenandoahCASBarrierMidStub::create(node, slow_stub, result, tmp, is_cae); + ShenandoahCASBarrierSlowStub* const slow_stub = ShenandoahCASBarrierSlowStub::create(node, addr, expected, new_val, result, tmp, is_cae, acquire, release, weak); + ShenandoahCASBarrierMidStub* const mid_stub = ShenandoahCASBarrierMidStub::create(node, slow_stub, expected, result, tmp, is_cae); // Step 1. Fast-path. // @@ -652,10 +652,19 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { __ bind(*entry()); + // Check if CAS result is null. If it is, then we must have a legitimate failure. + // This makes loading the fwdptr in the slow-path simpler. + __ tst(_result, _result); + // In case of !CAE, this has the correct value for legitimate failure (0/false) + // in result register. 
+ __ br(Assembler::EQ, *continuation()); + + // Check if GC is in progress, otherwise we must have a legitimate failure. Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); __ ldrb(_tmp, gc_state); __ tstw(_tmp, ShenandoahHeap::HAS_FORWARDED); __ br(Assembler::NE, *_slow_stub->entry()); + if (!_cae) { __ mov(_result, 0); // result = false } @@ -664,7 +673,7 @@ void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { __ bind(*entry()); - Assembler::operand_size size = _narrow ? Assembler::word : Assembler::xword; + Assembler::operand_size size = UseCompressedOops ? Assembler::word : Assembler::xword; // Step 2. CAS has failed because the value held at addr does not // match expected. This may be a false negative because the value fetched @@ -677,18 +686,29 @@ void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { // failure, and we're done. // overwrite tmp with from-space pointer fetched from memory - __ mov(_tmp, _result); + __ mov(_tmp1, _result); - if (_narrow) { + if (UseCompressedOops) { // Decode tmp in order to resolve its forward pointer - __ decode_heap_oop(_tmp, _tmp); + __ decode_heap_oop_not_null(_tmp1, _tmp1); } - ShenandoahBarrierSet::assembler()->resolve_forward_pointer(&masm, _tmp); + // Load/decode forwarding pointer. + __ ldr(_tmp1, Address(_tmp1, oopDesc::mark_offset_in_bytes())); + // Negate the mark-word. This allows us to test lowest 2 bits easily while preserving the upper bits. + __ eon(_tmp1, _tmp1, zr); + __ ands(zr, _tmp1, markWord::lock_mask_in_place); + // Not forwarded, must have a legit CAS failure. + __ br(Assembler::NE, *continuation()); + // Set the lowest two bits. This is equivalent to clearing the two bits after + // the subsequent inversion. + __ orr(_tmp1, _tmp1, markWord::marked_value); + // And invert back to get the forwardee. 
+ __ eon(_tmp1, _tmp1, zr); - if (_narrow) { + if (UseCompressedOops) { // Encode tmp to compare against expected. - __ encode_heap_oop(_tmp, _tmp); + __ encode_heap_oop_not_null(_tmp1, _tmp1); } // Does forwarded value of fetched from-space pointer match original @@ -696,7 +716,7 @@ void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { // because we know from step1 that expected is not null. There is // no need for a separate test for result (the value originally held // in memory) equal to null. - __ cmp(_tmp, _expected); + __ cmp(_tmp1, _expected); // If not, then the failure was legitimate and we're done. // Branching to continuation with NE condition denotes failure. @@ -715,8 +735,8 @@ void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { // Note that macro implementation of __cmpxchg cannot use same register // tmp2 for result and expected since it overwrites result before it // compares result with expected. - __ mov(_tmp, _result); - __ cmpxchg(_addr, _tmp, _new_val, size, _acquire, _release, _weak, _result); + __ mov(_tmp1, _result); + __ cmpxchg(_addr_reg, _tmp1, _new_val, size, _acquire, _release, _weak, _result); // EQ flag set iff success. result holds value fetched, rscratch1 clobbered. // If fetched value did not equal the new expected, this could @@ -724,9 +744,8 @@ void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { // newly overwritten the memory value with its to-space equivalent. __ br(Assembler::EQ, *continuation()); - // Step 4. - __ mov(_tmp, _result); - __ cmpxchg(_addr, _tmp, _new_val, size, _acquire, _release, _weak, _result); + // Step 4. Retry CAS with original to-space expected. 
+ __ cmpxchg(_addr_reg, _expected, _new_val, size, _acquire, _release, _weak, _result); __ b(*continuation()); } diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index 31d18ced6fe91..6de6d40fa3044 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -94,7 +94,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { bool acquire, bool release, bool is_cae, Register result); #ifdef COMPILER2 void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register expected, Register new_val, Register result, - bool acquire, bool release, bool weak, bool is_cae, bool narrow); + bool acquire, bool release, bool weak, bool is_cae); #endif }; diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad index d930277830e32..a09ec29185c52 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad @@ -42,7 +42,7 @@ instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, i ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ false); + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} ins_pipe(pipe_slow); @@ -62,8 +62,9 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + guarantee(UseCompressedOops, "must be compressed oops"); 
ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ true); + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} ins_pipe(pipe_slow); @@ -85,7 +86,7 @@ instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ false); + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} ins_pipe(pipe_slow); @@ -106,8 +107,9 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + guarantee(UseCompressedOops, "must be compressed oops"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, /*narrow*/ true); + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} ins_pipe(pipe_slow); @@ -123,8 +125,9 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + guarantee(UseCompressedOops, "must be compressed oops"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ true); + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} ins_pipe(pipe_slow); %} @@ -140,7 
+143,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ false); + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} ins_pipe(pipe_slow); %} @@ -156,8 +159,9 @@ instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN ol %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + guarantee(UseCompressedOops, "must be compressed oops"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ true); + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} ins_pipe(pipe_slow); %} @@ -174,7 +178,7 @@ instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP ol ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, /*narrow*/ false); + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} ins_pipe(pipe_slow); %} @@ -190,8 +194,9 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + guarantee(UseCompressedOops, "must be compressed oops"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ 
false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ true); + /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} ins_pipe(pipe_slow); %} @@ -207,7 +212,7 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ false); + /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} ins_pipe(pipe_slow); %} @@ -224,8 +229,9 @@ instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN ol %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + guarantee(UseCompressedOops, "must be compressed oops"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ true); + /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} ins_pipe(pipe_slow); %} @@ -243,7 +249,7 @@ instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP ol ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, /*narrow*/ false); + /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} ins_pipe(pipe_slow); %} From 6878a6e0380ce29ffe5a11e1d0c52a224c4f0c52 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Tue, 26 Aug 2025 16:34:27 +0200 Subject: [PATCH 4/9] SATB barrier late expansion, aarch64 parts --- 
.../shenandoahBarrierSetAssembler_aarch64.cpp | 56 +- .../shenandoahBarrierSetAssembler_aarch64.hpp | 3 + .../gc/shenandoah/shenandoah_aarch64.ad | 415 +++++++++- .../share/gc/shared/c2/barrierSetC2.hpp | 2 - .../shenandoah/c2/shenandoahBarrierSetC2.cpp | 748 ++++-------------- .../shenandoah/c2/shenandoahBarrierSetC2.hpp | 112 ++- src/hotspot/share/opto/macro.cpp | 2 +- src/hotspot/share/opto/node.cpp | 2 - src/hotspot/share/opto/phaseX.cpp | 4 - 9 files changed, 672 insertions(+), 672 deletions(-) diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 041aaf3da883f..37201ca6e4f05 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -34,6 +34,7 @@ #include "gc/shenandoah/shenandoahThreadLocalData.hpp" #include "interpreter/interp_masm.hpp" #include "interpreter/interpreter.hpp" +#include "runtime/continuationHelper.hpp" #include "runtime/javaThread.hpp" #include "runtime/sharedRuntime.hpp" #ifdef COMPILER1 @@ -610,6 +611,15 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } #ifdef COMPILER2 +void ShenandoahBarrierSetAssembler::satb_barrier_c2(MacroAssembler* masm, Register addr, Register pre_val, Register rthread, Register tmp1, Register tmp2, ShenandoahSATBBarrierStubC2* stub) { + // Check if GC marking is in progress, otherwise we don't have to do anything. 
+ Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ ldrb(tmp1, gc_state); + __ tstw(tmp1, ShenandoahHeap::MARKING); + __ br(Assembler::NE, *stub->entry()); + __ bind(*stub->continuation()); +} + void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, @@ -624,8 +634,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, assert_different_registers(addr, expected, result, tmp); assert_different_registers(addr, new_val, result, tmp); - ShenandoahCASBarrierSlowStub* const slow_stub = ShenandoahCASBarrierSlowStub::create(node, addr, expected, new_val, result, tmp, is_cae, acquire, release, weak); - ShenandoahCASBarrierMidStub* const mid_stub = ShenandoahCASBarrierMidStub::create(node, slow_stub, expected, result, tmp, is_cae); + ShenandoahCASBarrierSlowStubC2* const slow_stub = ShenandoahCASBarrierSlowStubC2::create(node, addr, expected, new_val, result, tmp, is_cae, acquire, release, weak); + ShenandoahCASBarrierMidStubC2* const mid_stub = ShenandoahCASBarrierMidStubC2::create(node, slow_stub, expected, result, tmp, is_cae); // Step 1. Fast-path. // @@ -649,7 +659,45 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, #undef __ #define __ masm. -void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { +void ShenandoahSATBBarrierStubC2::emit_code(MacroAssembler& masm) { + __ bind(*entry()); + // Do we need to load the previous value? + if (_addr_reg != noreg) { + __ load_heap_oop(_preval, Address(_addr_reg, 0), noreg, noreg, AS_RAW); + } + // Is the previous value null? + __ tst(_preval, _preval); + // Then we don't need to do anything. 
+ __ br(Assembler::EQ, *continuation()); + + Address queue_index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + Label runtime; + __ ldr(_tmp1, queue_index); + // If buffer is full, call into runtime. + __ cbz(_tmp1, runtime); + + // The buffer is not full, store value into it. + __ sub(_tmp1, _tmp1, wordSize); + __ str(_tmp1, queue_index); + __ ldr(_tmp2, buffer); + __ str(_preval, Address(_tmp2, _tmp1)); + __ b(*continuation()); + + // Runtime call + __ bind(runtime); + { + SaveLiveRegisters save_registers(&masm, this); + if (c_rarg0 != _preval) { + __ mov(c_rarg0, _preval); + } + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre)); + __ blr(lr); + } + __ b(*continuation()); +} + +void ShenandoahCASBarrierMidStubC2::emit_code(MacroAssembler& masm) { __ bind(*entry()); // Check if CAS result is null. If it is, then we must have a legitimate failure. @@ -671,7 +719,7 @@ void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { __ b(*continuation()); } -void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { +void ShenandoahCASBarrierSlowStubC2::emit_code(MacroAssembler& masm) { __ bind(*entry()); Assembler::operand_size size = UseCompressedOops ? 
Assembler::word : Assembler::xword; diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index 6de6d40fa3044..ed22108c132eb 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -36,6 +36,7 @@ class ShenandoahLoadReferenceBarrierStub; class StubAssembler; #endif #ifdef COMPILER2 +class ShenandoahSATBBarrierStubC2; class MachNode; #endif // COMPILER2 class StubCodeGenerator; @@ -93,6 +94,8 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, bool acquire, bool release, bool is_cae, Register result); #ifdef COMPILER2 + + void satb_barrier_c2(MacroAssembler* masm, Register obj, Register pre_val, Register rthread, Register tmp1, Register tmp2, ShenandoahSATBBarrierStubC2* stub); void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register expected, Register new_val, Register result, bool acquire, bool release, bool weak, bool is_cae); #endif diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad index a09ec29185c52..84ca656c0ab0c 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad @@ -27,6 +27,162 @@ source_hpp %{ #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" %} +source %{ + +#include "gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp" +#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" + +static void satb_barrier(MacroAssembler* masm, + const MachNode* node, + Register addr, + Register pre_val, + Register tmp1, + Register tmp2, + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if 
(!ShenandoahSATBBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + ShenandoahBarrierSetAssembler* bsasm = ShenandoahBarrierSet::barrier_set()->assembler(); + ShenandoahSATBBarrierStubC2* const stub = ShenandoahSATBBarrierStubC2::create(node, addr, pre_val, tmp1, tmp2); + for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + bsasm->satb_barrier_c2(masm, addr, pre_val, rthread, tmp1, tmp2, stub); +} + +%} + +instruct storeP_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set mem (StoreP mem src)); + predicate(UseShenandoahGC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp, KILL cr); + ins_cost(INSN_COST); + format %{ "str $src, $mem\t# ptr" %} + ins_encode %{ + satb_barrier(masm, this, + $mem$$Register /* obj */, + rscratch1 /* pre_val */, + rscratch2 /* tmp1 */, + $tmp$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ str($src$$Register, $mem$$Register); + %} + ins_pipe(istore_reg_mem); +%} + +instruct storePVolatile_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set mem (StoreP mem src)); + predicate(UseShenandoahGC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "stlr $src, $mem\t# ptr" %} + ins_encode %{ + satb_barrier(masm, this, + $mem$$Register /* obj */, + rscratch1 /* pre_val */, + rscratch2 /* tmp1 */, + $tmp$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ stlr($src$$Register, $mem$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct storeN_shenandoah(indirect mem, iRegN src, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set mem (StoreN mem src)); +
predicate(UseShenandoahGC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp, KILL cr); + ins_cost(INSN_COST); + format %{ "strw $src, $mem\t# compressed ptr" %} + ins_encode %{ + satb_barrier(masm, this, + $mem$$Register /* obj */, + rscratch1 /* pre_val */, + rscratch2 /* tmp1 */, + $tmp$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ strw($src$$Register, $mem$$Register); + %} + ins_pipe(istore_reg_mem); +%} + +instruct storeNVolatile_shenandoah(indirect mem, iRegN src, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set mem (StoreN mem src)); + predicate(UseShenandoahGC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "stlrw $src, $mem\t# compressed ptr" %} + ins_encode %{ + satb_barrier(masm, this, + $mem$$Register /* obj */, + rscratch1 /* pre_val */, + rscratch2 /* tmp1 */, + $tmp$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ stlrw($src$$Register, $mem$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct encodePAndStoreN_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(UseShenandoahGC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp, KILL cr); + ins_cost(INSN_COST); + format %{ "encode_heap_oop $tmp, $src\n\t" + "strw $tmp, $mem\t# compressed ptr" %} + ins_encode %{ + satb_barrier(masm, this, + $mem$$Register /* obj */, + rscratch1 /* pre_val */, + rscratch2 /* tmp1 */, + $tmp$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & ShenandoahBarrierCardMarkNotNull) == 0) { + __ encode_heap_oop($tmp$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp$$Register, $src$$Register); + } + __ strw($tmp$$Register, $mem$$Register); + %} + ins_pipe(istore_reg_mem); +%} + 
+instruct encodePAndStoreNVolatile_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(UseShenandoahGC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "encode_heap_oop $tmp, $src\n\t" + "stlrw $tmp, $mem\t# compressed ptr" %} + ins_encode %{ + satb_barrier(masm, this, + $mem$$Register /* obj */, + rscratch1 /* pre_val */, + rscratch2 /* tmp1 */, + $tmp$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & ShenandoahBarrierCardMarkNotNull) == 0) { + __ encode_heap_oop($tmp$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp$$Register, $src$$Register); + } + __ stlrw($tmp$$Register, $mem$$Register); + %} + ins_pipe(pipe_class_memory); +%} + instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapP mem (Binary oldval newval))); @@ -41,6 +197,13 @@ instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, i ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + satb_barrier(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} @@ -48,13 +211,13 @@ instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, i ins_pipe(pipe_slow); %} -instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ +instruct 
compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set res (CompareAndSwapN mem (Binary oldval newval))); predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); + effect(TEMP_DEF res, TEMP tmp, KILL cr); format %{ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" @@ -63,6 +226,16 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $oldval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} @@ -85,6 +258,13 @@ instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + satb_barrier(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} @@ -92,14 +272,14 @@ instruct 
compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval ins_pipe(pipe_slow); %} -instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set res (CompareAndSwapN mem (Binary oldval newval))); predicate(needs_acquiring_load_exclusive(n)); predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); + effect(TEMP_DEF res, TEMP tmp, KILL cr); format %{ "cmpxchgw_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" @@ -108,6 +288,16 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $oldval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} @@ -115,17 +305,27 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval ins_pipe(pipe_slow); %} -instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ +instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set res (CompareAndExchangeN mem (Binary oldval 
newval))); predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); + effect(TEMP_DEF res, TEMP tmp, KILL cr); format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $oldval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} @@ -142,24 +342,41 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + satb_barrier(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ +instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set 
res (CompareAndExchangeN mem (Binary oldval newval))); predicate(needs_acquiring_load_exclusive(n)); predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); + effect(TEMP_DEF res, TEMP tmp, KILL cr); format %{ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $oldval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} @@ -177,17 +394,24 @@ instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP ol %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + satb_barrier(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ +instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, 
indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); + effect(TEMP_DEF res, TEMP tmp, KILL cr); format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" @@ -195,6 +419,16 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $oldval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} @@ -211,18 +445,25 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + satb_barrier(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} 
ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ +instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); predicate(needs_acquiring_load_exclusive(n)); predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); ins_cost(VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); + effect(TEMP_DEF res, TEMP tmp, KILL cr); format %{ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" @@ -230,6 +471,16 @@ instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN ol ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $oldval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} @@ -248,8 +499,148 @@ instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP ol %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + satb_barrier(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) 
/* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} ins_pipe(pipe_slow); %} + +instruct getAndSetP_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, rFlagsReg cr) +%{ + match(Set preval (GetAndSetP mem newval)); + predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP preval, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $preval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + %} + ins_pipe(pipe_serial); +%} + +instruct getAndSetPAcq_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, rFlagsReg cr) +%{ + match(Set preval (GetAndSetP mem newval)); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP preval, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $preval$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + %} + ins_pipe(pipe_serial); +%} + +instruct getAndSetN_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set preval (GetAndSetN mem newval)); + predicate(UseShenandoahGC && 
!needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP preval, TEMP tmp, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchgw $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ atomic_xchgw($preval$$Register, $newval$$Register, $mem$$Register); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $preval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + } + %} + ins_pipe(pipe_serial); +%} + +// Acquiring variant of getAndSetN_shenandoah: exchanges the narrow oop with +// atomic_xchgalw, then hands the decoded previous value to satb_barrier. +instruct getAndSetNAcq_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set preval (GetAndSetN mem newval)); + predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP preval, TEMP tmp, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchgw_acq $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ atomic_xchgalw($preval$$Register, $newval$$Register, $mem$$Register); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $preval$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + } + %} + ins_pipe(pipe_serial); +%} + +// Pointer load carrying GC barrier data: after the ldr, the loaded value
+// is handed to satb_barrier as pre_val. +instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) +%{ + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). The same holds for loadN_shenandoah. + match(Set dst (LoadP mem)); + predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + effect(TEMP dst, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldr $dst, $mem\t# ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */); + %} + ins_pipe(iload_reg_mem); +%} + +// Narrow-oop counterpart of loadP_shenandoah: after the ldrw, the loaded value +// is decoded into $tmp, which is then handed to satb_barrier as pre_val. +instruct loadN_shenandoah(iRegNNoSp dst, indirect mem, iRegPNoSp tmp, rFlagsReg cr) +%{ + match(Set dst (LoadN mem)); + predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + effect(TEMP dst, TEMP tmp, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# compressed ptr" %} + ins_encode %{ + __ ldrw($dst$$Register, $mem$$Register); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $dst$$Register); + satb_barrier(masm, this, + noreg /* obj */, + $tmp$$Register /* pre_val */, + rscratch1 /* tmp1 */, + rscratch2 /* tmp2 */); + } + %} + ins_pipe(iload_reg_mem); +%} diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp index b167b39474bd1..3b406f53bb04d 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp @@ -318,7 +318,6 @@ class BarrierSetC2: public CHeapObj { // Support for GC barriers emitted during parsing virtual bool has_load_barrier_nodes() const { return false; } - virtual bool is_gc_pre_barrier_node(Node* node) const {
return false; } virtual bool is_gc_barrier_node(Node* node) const { return false; } virtual Node* step_over_gc_barrier(Node* c) const { return c; } @@ -327,7 +326,6 @@ class BarrierSetC2: public CHeapObj { virtual void unregister_potential_barrier_node(Node* node) const { } virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { } virtual void eliminate_gc_barrier_data(Node* node) const { } - virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {} virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {} // Allow barrier sets to have shared state that is preserved across a compilation unit. diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index 36c8938e6e386..7f0812b8430db 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -54,7 +54,7 @@ ShenandoahBarrierSetC2* ShenandoahBarrierSetC2::bsc2() { ShenandoahBarrierSetC2State::ShenandoahBarrierSetC2State(Arena* comp_arena) : BarrierSetC2State(comp_arena), _load_reference_barriers(new (comp_arena) GrowableArray(comp_arena, 8, 0, nullptr)), - _stubs(new (comp_arena) GrowableArray(comp_arena, 8, 0, nullptr)), + _stubs(new (comp_arena) GrowableArray(comp_arena, 8, 0, nullptr)), _stubs_start_offset(0) { } @@ -79,8 +79,8 @@ void ShenandoahBarrierSetC2State::remove_load_reference_barrier(ShenandoahLoadRe #define __ kit-> -bool ShenandoahBarrierSetC2::satb_can_remove_pre_barrier(GraphKit* kit, PhaseValues* phase, Node* adr, - BasicType bt, uint adr_idx) const { +static bool satb_can_remove_pre_barrier(GraphKit* kit, PhaseValues* phase, Node* adr, + BasicType bt, uint adr_idx) { intptr_t offset = 0; Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); AllocateNode* alloc = AllocateNode::Ideal_allocation(base); @@ -164,118 +164,29 @@ bool 
ShenandoahBarrierSetC2::satb_can_remove_pre_barrier(GraphKit* kit, PhaseVal return false; } -#undef __ -#define __ ideal. - -void ShenandoahBarrierSetC2::satb_write_barrier_pre(GraphKit* kit, - bool do_load, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const { - // Some sanity checks - // Note: val is unused in this routine. - - if (do_load) { - // We need to generate the load of the previous value - assert(adr != nullptr, "where are loading from?"); - assert(pre_val == nullptr, "loaded already?"); - assert(val_type != nullptr, "need a type"); - - if (ReduceInitialCardMarks - && satb_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) { - return; - } +static bool shenandoah_can_remove_post_barrier(GraphKit* kit, PhaseValues* phase, Node* store_ctrl, Node* adr) { + intptr_t offset = 0; + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); + AllocateNode* alloc = AllocateNode::Ideal_allocation(base); - } else { - // In this case both val_type and alias_idx are unused. - assert(pre_val != nullptr, "must be loaded already"); - // Nothing to be done if pre_val is null. - if (pre_val->bottom_type() == TypePtr::NULL_PTR) return; - assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here"); + if (offset == Type::OffsetBot) { + return false; // Cannot unalias unless there are precise offsets. + } + if (alloc == nullptr) { + return false; // No allocation found. 
} - assert(bt == T_OBJECT, "or we shouldn't be here"); - - IdealKit ideal(kit, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - Node* no_base = __ top(); - Node* zero = __ ConI(0); - Node* zeroX = __ ConX(0); - - float likely = PROB_LIKELY(0.999); - float unlikely = PROB_UNLIKELY(0.999); - - // Offsets into the thread - const int index_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()); - const int buffer_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()); - - // Now the actual pointers into the thread - Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); - - // Now some of the values - Node* marking; - Node* gc_state = __ AddP(no_base, tls, __ ConX(in_bytes(ShenandoahThreadLocalData::gc_state_offset()))); - Node* ld = __ load(__ ctrl(), gc_state, TypeInt::BYTE, T_BYTE, Compile::AliasIdxRaw); - marking = __ AndI(ld, __ ConI(ShenandoahHeap::MARKING)); - assert(ShenandoahBarrierC2Support::is_gc_state_load(ld), "Should match the shape"); - - // if (!marking) - __ if_then(marking, BoolTest::ne, zero, unlikely); { - BasicType index_bt = TypeX_X->basic_type(); - assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading Shenandoah SATBMarkQueue::_index with wrong size."); - Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw); - if (do_load) { - // load original value - // alias_idx correct?? - pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx); + Node* mem = store_ctrl; // Start search from Store node. 
+ if (mem->is_Proj() && mem->in(0)->is_Initialize()) { + InitializeNode* st_init = mem->in(0)->as_Initialize(); + AllocateNode* st_alloc = st_init->allocation(); + // Make sure we are looking at the same allocation + if (alloc == st_alloc) { + return true; } - - // if (pre_val != nullptr) - __ if_then(pre_val, BoolTest::ne, kit->null()); { - Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // is the queue for this thread full? - __ if_then(index, BoolTest::ne, zeroX, likely); { - - // decrement the index - Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); - - // Now get the buffer location we will log the previous value into and store it - Node *log_addr = __ AddP(no_base, buffer, next_index); - __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); - // update the index - __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); - - } __ else_(); { - - // logging buffer is full, call the runtime - const TypeFunc *tf = ShenandoahBarrierSetC2::write_barrier_pre_Type(); - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), "shenandoah_wb_pre", pre_val); - } __ end_if(); // (!index) - } __ end_if(); // (pre_val != nullptr) - } __ end_if(); // (!marking) - - // Final sync IdealKit and GraphKit. 
- kit->final_sync(ideal); - - if (ShenandoahSATBBarrier && adr != nullptr) { - Node* c = kit->control(); - Node* call = c->in(1)->in(1)->in(1)->in(0); - assert(is_shenandoah_wb_pre_call(call), "shenandoah_wb_pre call expected"); - call->add_req(adr); } -} -bool ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(Node* call) { - return call->is_CallLeaf() && - call->as_CallLeaf()->entry_point() == CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre); + return false; } bool ShenandoahBarrierSetC2::is_shenandoah_clone_call(Node* call) { @@ -297,25 +208,6 @@ bool ShenandoahBarrierSetC2::is_shenandoah_lrb_call(Node* call) { (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow)); } -bool ShenandoahBarrierSetC2::is_shenandoah_marking_if(PhaseValues* phase, Node* n) { - if (n->Opcode() != Op_If) { - return false; - } - - Node* bol = n->in(1); - assert(bol->is_Bool(), ""); - Node* cmpx = bol->in(1); - if (bol->as_Bool()->_test._test == BoolTest::ne && - cmpx->is_Cmp() && cmpx->in(2) == phase->intcon(0) && - is_shenandoah_state_load(cmpx->in(1)->in(1)) && - cmpx->in(1)->in(2)->is_Con() && - cmpx->in(1)->in(2) == phase->intcon(ShenandoahHeap::MARKING)) { - return true; - } - - return false; -} - bool ShenandoahBarrierSetC2::is_shenandoah_state_load(Node* n) { if (!n->is_Load()) return false; const int state_offset = in_bytes(ShenandoahThreadLocalData::gc_state_offset()); @@ -324,219 +216,6 @@ bool ShenandoahBarrierSetC2::is_shenandoah_state_load(Node* n) { && n->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == state_offset; } -void ShenandoahBarrierSetC2::shenandoah_write_barrier_pre(GraphKit* kit, - bool do_load, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const { - if (ShenandoahSATBBarrier) { - IdealKit ideal(kit); - kit->sync_kit(ideal); - - satb_write_barrier_pre(kit, do_load, obj, adr, alias_idx, val, val_type, pre_val, bt); - - 
ideal.sync_kit(kit); - kit->final_sync(ideal); - } -} - -// Helper that guards and inserts a pre-barrier. -void ShenandoahBarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, - Node* pre_val, bool need_mem_bar) const { - // We could be accessing the referent field of a reference object. If so, when Shenandoah - // is enabled, we need to log the value in the referent field in an SATB buffer. - // This routine performs some compile time filters and generates suitable - // runtime filters that guard the pre-barrier code. - // Also add memory barrier for non volatile load from the referent field - // to prevent commoning of loads across safepoint. - - // Some compile time checks. - - // If offset is a constant, is it java_lang_ref_Reference::_reference_offset? - const TypeX* otype = offset->find_intptr_t_type(); - if (otype != nullptr && otype->is_con() && - otype->get_con() != java_lang_ref_Reference::referent_offset()) { - // Constant offset but not the reference_offset so just return - return; - } - - // We only need to generate the runtime guards for instances. - const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr(); - if (btype != nullptr) { - if (btype->isa_aryptr()) { - // Array type so nothing to do - return; - } - - const TypeInstPtr* itype = btype->isa_instptr(); - if (itype != nullptr) { - // Can the klass of base_oop be statically determined to be - // _not_ a sub-class of Reference and _not_ Object? 
- ciKlass* klass = itype->instance_klass(); - if (klass->is_loaded() && - !klass->is_subtype_of(kit->env()->Reference_klass()) && - !kit->env()->Object_klass()->is_subtype_of(klass)) { - return; - } - } - } - - // The compile time filters did not reject base_oop/offset so - // we need to generate the following runtime filters - // - // if (offset == java_lang_ref_Reference::_reference_offset) { - // if (instance_of(base, java.lang.ref.Reference)) { - // pre_barrier(_, pre_val, ...); - // } - // } - - float likely = PROB_LIKELY( 0.999); - float unlikely = PROB_UNLIKELY(0.999); - - IdealKit ideal(kit); - - Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset()); - - __ if_then(offset, BoolTest::eq, referent_off, unlikely); { - // Update graphKit memory and control from IdealKit. - kit->sync_kit(ideal); - - Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass())); - Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con); - - // Update IdealKit memory and control from graphKit. - __ sync_kit(kit); - - Node* one = __ ConI(1); - // is_instof == 0 if base_oop == nullptr - __ if_then(is_instof, BoolTest::eq, one, unlikely); { - - // Update graphKit from IdeakKit. - kit->sync_kit(ideal); - - // Use the pre-barrier to record the value in the referent field - satb_write_barrier_pre(kit, false /* do_load */, - nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */, - pre_val /* pre_val */, - T_OBJECT); - if (need_mem_bar) { - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - kit->insert_mem_bar(Op_MemBarCPUOrder); - } - // Update IdealKit from graphKit. - __ sync_kit(kit); - - } __ end_if(); // _ref_type != ref_none - } __ end_if(); // offset == referent_offset - - // Final sync IdealKit and GraphKit. 
- kit->final_sync(ideal); -} - -void ShenandoahBarrierSetC2::post_barrier(GraphKit* kit, - Node* ctl, - Node* oop_store, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - BasicType bt, - bool use_precise) const { - assert(ShenandoahCardBarrier, "Should have been checked by caller"); - - // No store check needed if we're storing a null. - if (val != nullptr && val->is_Con()) { - // must be either an oop or null - const Type* t = val->bottom_type(); - if (t == TypePtr::NULL_PTR || t == Type::TOP) - return; - } - - if (ReduceInitialCardMarks && obj == kit->just_allocated_object(kit->control())) { - // We use card marks to track old to young references in Generational Shenandoah; - // see flag ShenandoahCardBarrier above. - // Objects are always allocated in the young generation and initialized - // before they are promoted. There's always a safepoint (e.g. at final mark) - // before an object is promoted from young to old. Promotion entails dirtying of - // the cards backing promoted objects, so they will be guaranteed to be scanned - // at the next remembered set scan of the old generation. - // Thus, we can safely skip card-marking of initializing stores on a - // freshly-allocated object. If any of the assumptions above change in - // the future, this code will need to be re-examined; see check in - // ShenandoahCardBarrier::on_slowpath_allocation_exit(). - return; - } - - if (!use_precise) { - // All card marks for a (non-array) instance are in one place: - adr = obj; - } - // (Else it's an array (or unknown), and we want more precise card marks.) 
- assert(adr != nullptr, ""); - - IdealKit ideal(kit, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - // Convert the pointer to an int prior to doing math on it - Node* cast = __ CastPX(__ ctrl(), adr); - - Node* curr_ct_holder_offset = __ ConX(in_bytes(ShenandoahThreadLocalData::card_table_offset())); - Node* curr_ct_holder_addr = __ AddP(__ top(), tls, curr_ct_holder_offset); - Node* curr_ct_base_addr = __ load( __ ctrl(), curr_ct_holder_addr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // Divide by card size - Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift()) ); - - // Combine card table base and card offset - Node* card_adr = __ AddP(__ top(), curr_ct_base_addr, card_offset); - - // Get the alias_index for raw card-mark memory - int adr_type = Compile::AliasIdxRaw; - Node* zero = __ ConI(0); // Dirty card value - - if (UseCondCardMark) { - // The classic GC reference write barrier is typically implemented - // as a store into the global card mark table. Unfortunately - // unconditional stores can result in false sharing and excessive - // coherence traffic as well as false transactional aborts. - // UseCondCardMark enables MP "polite" conditional card mark - // stores. In theory we could relax the load from ctrl() to - // no_ctrl, but that doesn't buy much latitude. - Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, T_BYTE, adr_type); - __ if_then(card_val, BoolTest::ne, zero); - } - - // Smash zero into card - __ store(__ ctrl(), card_adr, zero, T_BYTE, adr_type, MemNode::unordered); - - if (UseCondCardMark) { - __ end_if(); - } - - // Final sync IdealKit and GraphKit. 
- kit->final_sync(ideal); -} - -#undef __ - -const TypeFunc* ShenandoahBarrierSetC2::write_barrier_pre_Type() { - const Type **fields = TypeTuple::fields(1); - fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields); - - // create result type (range) - fields = TypeTuple::fields(0); - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); - - return TypeFunc::make(domain, range); -} - const TypeFunc* ShenandoahBarrierSetC2::clone_barrier_Type() { const Type **fields = TypeTuple::fields(1); fields[TypeFunc::Parms+0] = TypeOopPtr::NOTNULL; // src oop @@ -564,57 +243,88 @@ const TypeFunc* ShenandoahBarrierSetC2::load_reference_barrier_Type() { return TypeFunc::make(domain, range); } -Node* ShenandoahBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const { - DecoratorSet decorators = access.decorators(); - - const TypePtr* adr_type = access.addr().type(); +static uint8_t get_store_barrier(C2Access& access) { + if (!access.is_parse_access()) { + // Only support for eliding barriers at parse time for now. + return ShenandoahBarrierSATB | ShenandoahBarrierCardMark; + } + GraphKit* kit = (static_cast(access)).kit(); + Node* ctl = kit->control(); Node* adr = access.addr().node(); + uint adr_idx = kit->C->get_alias_index(access.addr().type()); + assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory"); - bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; + bool can_remove_pre_barrier = satb_can_remove_pre_barrier(kit, &kit->gvn(), adr, access.type(), adr_idx); - if (!access.is_oop()) { - return BarrierSetC2::store_at_resolved(access, val); - } + // We can skip marks on a freshly-allocated object in Eden. Keep this code in + // sync with CardTableBarrierSet::on_slowpath_allocation_exit. 
That routine + // informs GC to take appropriate compensating steps, upon a slow-path + // allocation, so as to make this card-mark elision safe. + // The post-barrier can also be removed if null is written. This case is + // handled by ShenandoahBarrierSetC2::expand_barriers, which runs at the end of C2's + // platform-independent optimizations to exploit stronger type information. + bool can_remove_post_barrier = ReduceInitialCardMarks && + ((access.base() == kit->just_allocated_object(ctl)) || + shenandoah_can_remove_post_barrier(kit, &kit->gvn(), ctl, adr)); - if (no_keepalive) { - // No keep-alive means no need for the pre-barrier. - return BarrierSetC2::store_at_resolved(access, val); + int barriers = 0; + if (!can_remove_pre_barrier) { + barriers |= ShenandoahBarrierSATB; + } + if (!can_remove_post_barrier) { + barriers |= ShenandoahBarrierCardMark; } - if (access.is_parse_access()) { - C2ParseAccess& parse_access = static_cast(access); - GraphKit* kit = parse_access.kit(); - - uint adr_idx = kit->C->get_alias_index(adr_type); - assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" ); - shenandoah_write_barrier_pre(kit, true /* do_load */, /*kit->control(),*/ access.base(), adr, adr_idx, val.node(), - static_cast(val.type()), nullptr /* pre_val */, access.type()); - - Node* result = BarrierSetC2::store_at_resolved(access, val); + return barriers; +} - if (ShenandoahCardBarrier) { - const bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; - const bool is_array = (decorators & IS_ARRAY) != 0; - const bool use_precise = is_array || anonymous; - post_barrier(kit, kit->control(), access.raw_access(), access.base(), - adr, adr_idx, val.node(), access.type(), use_precise); +Node* ShenandoahBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const { + DecoratorSet decorators = access.decorators(); + bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool in_heap = (decorators & IN_HEAP) != 0; + bool 
tightly_coupled_alloc = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0; + bool need_store_barrier = !(tightly_coupled_alloc && ReduceInitialCardMarks) && (in_heap || anonymous); + bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; + if (access.is_oop() && need_store_barrier) { + access.set_barrier_data(get_store_barrier(access)); + if (tightly_coupled_alloc) { + assert(!ReduceInitialCardMarks, + "post-barriers are only needed for tightly-coupled initialization stores when ReduceInitialCardMarks is disabled"); + // Pre-barriers are unnecessary for tightly-coupled initialization stores. + access.set_barrier_data(access.barrier_data() & ~ShenandoahBarrierSATB); } - return result; - } else { - assert(access.is_opt_access(), "only for optimization passes"); - assert(((decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0 || !ShenandoahSATBBarrier) && (decorators & C2_ARRAY_COPY) != 0, "unexpected caller of this code"); - return BarrierSetC2::store_at_resolved(access, val); } + if (no_keepalive) { + // No keep-alive means no need for the pre-barrier. + access.set_barrier_data(access.barrier_data() & ~ShenandoahBarrierSATB); + } + return BarrierSetC2::store_at_resolved(access, val); } Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { - // 1: non-reference load, no additional barrier is needed + // 0: non-reference load, no additional barrier is needed if (!access.is_oop()) { return BarrierSetC2::load_at_resolved(access, val_type); } - Node* load = BarrierSetC2::load_at_resolved(access, val_type); + // 1. If we are reading the value of the referent field of a Reference object, we + // need to record the referent in an SATB log buffer using the pre-barrier + // mechanism. 
DecoratorSet decorators = access.decorators(); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; + // If we are reading the value of the referent field of a Reference object, we + // need to record the referent in an SATB log buffer using the pre-barrier + // mechanism. Also we need to add a memory barrier to prevent commoning reads + // from this field across safepoints, since GC can change its value. + bool need_read_barrier = ((on_weak || on_phantom) && !no_keepalive); + if (access.is_oop() && need_read_barrier) { + access.set_barrier_data(ShenandoahBarrierSATB); + } + + Node* load = BarrierSetC2::load_at_resolved(access, val_type); + BasicType type = access.type(); // 2: apply LRB if needed @@ -627,50 +337,6 @@ Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val } } - // 3: apply keep-alive barrier for java.lang.ref.Reference if needed - if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { - Node* top = Compile::current()->top(); - Node* adr = access.addr().node(); - Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : top; - Node* obj = access.base(); - - bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0; - bool on_weak_ref = (decorators & (ON_WEAK_OOP_REF | ON_PHANTOM_OOP_REF)) != 0; - bool keep_alive = (decorators & AS_NO_KEEPALIVE) == 0; - - // If we are reading the value of the referent field of a Reference - // object (either by using Unsafe directly or through reflection) - // then, if SATB is enabled, we need to record the referent in an - // SATB log buffer using the pre-barrier mechanism. - // Also we need to add memory barrier to prevent commoning reads - // from this field across safepoint since GC can change its value. 
- if (!on_weak_ref || (unknown && (offset == top || obj == top)) || !keep_alive) { - return load; - } - - assert(access.is_parse_access(), "entry not supported at optimization time"); - C2ParseAccess& parse_access = static_cast(access); - GraphKit* kit = parse_access.kit(); - bool mismatched = (decorators & C2_MISMATCHED) != 0; - bool is_unordered = (decorators & MO_UNORDERED) != 0; - bool in_native = (decorators & IN_NATIVE) != 0; - bool need_cpu_mem_bar = !is_unordered || mismatched || in_native; - - if (on_weak_ref) { - // Use the pre-barrier to record the value in the referent field - satb_write_barrier_pre(kit, false /* do_load */, - nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */, - load /* pre_val */, T_OBJECT); - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - kit->insert_mem_bar(Op_MemBarCPUOrder); - } else if (unknown) { - // We do not require a mem bar inside pre_barrier if need_mem_bar - // is set: the barriers would be emitted by us. - insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar); - } - } - return load; } @@ -698,10 +364,6 @@ static void set_barrier_data(C2Access& access) { barrier_data |= ShenandoahBarrierNative; } - if (access.decorators() & AS_NO_KEEPALIVE) { - barrier_data |= ShenandoahBarrierNoKeepAlive; - } - access.set_barrier_data(barrier_data); } @@ -712,19 +374,14 @@ Node* ShenandoahBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess } GraphKit* kit = access.kit(); - // TODO: Implement late SATB barriers. 
if (access.is_oop()) { - shenandoah_write_barrier_pre(kit, false /* do_load */, nullptr, nullptr, max_juint, nullptr, nullptr, expected_val /* pre_val */, T_OBJECT); + access.set_barrier_data(access.barrier_data() | ShenandoahBarrierSATB | ShenandoahBarrierCardMark); } Node* load_store = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); - // TODO: Implement late barriers for LRB and Card-Table. + // TODO: Implement late barriers for LRB. if (access.is_oop()) { load_store = kit->gvn().transform(new ShenandoahLoadReferenceBarrierNode(nullptr, load_store, access.decorators())); - if (ShenandoahCardBarrier) { - post_barrier(kit, kit->control(), access.raw_access(), access.base(), - access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true); - } } return load_store; } @@ -736,45 +393,27 @@ Node* ShenandoahBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAcces } GraphKit* kit = access.kit(); if (access.is_oop()) { - shenandoah_write_barrier_pre(kit, false /* do_load */, - nullptr, nullptr, max_juint, nullptr, nullptr, - expected_val /* pre_val */, T_OBJECT); + access.set_barrier_data(access.barrier_data() | ShenandoahBarrierSATB | ShenandoahBarrierCardMark); } - Node* load_store = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); - if (access.is_oop()) { - if (ShenandoahCardBarrier) { - post_barrier(kit, kit->control(), access.raw_access(), access.base(), - access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true); - } - } - return load_store; + return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); } Node* ShenandoahBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* val, const Type* value_type) const { GraphKit* kit = access.kit(); + if (access.is_oop()) { + access.set_barrier_data(ShenandoahBarrierSATB | ShenandoahBarrierCardMark); + } Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, val, 
value_type); if (access.is_oop()) { result = kit->gvn().transform(new ShenandoahLoadReferenceBarrierNode(nullptr, result, access.decorators())); - shenandoah_write_barrier_pre(kit, false /* do_load */, - nullptr, nullptr, max_juint, nullptr, nullptr, - result /* pre_val */, T_OBJECT); - if (ShenandoahCardBarrier) { - post_barrier(kit, kit->control(), access.raw_access(), access.base(), - access.addr().node(), access.alias_idx(), val, T_OBJECT, true); - } } return result; } -bool ShenandoahBarrierSetC2::is_gc_pre_barrier_node(Node* node) const { - return is_shenandoah_wb_pre_call(node); -} - bool ShenandoahBarrierSetC2::is_gc_barrier_node(Node* node) const { return (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) || is_shenandoah_lrb_call(node) || - is_shenandoah_wb_pre_call(node) || is_shenandoah_clone_call(node); } @@ -788,7 +427,59 @@ Node* ShenandoahBarrierSetC2::step_over_gc_barrier(Node* c) const { return c; } +static void refine_barrier_by_new_val_type(const Node* n) { + if (n->Opcode() != Op_StoreP && n->Opcode() != Op_StoreN) { + return; + } + MemNode* store = n->as_Mem(); + const Node* newval = n->in(MemNode::ValueIn); + assert(newval != nullptr, ""); + const Type* newval_bottom = newval->bottom_type(); + TypePtr::PTR newval_type = newval_bottom->make_ptr()->ptr(); + uint8_t barrier_data = store->barrier_data(); + if (!newval_bottom->isa_oopptr() && + !newval_bottom->isa_narrowoop() && + newval_type != TypePtr::Null) { + // newval is neither an OOP nor null, so there is no barrier to refine. + assert(barrier_data == 0, "non-OOP stores should have no barrier data"); + return; + } + if (barrier_data == 0) { + // No barrier to refine. + return; + } + if (newval_type == TypePtr::Null) { + // Simply elide post-barrier if writing null. 
+ barrier_data &= ~ShenandoahBarrierCardMark; + barrier_data &= ~ShenandoahBarrierCardMarkNotNull; + } else if ((barrier_data & ShenandoahBarrierCardMark) != 0 && + newval_type == TypePtr::NotNull) { + // If the post-barrier has not been elided yet (e.g. due to newval being + // freshly allocated), mark it as not-null (simplifies barrier tests and + // compressed OOPs logic). + barrier_data |= ShenandoahBarrierCardMarkNotNull; + } + store->set_barrier_data(barrier_data); +} + bool ShenandoahBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const { + ResourceMark rm; + VectorSet visited; + Node_List worklist; + worklist.push(C->root()); + while (worklist.size() > 0) { + Node* n = worklist.pop(); + if (visited.test_set(n->_idx)) { + continue; + } + refine_barrier_by_new_val_type(n); + for (uint j = 0; j < n->req(); j++) { + Node* in = n->in(j); + if (in != nullptr) { + worklist.push(in); + } + } + } return !ShenandoahBarrierC2Support::expand(C, igvn); } @@ -930,65 +621,21 @@ void ShenandoahBarrierSetC2::unregister_potential_barrier_node(Node* node) const } } -void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { - if (is_shenandoah_wb_pre_call(node)) { - shenandoah_eliminate_wb_pre(node, ¯o->igvn()); - } - if (ShenandoahCardBarrier && node->Opcode() == Op_CastP2X) { - Node* shift = node->unique_out(); - Node* addp = shift->unique_out(); - for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) { - Node* mem = addp->last_out(j); - if (UseCondCardMark && mem->is_Load()) { - assert(mem->Opcode() == Op_LoadB, "unexpected code shape"); - // The load is checking if the card has been written so - // replace it with zero to fold the test. 
- macro->replace_node(mem, macro->intcon(0)); - continue; - } - assert(mem->is_Store(), "store required"); - macro->replace_node(mem, mem->in(MemNode::Memory)); - } - } -} - -void ShenandoahBarrierSetC2::shenandoah_eliminate_wb_pre(Node* call, PhaseIterGVN* igvn) const { - assert(UseShenandoahGC && is_shenandoah_wb_pre_call(call), ""); - Node* c = call->as_Call()->proj_out(TypeFunc::Control); - c = c->unique_ctrl_out(); - assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?"); - c = c->unique_ctrl_out(); - assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?"); - Node* iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0); - assert(iff->is_If(), "expect test"); - if (!is_shenandoah_marking_if(igvn, iff)) { - c = c->unique_ctrl_out(); - assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?"); - iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0); - assert(is_shenandoah_marking_if(igvn, iff), "expect marking test"); +void ShenandoahBarrierSetC2::eliminate_gc_barrier_data(Node* node) const { + if (node->is_LoadStore()) { + LoadStoreNode* loadstore = node->as_LoadStore(); + loadstore->set_barrier_data(0); + } else if (node->is_Mem()) { + MemNode* mem = node->as_Mem(); + mem->set_barrier_data(0); } - Node* cmpx = iff->in(1)->in(1); - igvn->replace_node(cmpx, igvn->makecon(TypeInt::CC_EQ)); - igvn->rehash_node_delayed(call); - call->del_req(call->req()-1); } -void ShenandoahBarrierSetC2::enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const { - if (node->Opcode() == Op_AddP && ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(node)) { - igvn->add_users_to_worklist(node); - } +void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { + eliminate_gc_barrier_data(node); } void ShenandoahBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const { - for (uint i = 0; i < useful.size(); i++) { - 
Node* n = useful.at(i); - if (n->Opcode() == Op_AddP && ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(n)) { - for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { - C->record_for_igvn(n->fast_out(i)); - } - } - } - for (int i = state()->load_reference_barriers_count() - 1; i >= 0; i--) { ShenandoahLoadReferenceBarrierNode* n = state()->load_reference_barrier(i); if (!useful.member(n)) { @@ -1005,10 +652,6 @@ ShenandoahBarrierSetC2State* ShenandoahBarrierSetC2::state() const { return reinterpret_cast(Compile::current()->barrier_set_state()); } -// If the BarrierSetC2 state has kept macro nodes in its compilation unit state to be -// expanded later, then now is the time to do so. -bool ShenandoahBarrierSetC2::expand_macro_nodes(PhaseMacroExpand* macro) const { return false; } - #ifdef ASSERT void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const { if (ShenandoahVerifyOptoBarriers && phase == BarrierSetC2::BeforeMacroExpand) { @@ -1078,19 +721,6 @@ void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase p #endif Node* ShenandoahBarrierSetC2::ideal_node(PhaseGVN* phase, Node* n, bool can_reshape) const { - if (is_shenandoah_wb_pre_call(n)) { - uint cnt = ShenandoahBarrierSetC2::write_barrier_pre_Type()->domain()->cnt(); - if (n->req() > cnt) { - Node* addp = n->in(cnt); - if (has_only_shenandoah_wb_pre_uses(addp)) { - n->del_req(cnt); - if (can_reshape) { - phase->is_IterGVN()->_worklist.push(addp); - } - return n; - } - } - } if (n->Opcode() == Op_CmpP) { Node* in1 = n->in(1); Node* in2 = n->in(2); @@ -1148,33 +778,8 @@ Node* ShenandoahBarrierSetC2::ideal_node(PhaseGVN* phase, Node* n, bool can_resh return nullptr; } -bool ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(Node* n) { - for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { - Node* u = n->fast_out(i); - if (!is_shenandoah_wb_pre_call(u)) { - return false; - } - } - return n->outcnt() > 0; -} - bool 
ShenandoahBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode, Unique_Node_List& dead_nodes) const { switch (opcode) { - case Op_CallLeaf: - case Op_CallLeafNoFP: { - assert (n->is_Call(), ""); - CallNode *call = n->as_Call(); - if (ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(call)) { - uint cnt = ShenandoahBarrierSetC2::write_barrier_pre_Type()->domain()->cnt(); - if (call->req() > cnt) { - assert(call->req() == cnt + 1, "only one extra input"); - Node *addp = call->in(cnt); - assert(!ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(addp), "useless address computation?"); - call->del_req(cnt); - } - } - return false; - } case Op_ShenandoahLoadReferenceBarrier: assert(false, "should have been expanded already"); return true; @@ -1185,29 +790,6 @@ bool ShenandoahBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, ui bool ShenandoahBarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const { switch (opcode) { - case Op_StoreP: { - Node* adr = n->in(MemNode::Address); - const Type* adr_type = gvn->type(adr); - // Pointer stores in Shenandoah barriers looks like unsafe access. - // Ignore such stores to be able scalar replace non-escaping - // allocations. - if (adr_type->isa_rawptr() && adr->is_AddP()) { - Node* base = conn_graph->get_addp_base(adr); - if (base->Opcode() == Op_LoadP && - base->in(MemNode::Address)->is_AddP()) { - adr = base->in(MemNode::Address); - Node* tls = conn_graph->get_addp_base(adr); - if (tls->Opcode() == Op_ThreadLocal) { - int offs = (int) gvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot); - const int buf_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()); - if (offs == buf_offset) { - return true; // Pre barrier previous oop value store. 
- } - } - } - } - return false; - } case Op_ShenandoahLoadReferenceBarrier: conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(ShenandoahLoadReferenceBarrierNode::ValueIn), delayed_worklist); return true; @@ -1237,7 +819,7 @@ static ShenandoahBarrierSetC2State* barrier_set_state() { int ShenandoahBarrierSetC2::estimate_stub_size() const { Compile* const C = Compile::current(); BufferBlob* const blob = C->output()->scratch_buffer_blob(); - GrowableArray* const stubs = barrier_set_state()->stubs(); + GrowableArray* const stubs = barrier_set_state()->stubs(); int size = 0; for (int i = 0; i < stubs->length(); i++) { @@ -1252,7 +834,7 @@ int ShenandoahBarrierSetC2::estimate_stub_size() const { void ShenandoahBarrierSetC2::emit_stubs(CodeBuffer& cb) const { MacroAssembler masm(&cb); - GrowableArray* const stubs = barrier_set_state()->stubs(); + GrowableArray* const stubs = barrier_set_state()->stubs(); barrier_set_state()->set_stubs_start_offset(masm.offset()); for (int i = 0; i < stubs->length(); i++) { @@ -1269,26 +851,32 @@ void ShenandoahBarrierSetC2::emit_stubs(CodeBuffer& cb) const { } -void ShenandoahBarrierStub::register_stub() { +void ShenandoahBarrierStubC2::register_stub() { if (!Compile::current()->output()->in_scratch_emit_size()) { barrier_set_state()->stubs()->append(this); } } -ShenandoahCASBarrierSlowStub* ShenandoahCASBarrierSlowStub::create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool acquire, bool release, bool weak) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStub(node, addr, Address(), expected, new_val, result, tmp, noreg, cae, acquire, release, weak); +ShenandoahSATBBarrierStubC2* ShenandoahSATBBarrierStubC2::create(const MachNode* node, Register addr_reg, Register preval, Register tmp1, Register tmp2) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahSATBBarrierStubC2(node, addr_reg, preval, tmp1, tmp2); 
+ stub->register_stub(); + return stub; +} + +ShenandoahCASBarrierSlowStubC2* ShenandoahCASBarrierSlowStubC2::create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool acquire, bool release, bool weak) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStubC2(node, addr, Address(), expected, new_val, result, tmp, noreg, cae, acquire, release, weak); stub->register_stub(); return stub; } -ShenandoahCASBarrierSlowStub* ShenandoahCASBarrierSlowStub::create(const MachNode* node, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStub(node, noreg, addr, expected, new_val, result, tmp1, tmp2, cae, false, false, false); +ShenandoahCASBarrierSlowStubC2* ShenandoahCASBarrierSlowStubC2::create(const MachNode* node, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierSlowStubC2(node, noreg, addr, expected, new_val, result, tmp1, tmp2, cae, false, false, false); stub->register_stub(); return stub; } -ShenandoahCASBarrierMidStub* ShenandoahCASBarrierMidStub::create(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register expected, Register result, Register tmp, bool cae) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierMidStub(node, slow_stub, expected, result, tmp, cae); +ShenandoahCASBarrierMidStubC2* ShenandoahCASBarrierMidStubC2::create(const MachNode* node, ShenandoahCASBarrierSlowStubC2* slow_stub, Register expected, Register result, Register tmp, bool cae) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierMidStubC2(node, slow_stub, expected, result, tmp, cae); stub->register_stub(); return stub; } diff --git 
a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index 073fa64304ec9..682dd2eb438ce 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -25,22 +25,25 @@ #ifndef SHARE_GC_SHENANDOAH_C2_SHENANDOAHBARRIERSETC2_HPP #define SHARE_GC_SHENANDOAH_C2_SHENANDOAHBARRIERSETC2_HPP +#include "shenandoahBarrierSetC2.hpp" #include "gc/shared/c2/barrierSetC2.hpp" #include "gc/shenandoah/c2/shenandoahSupport.hpp" #include "utilities/growableArray.hpp" -static const uint8_t ShenandoahBarrierStrong = 1 << 0; -static const uint8_t ShenandoahBarrierWeak = 1 << 1; -static const uint8_t ShenandoahBarrierPhantom = 1 << 2; -static const uint8_t ShenandoahBarrierNoKeepAlive = 1 << 3; -static const uint8_t ShenandoahBarrierNative = 1 << 4; -static const uint8_t ShenandoahBarrierElided = 1 << 5; +static const uint8_t ShenandoahBarrierStrong = 1 << 0; +static const uint8_t ShenandoahBarrierWeak = 1 << 1; +static const uint8_t ShenandoahBarrierPhantom = 1 << 2; +static const uint8_t ShenandoahBarrierNative = 1 << 3; +static const uint8_t ShenandoahBarrierElided = 1 << 4; +static const uint8_t ShenandoahBarrierSATB = 1 << 5; +static const uint8_t ShenandoahBarrierCardMark = 1 << 6; +static const uint8_t ShenandoahBarrierCardMarkNotNull = 1 << 7; -class ShenandoahBarrierStub; +class ShenandoahBarrierStubC2; class ShenandoahBarrierSetC2State : public BarrierSetC2State { GrowableArray* _load_reference_barriers; - GrowableArray* _stubs; + GrowableArray* _stubs; int _stubs_start_offset; public: @@ -54,7 +57,7 @@ class ShenandoahBarrierSetC2State : public BarrierSetC2State { void add_load_reference_barrier(ShenandoahLoadReferenceBarrierNode* n); void remove_load_reference_barrier(ShenandoahLoadReferenceBarrierNode * n); - GrowableArray* stubs() { + GrowableArray* stubs() { return _stubs; } @@ -67,42 +70,6 @@ class 
ShenandoahBarrierSetC2State : public BarrierSetC2State { }}; class ShenandoahBarrierSetC2 : public BarrierSetC2 { -private: - void shenandoah_eliminate_wb_pre(Node* call, PhaseIterGVN* igvn) const; - - bool satb_can_remove_pre_barrier(GraphKit* kit, PhaseValues* phase, Node* adr, - BasicType bt, uint adr_idx) const; - void satb_write_barrier_pre(GraphKit* kit, bool do_load, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const; - - void shenandoah_write_barrier_pre(GraphKit* kit, - bool do_load, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const; - - void post_barrier(GraphKit* kit, - Node* ctl, - Node* store, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - BasicType bt, - bool use_precise) const; - - void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, - Node* pre_val, bool need_mem_bar) const; static bool clone_needs_barrier(Node* src, PhaseGVN& gvn); @@ -118,16 +85,12 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { public: static ShenandoahBarrierSetC2* bsc2(); - static bool is_shenandoah_wb_pre_call(Node* call); static bool is_shenandoah_clone_call(Node* call); static bool is_shenandoah_lrb_call(Node* call); - static bool is_shenandoah_marking_if(PhaseValues* phase, Node* n); static bool is_shenandoah_state_load(Node* n); - static bool has_only_shenandoah_wb_pre_uses(Node* n); ShenandoahBarrierSetC2State* state() const; - static const TypeFunc* write_barrier_pre_Type(); static const TypeFunc* clone_barrier_Type(); static const TypeFunc* load_reference_barrier_Type(); virtual bool has_load_barrier_nodes() const { return true; } @@ -139,7 +102,6 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const; // Support for GC barriers 
emitted during parsing - virtual bool is_gc_pre_barrier_node(Node* node) const; virtual bool is_gc_barrier_node(Node* node) const; virtual Node* step_over_gc_barrier(Node* c) const; virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const; @@ -151,15 +113,12 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { virtual void register_potential_barrier_node(Node* node) const; virtual void unregister_potential_barrier_node(Node* node) const; virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; - virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const; + virtual void eliminate_gc_barrier_data(Node* node) const; virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const; // Allow barrier sets to have shared state that is preserved across a compilation unit. // This could for example comprise macro nodes to be expanded during macro expansion. virtual void* create_barrier_state(Arena* comp_arena) const; - // If the BarrierSetC2 state has kept macro nodes in its compilation unit state to be - // expanded later, then now is the time to do so. 
- virtual bool expand_macro_nodes(PhaseMacroExpand* macro) const; #ifdef ASSERT virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const; @@ -175,15 +134,33 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { void emit_stubs(CodeBuffer& cb) const /* override */; }; -class ShenandoahBarrierStub : public BarrierStubC2 { +class ShenandoahBarrierStubC2 : public BarrierStubC2 { protected: - explicit ShenandoahBarrierStub(const MachNode* node) : BarrierStubC2(node) {} + explicit ShenandoahBarrierStubC2(const MachNode* node) : BarrierStubC2(node) {} void register_stub(); public: virtual void emit_code(MacroAssembler& masm) = 0; }; -class ShenandoahCASBarrierSlowStub : public ShenandoahBarrierStub { +class ShenandoahSATBBarrierStubC2 : public ShenandoahBarrierStubC2 { + Register _addr_reg; + Register _preval; + Register _tmp1; + Register _tmp2; + ShenandoahSATBBarrierStubC2(const MachNode* node, Register addr, Register preval, Register tmp1, Register tmp2) : + ShenandoahBarrierStubC2(node), + _addr_reg(addr), _preval(preval), _tmp1(tmp1), _tmp2(tmp2) {} + +public: + static bool needs_barrier(const MachNode* node) { + return (node->barrier_data() & ShenandoahBarrierSATB) != 0; + } + static ShenandoahSATBBarrierStubC2* create(const MachNode* node, Register addr_reg, Register preval, Register tmp1, Register tmp2); + + void emit_code(MacroAssembler& masm) override; +}; + +class ShenandoahCASBarrierSlowStubC2 : public ShenandoahBarrierStubC2 { Register _addr_reg; Address _addr; Register _expected; @@ -196,26 +173,27 @@ class ShenandoahCASBarrierSlowStub : public ShenandoahBarrierStub { bool _release; bool _weak; - explicit ShenandoahCASBarrierSlowStub(const MachNode* node, Register addr_reg, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae, bool acquire, bool release, bool weak) : - ShenandoahBarrierStub(node), + explicit ShenandoahCASBarrierSlowStubC2(const MachNode* node, Register addr_reg, 
Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae, bool acquire, bool release, bool weak) : + ShenandoahBarrierStubC2(node), _addr_reg(addr_reg), _addr(addr), _expected(expected), _new_val(new_val), _result(result), _tmp1(tmp1), _tmp2(tmp2), _cae(cae), _acquire(acquire), _release(release), _weak(weak) {} public: - static ShenandoahCASBarrierSlowStub* create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool acquire, bool release, bool weak); - static ShenandoahCASBarrierSlowStub* create(const MachNode* node, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae); + static ShenandoahCASBarrierSlowStubC2* create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, Register tmp, bool cae, bool acquire, bool release, bool weak); + static ShenandoahCASBarrierSlowStubC2* create(const MachNode* node, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool cae); void emit_code(MacroAssembler& masm) override; }; -class ShenandoahCASBarrierMidStub : public ShenandoahBarrierStub { - ShenandoahCASBarrierSlowStub* _slow_stub; +class ShenandoahCASBarrierMidStubC2 : public ShenandoahBarrierStubC2 { + ShenandoahCASBarrierSlowStubC2* _slow_stub; Register _expected; Register _result; Register _tmp; bool _cae; - ShenandoahCASBarrierMidStub(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register expected, Register result, Register tmp, bool cae) : - ShenandoahBarrierStub(node), _slow_stub(slow_stub), _expected(expected), _result(result), _tmp(tmp), _cae(cae) {} + ShenandoahCASBarrierMidStubC2(const MachNode* node, ShenandoahCASBarrierSlowStubC2* slow_stub, Register expected, Register result, Register tmp, bool cae) : + ShenandoahBarrierStubC2(node), _slow_stub(slow_stub), _expected(expected), _result(result), 
_tmp(tmp), _cae(cae) {} public: - static ShenandoahCASBarrierMidStub* create(const MachNode* node, ShenandoahCASBarrierSlowStub* slow_stub, Register expected, Register result, Register tmp, bool cae); - void emit_code(MacroAssembler& masm) override;}; + static ShenandoahCASBarrierMidStubC2* create(const MachNode* node, ShenandoahCASBarrierSlowStubC2* slow_stub, Register expected, Register result, Register tmp, bool cae); + void emit_code(MacroAssembler& masm) override; +}; #endif // SHARE_GC_SHENANDOAH_C2_SHENANDOAHBARRIERSETC2_HPP diff --git a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp index a0b52358bac8e..78fedeae862d5 100644 --- a/src/hotspot/share/opto/macro.cpp +++ b/src/hotspot/share/opto/macro.cpp @@ -595,7 +595,7 @@ bool PhaseMacroExpand::can_eliminate_allocation(PhaseIterGVN* igvn, AllocateNode for (DUIterator_Fast kmax, k = use->fast_outs(kmax); k < kmax && can_eliminate; k++) { Node* n = use->fast_out(k); - if (!n->is_Store() && n->Opcode() != Op_CastP2X && !bs->is_gc_pre_barrier_node(n) && !reduce_merge_precheck) { + if (!n->is_Store() && n->Opcode() != Op_CastP2X && !reduce_merge_precheck) { DEBUG_ONLY(disq_node = n;) if (n->is_Load() || n->is_LoadStore()) { NOT_PRODUCT(fail_eliminate = "Field load";) diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp index 5ecc038954dd4..02a4bb0cbc125 100644 --- a/src/hotspot/share/opto/node.cpp +++ b/src/hotspot/share/opto/node.cpp @@ -1455,8 +1455,6 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) { igvn->add_users_to_worklist( n ); } else if (dead->is_data_proj_of_pure_function(n)) { igvn->_worklist.push(n); - } else { - BarrierSet::barrier_set()->barrier_set_c2()->enqueue_useful_gc_barrier(igvn, n); } } } diff --git a/src/hotspot/share/opto/phaseX.cpp b/src/hotspot/share/opto/phaseX.cpp index 1df2cdb179e52..651a2e78d4a7f 100644 --- a/src/hotspot/share/opto/phaseX.cpp +++ b/src/hotspot/share/opto/phaseX.cpp @@ -2257,8 +2257,6 @@ void 
PhaseIterGVN::remove_globally_dead_node( Node *dead ) { } } else if (dead->is_data_proj_of_pure_function(in)) { _worklist.push(in); - } else { - BarrierSet::barrier_set()->barrier_set_c2()->enqueue_useful_gc_barrier(this, in); } if (ReduceFieldZeroing && dead->is_Load() && i == MemNode::Memory && in->is_Proj() && in->in(0) != nullptr && in->in(0)->is_Initialize()) { @@ -3378,8 +3376,6 @@ void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) { default: break; } - - BarrierSet::barrier_set()->barrier_set_c2()->enqueue_useful_gc_barrier(igvn, old); } } From 63d044a969c9c8cddd952373258d0137b89b28a1 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Thu, 28 Aug 2025 10:00:11 +0000 Subject: [PATCH 5/9] SATB late barriers, aarch64 parts fixed --- .../shenandoahBarrierSetAssembler_aarch64.cpp | 29 ++++--- .../shenandoahBarrierSetAssembler_aarch64.hpp | 2 +- .../gc/shenandoah/shenandoah_aarch64.ad | 78 +++---------------- .../shenandoah/c2/shenandoahBarrierSetC2.cpp | 13 ++-- .../shenandoah/c2/shenandoahBarrierSetC2.hpp | 12 +-- .../share/gc/shenandoah/shenandoahRuntime.cpp | 5 ++ .../share/gc/shenandoah/shenandoahRuntime.hpp | 1 + 7 files changed, 46 insertions(+), 94 deletions(-) diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 37201ca6e4f05..e923e06e099f7 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -611,11 +611,12 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } #ifdef COMPILER2 -void ShenandoahBarrierSetAssembler::satb_barrier_c2(MacroAssembler* masm, Register addr, Register pre_val, Register rthread, Register tmp1, Register tmp2, ShenandoahSATBBarrierStubC2* stub) { +void ShenandoahBarrierSetAssembler::satb_barrier_c2(MacroAssembler* masm, Register addr, Register pre_val, 
ShenandoahSATBBarrierStubC2* stub) { + assert_different_registers(addr, pre_val); // Check if GC marking is in progress, otherwise we don't have to do anything. Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); - __ ldrb(tmp1, gc_state); - __ tstw(tmp1, ShenandoahHeap::MARKING); + __ ldrb(rscratch1, gc_state); + __ tstw(rscratch1, ShenandoahHeap::MARKING); __ br(Assembler::NE, *stub->entry()); __ bind(*stub->continuation()); } @@ -666,22 +667,20 @@ void ShenandoahSATBBarrierStubC2::emit_code(MacroAssembler& masm) { __ load_heap_oop(_preval, Address(_addr_reg, 0), noreg, noreg, AS_RAW); } // Is the previous value null? - __ tst(_preval, _preval); - // Then we don't need to do anything. - __ br(Assembler::EQ, *continuation()); + __ cbz(_preval, *continuation()); - Address queue_index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); + Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); Label runtime; - __ ldr(_tmp1, queue_index); + __ ldr(rscratch1, index); // If buffer is full, call into runtime. - __ cbz(_tmp1, runtime); + __ cbz(rscratch1, runtime); // The buffer is not full, store value into it. 
- __ sub(_tmp1, _tmp1, wordSize); - __ str(_tmp1, queue_index); - __ ldr(_tmp2, buffer); - __ str(_preval, Address(_tmp2, _tmp1)); + __ sub(rscratch1, rscratch1, wordSize); + __ str(rscratch1, index); + __ ldr(rscratch2, buffer); + __ str(_preval, Address(rscratch2, rscratch1)); __ b(*continuation()); // Runtime call @@ -691,8 +690,8 @@ void ShenandoahSATBBarrierStubC2::emit_code(MacroAssembler& masm) { if (c_rarg0 != _preval) { __ mov(c_rarg0, _preval); } - __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre)); - __ blr(lr); + __ mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre_c2)); + __ blr(rscratch1); } __ b(*continuation()); } diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index ed22108c132eb..bf01bbe547249 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -95,7 +95,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { bool acquire, bool release, bool is_cae, Register result); #ifdef COMPILER2 - void satb_barrier_c2(MacroAssembler* masm, Register obj, Register pre_val, Register rthread, Register tmp1, Register tmp2, ShenandoahSATBBarrierStubC2* stub); + void satb_barrier_c2(MacroAssembler* masm, Register obj, Register pre_val, ShenandoahSATBBarrierStubC2* stub); void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register expected, Register new_val, Register result, bool acquire, bool release, bool weak, bool is_cae); #endif diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad index 84ca656c0ab0c..314d65a2816b6 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad +++ 
b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad @@ -36,8 +36,6 @@ static void satb_barrier(MacroAssembler* masm, const MachNode* node, Register addr, Register pre_val, - Register tmp1, - Register tmp2, RegSet preserve = RegSet(), RegSet no_preserve = RegSet()) { if (!ShenandoahSATBBarrierStubC2::needs_barrier(node)) { @@ -45,14 +43,14 @@ static void satb_barrier(MacroAssembler* masm, } Assembler::InlineSkippedInstructionsCounter skip_counter(masm); ShenandoahBarrierSetAssembler* bsasm = ShenandoahBarrierSet::barrier_set()->assembler(); - ShenandoahSATBBarrierStubC2* const stub = ShenandoahSATBBarrierStubC2::create(node, addr, pre_val, tmp1, tmp2); + ShenandoahSATBBarrierStubC2* const stub = ShenandoahSATBBarrierStubC2::create(node, addr, pre_val); for (RegSetIterator reg = preserve.begin(); *reg != noreg; ++reg) { stub->preserve(*reg); } for (RegSetIterator reg = no_preserve.begin(); *reg != noreg; ++reg) { stub->dont_preserve(*reg); } - bsasm->satb_barrier_c2(masm, addr, pre_val, rthread, tmp1, tmp2, stub); + bsasm->satb_barrier_c2(masm, addr, pre_val, stub); } %} @@ -67,9 +65,7 @@ instruct storeP_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr) ins_encode %{ satb_barrier(masm, this, $mem$$Register /* obj */, - rscratch1 /* pre_val */, - rscratch2 /* tmp1 */, - $tmp$$Register /* tmp2 */, + $tmp$$Register /* pre_val */, RegSet::of($mem$$Register, $src$$Register) /* preserve */); __ str($src$$Register, $mem$$Register); %} @@ -86,9 +82,7 @@ instruct storePVolatile_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlag ins_encode %{ satb_barrier(masm, this, $mem$$Register /* obj */, - rscratch1 /* pre_val */, - rscratch2 /* tmp1 */, - $tmp$$Register /* tmp2 */, + $tmp$$Register /* pre_val */, RegSet::of($mem$$Register, $src$$Register) /* preserve */); __ stlr($src$$Register, $mem$$Register); %} @@ -105,9 +99,7 @@ instruct storeN_shenandoah(indirect mem, iRegN src, iRegPNoSp tmp, rFlagsReg cr) ins_encode %{ satb_barrier(masm, this, 
$mem$$Register /* obj */, - rscratch1 /* pre_val */, - rscratch2 /* tmp1 */, - $tmp$$Register /* tmp2 */, + $tmp$$Register /* pre_val */, RegSet::of($mem$$Register, $src$$Register) /* preserve */); __ strw($src$$Register, $mem$$Register); %} @@ -124,9 +116,7 @@ instruct storeNVolatile_shenandoah(indirect mem, iRegN src, iRegPNoSp tmp, rFlag ins_encode %{ satb_barrier(masm, this, $mem$$Register /* obj */, - rscratch1 /* pre_val */, - rscratch2 /* tmp1 */, - $tmp$$Register /* tmp2 */, + $tmp$$Register /* pre_val */, RegSet::of($mem$$Register, $src$$Register) /* preserve */); __ stlrw($src$$Register, $mem$$Register); %} @@ -144,9 +134,7 @@ instruct encodePAndStoreN_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFl ins_encode %{ satb_barrier(masm, this, $mem$$Register /* obj */, - rscratch1 /* pre_val */, - rscratch2 /* tmp1 */, - $tmp$$Register /* tmp2 */, + $tmp$$Register /* pre_val */, RegSet::of($mem$$Register, $src$$Register) /* preserve */); if ((barrier_data() & ShenandoahBarrierCardMarkNotNull) == 0) { __ encode_heap_oop($tmp$$Register, $src$$Register); @@ -169,9 +157,7 @@ instruct encodePAndStoreNVolatile_shenandoah(indirect mem, iRegP src, iRegPNoSp ins_encode %{ satb_barrier(masm, this, $mem$$Register /* obj */, - rscratch1 /* pre_val */, - rscratch2 /* tmp1 */, - $tmp$$Register /* tmp2 */, + $tmp$$Register /* pre_val */, RegSet::of($mem$$Register, $src$$Register) /* preserve */); if ((barrier_data() & ShenandoahBarrierCardMarkNotNull) == 0) { __ encode_heap_oop($tmp$$Register, $src$$Register); @@ -200,8 +186,6 @@ instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, i satb_barrier(masm, this, noreg /* obj */, $oldval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, 
$newval$$Register, $res$$Register, @@ -231,8 +215,6 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); } @@ -261,8 +243,6 @@ instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval satb_barrier(masm, this, noreg /* obj */, $oldval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, @@ -293,8 +273,6 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); } @@ -321,8 +299,6 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); } @@ -345,8 +321,6 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva satb_barrier(masm, this, noreg /* obj */, $oldval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, 
$mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, @@ -372,8 +346,6 @@ instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN ol satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); } @@ -397,8 +369,6 @@ instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP ol satb_barrier(masm, this, noreg /* obj */, $oldval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, @@ -424,8 +394,6 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); } @@ -448,8 +416,6 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva satb_barrier(masm, this, noreg /* obj */, $oldval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, @@ -476,8 +442,6 @@ instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN ol satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, 
RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); } @@ -502,8 +466,6 @@ instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP ol satb_barrier(masm, this, noreg /* obj */, $oldval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, RegSet::of($res$$Register) /* no_preserve */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, @@ -525,8 +487,6 @@ instruct getAndSetP_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, rFl satb_barrier(masm, this, noreg /* obj */, $preval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); %} ins_pipe(pipe_serial); @@ -545,8 +505,6 @@ instruct getAndSetPAcq_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, satb_barrier(masm, this, noreg /* obj */, $preval$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); %} ins_pipe(pipe_serial); @@ -567,16 +525,12 @@ instruct getAndSetN_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRe satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); } %} ins_pipe(pipe_serial); %} -// This pattern is generated automatically from g1_aarch64.m4. 
-// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct getAndSetNAcq_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set preval (GetAndSetN mem newval)); @@ -592,22 +546,18 @@ instruct getAndSetNAcq_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, satb_barrier(masm, this, noreg /* obj */, $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */, RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); } %} ins_pipe(pipe_serial); %} -// This pattern is generated automatically from g1_aarch64.m4. -// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) %{ // This instruction does not need an acquiring counterpart because it is only // used for reference loading (Reference::get()). The same holds for g1LoadN. match(Set dst (LoadP mem)); - predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); effect(TEMP dst, KILL cr); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} @@ -615,15 +565,11 @@ instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) __ ldr($dst$$Register, $mem$$Register); satb_barrier(masm, this, noreg /* obj */, - $dst$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */); + $dst$$Register /* pre_val */); %} ins_pipe(iload_reg_mem); %} -// This pattern is generated automatically from g1_aarch64.m4. 
-// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct loadN_shenandoah(iRegNNoSp dst, indirect mem, iRegPNoSp tmp, rFlagsReg cr) %{ match(Set dst (LoadN mem)); @@ -637,9 +583,7 @@ instruct loadN_shenandoah(iRegNNoSp dst, indirect mem, iRegPNoSp tmp, rFlagsReg __ decode_heap_oop($tmp$$Register, $dst$$Register); satb_barrier(masm, this, noreg /* obj */, - $tmp$$Register /* pre_val */, - rscratch1 /* tmp1 */, - rscratch2 /* tmp2 */); + $tmp$$Register /* pre_val */); } %} ins_pipe(iload_reg_mem); diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index 7f0812b8430db..de70b73de3318 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -857,8 +857,8 @@ void ShenandoahBarrierStubC2::register_stub() { } } -ShenandoahSATBBarrierStubC2* ShenandoahSATBBarrierStubC2::create(const MachNode* node, Register addr_reg, Register preval, Register tmp1, Register tmp2) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahSATBBarrierStubC2(node, addr_reg, preval, tmp1, tmp2); +ShenandoahSATBBarrierStubC2* ShenandoahSATBBarrierStubC2::create(const MachNode* node, Register addr_reg, Register preval) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahSATBBarrierStubC2(node, addr_reg, preval); stub->register_stub(); return stub; } @@ -882,11 +882,12 @@ ShenandoahCASBarrierMidStubC2* ShenandoahCASBarrierMidStubC2::create(const MachN } bool ShenandoahBarrierSetC2State::needs_liveness_data(const MachNode* mach) const { - assert(mach->barrier_data() != 0, "what else?"); - return mach->barrier_data() != 0; -; + //assert(mach->barrier_data() != 0, "what else?"); + // return mach->barrier_data() != 0; + //return (mach->barrier_data() & ShenandoahSATBBarrier) != 0; + return ShenandoahSATBBarrierStubC2::needs_barrier(mach); } bool ShenandoahBarrierSetC2State::needs_livein_data() const { 
return true; -} \ No newline at end of file +} diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index 682dd2eb438ce..9e1e42f536e95 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -132,6 +132,10 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { int estimate_stub_size() const /* override */; void emit_stubs(CodeBuffer& cb) const /* override */; + void late_barrier_analysis() const /* override*/ { + compute_liveness_at_stubs(); + } + }; class ShenandoahBarrierStubC2 : public BarrierStubC2 { @@ -145,17 +149,15 @@ class ShenandoahBarrierStubC2 : public BarrierStubC2 { class ShenandoahSATBBarrierStubC2 : public ShenandoahBarrierStubC2 { Register _addr_reg; Register _preval; - Register _tmp1; - Register _tmp2; - ShenandoahSATBBarrierStubC2(const MachNode* node, Register addr, Register preval, Register tmp1, Register tmp2) : + ShenandoahSATBBarrierStubC2(const MachNode* node, Register addr, Register preval) : ShenandoahBarrierStubC2(node), - _addr_reg(addr), _preval(preval), _tmp1(tmp1), _tmp2(tmp2) {} + _addr_reg(addr), _preval(preval) {} public: static bool needs_barrier(const MachNode* node) { return (node->barrier_data() & ShenandoahBarrierSATB) != 0; } - static ShenandoahSATBBarrierStubC2* create(const MachNode* node, Register addr_reg, Register preval, Register tmp1, Register tmp2); + static ShenandoahSATBBarrierStubC2* create(const MachNode* node, Register addr_reg, Register preval); void emit_code(MacroAssembler& masm) override; }; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp index 0bee8b4cf4205..7c094cfde4a19 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp @@ -38,6 +38,11 @@ JRT_LEAF(void, 
ShenandoahRuntime::arraycopy_barrier_narrow_oop(narrowOop* src, n ShenandoahBarrierSet::barrier_set()->arraycopy_barrier(src, dst, length); JRT_END +JRT_LEAF(void, ShenandoahRuntime::write_barrier_pre_c2(oopDesc* orig)) +//log_info(gc)("WB pre C2: " PTR_FORMAT, p2i(orig)); + write_barrier_pre(orig); +JRT_END + JRT_LEAF(void, ShenandoahRuntime::write_barrier_pre(oopDesc* orig)) assert(orig != nullptr, "should be optimized out"); shenandoah_assert_correct(nullptr, orig); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp index f1919095d58f1..593d795b14a62 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp @@ -37,6 +37,7 @@ class ShenandoahRuntime : public AllStatic { static void arraycopy_barrier_narrow_oop(narrowOop* src, narrowOop* dst, size_t length); static void write_barrier_pre(oopDesc* orig); + static void write_barrier_pre_c2(oopDesc* orig); static oopDesc* load_reference_barrier_strong(oopDesc* src, oop* load_addr); static oopDesc* load_reference_barrier_strong_narrow(oopDesc* src, narrowOop* load_addr); From a51c6bdcd41057b2895ac67f71f8aef28d2bf484 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Thu, 28 Aug 2025 16:06:45 +0000 Subject: [PATCH 6/9] SATB late barrier, x86 parts --- .../shenandoahBarrierSetAssembler_x86.cpp | 65 ++++++- .../shenandoahBarrierSetAssembler_x86.hpp | 6 +- .../x86/gc/shenandoah/shenandoah_x86_64.ad | 165 +++++++++++++++++- .../shenandoah/c2/shenandoahBarrierSetC2.cpp | 8 +- .../shenandoah/c2/shenandoahBarrierSetC2.hpp | 7 +- 5 files changed, 239 insertions(+), 12 deletions(-) diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp index cee634faa4938..531275bf910d0 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ 
b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -778,6 +778,19 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } #ifdef COMPILER2 +void ShenandoahBarrierSetAssembler::satb_barrier_c2(const MachNode* node, MacroAssembler* masm, + Address addr, Register preval, Register tmp) { + if (!ShenandoahSATBBarrierStubC2::needs_barrier(node)) { + return; + } + ShenandoahSATBBarrierStubC2* const stub = ShenandoahSATBBarrierStubC2::create(node, addr, preval, tmp); + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ testb(gc_state, ShenandoahHeap::MARKING); + __ jcc(Assembler::notZero, *stub->entry()); + __ bind(*stub->continuation()); +} + void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register res, Address addr, Register oldval, Register newval, Register tmp1, Register tmp2, bool exchange) { @@ -786,8 +799,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, MacroAs assert_different_registers(oldval, tmp1, tmp2); assert_different_registers(newval, tmp1, tmp2); - ShenandoahCASBarrierSlowStub* const slow_stub = ShenandoahCASBarrierSlowStub::create(node, addr, oldval, newval, res, tmp1, tmp2, exchange); - ShenandoahCASBarrierMidStub* const mid_stub = ShenandoahCASBarrierMidStub::create(node, slow_stub, oldval, res, tmp1, exchange); + ShenandoahCASBarrierSlowStubC2* const slow_stub = ShenandoahCASBarrierSlowStubC2::create(node, addr, oldval, newval, res, tmp1, tmp2, exchange); + ShenandoahCASBarrierMidStubC2* const mid_stub = ShenandoahCASBarrierMidStubC2::create(node, slow_stub, oldval, res, tmp1, exchange); Label L_success, L_failure; @@ -821,7 +834,51 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, MacroAs #undef __ #define __ masm. 
-void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { +void ShenandoahSATBBarrierStubC2::emit_code(MacroAssembler& masm) { + __ bind(*entry()); + Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + Label runtime; + + // Do we need to load the previous value? + if (_addr.base() != noreg) { + __ load_heap_oop(_preval, _addr, noreg, AS_RAW); + } + // Is the previous value null? + __ cmpptr(_preval, NULL_WORD); + __ jcc(Assembler::equal, *continuation()); + + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + __ movptr(_tmp, index); + __ testptr(_tmp, _tmp); + __ jccb(Assembler::zero, runtime); + // The buffer is not full, store value into it. + __ subptr(_tmp, wordSize); + __ movptr(index, _tmp); + __ addptr(_tmp, buffer); + __ movptr(Address(_tmp, 0), _preval); + + __ jmp(*continuation()); + + __ bind(runtime); + { + SaveLiveRegisters save_registers(&masm, this); + if (c_rarg0 != _preval) { + __ mov(c_rarg0, _preval); + } + // rax is a caller-saved, non-argument-passing register, so it does not + // interfere with c_rarg0 or c_rarg1. If it contained any live value before + // entering this stub, it is saved at this point, and restored after the + // call. If it did not contain any live value, it is free to be used. In + // either case, it is safe to use it here as a call scratch register. 
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre_c2)), rax); + } + __ jmp(*continuation()); +} + +void ShenandoahCASBarrierMidStubC2::emit_code(MacroAssembler& masm) { __ bind(*entry()); if (!_cae) { @@ -843,7 +900,7 @@ void ShenandoahCASBarrierMidStub::emit_code(MacroAssembler& masm) { __ jmp(*continuation()); } -void ShenandoahCASBarrierSlowStub::emit_code(MacroAssembler& masm) { +void ShenandoahCASBarrierSlowStubC2::emit_code(MacroAssembler& masm) { __ bind(*entry()); assert(_expected == rax, "expected must be rax"); diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp index 1ea4edb8eb3a6..46a1c776135e1 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp @@ -78,9 +78,11 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { Register res, Address addr, Register oldval, Register newval, bool exchange, Register tmp1, Register tmp2); #ifdef COMPILER2 + void satb_barrier_c2(const MachNode* node, MacroAssembler* masm, + Address addr, Register preval, Register tmp); void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, - Register res, Address addr, Register oldval, Register newval, Register tmp1, Register tmp2, - bool exchange); + Register res, Address addr, Register oldval, Register newval, Register tmp1, Register tmp2, + bool exchange); #endif virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register src, Register dst, Register count); diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad index adcb922711601..1d3f4aad2014e 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad @@ -22,9 +22,68 @@ // // 
-source_hpp %{ -#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/c2/shenandoahSupport.hpp" +source %{ +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp" +#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" +%} + +instruct storeP_shenandoah(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rFlagsReg cr) +%{ + match(Set mem (StoreP mem src)); + predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(125); // XXX + format %{ "movq $mem, $src\t# ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + $mem$$Address /* addr */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + __ movq($mem$$Address, $src$$Register); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct storeN_shenandoah(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rFlagsReg cr) +%{ + match(Set mem (StoreN mem src)); + predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(125); // XXX + format %{ "movl $mem, $src\t# ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + $mem$$Address /* addr */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + __ movl($mem$$Address, $src$$Register); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct encodePAndStoreN_shenandoah(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rFlagsReg cr) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(125); // XXX + format %{ "encode_heap_oop $src\n\t" + "movl $mem, $src\t# ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + $mem$$Address /* addr */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + __ movq($tmp2$$Register, $src$$Register); + if 
((barrier_data() & ShenandoahBarrierCardMarkNotNull) == 0) { + __ encode_heap_oop($tmp2$$Register); + } else { + __ encode_heap_oop_not_null($tmp2$$Register); + } + __ movl($mem$$Address, $tmp2$$Register); + %} + ins_pipe(ialu_mem_reg); %} instruct compareAndSwapP_shenandoah(rRegI res, @@ -41,6 +100,10 @@ instruct compareAndSwapP_shenandoah(rRegI res, format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} ins_encode %{ + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* addr */, + $oldval$$Register /* pre_val */, + $tmp2$$Register /* tmp */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, /*exchange*/ false); @@ -62,6 +125,14 @@ instruct compareAndSwapN_shenandoah(rRegI res, ins_encode %{ guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ movl($tmp1$$Register, $oldval$$Register); + __ decode_heap_oop($tmp1$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* addr */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, /*exchange*/ false); @@ -81,6 +152,14 @@ instruct compareAndExchangeN_shenandoah(memory mem_ptr, ins_encode %{ guarantee(UseCompressedOops, "must be compressed oops"); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ movl($tmp1$$Register, $oldval$$Register); + __ decode_heap_oop($tmp1$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* addr */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, 
/*exchange*/ true); @@ -101,9 +180,89 @@ instruct compareAndExchangeP_shenandoah(memory mem_ptr, format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} ins_encode %{ + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* addr */, + $oldval$$Register /* pre_val */, + $tmp2$$Register /* tmp */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, $tmp1$$Register, $tmp2$$Register, /*exchange*/ true); %} ins_pipe( pipe_cmpxchg ); %} + +instruct getAndSetP_shenandoah(indirect mem, rRegP newval, rRegP tmp, rFlagsReg cr) +%{ + match(Set newval (GetAndSetP mem newval)); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP tmp, KILL cr); + format %{ "xchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ xchgq($newval$$Register, Address($mem$$Register, 0)); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* addr */, + $newval$$Register /* pre_val */, + $tmp$$Register /* tmp */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct getAndSetN_shenandoah(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rFlagsReg cr) +%{ + match(Set newval (GetAndSetN mem newval)); + predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + format %{ "xchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ xchgl($newval$$Register, Address($mem$$Register, 0)); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ movl($tmp1$$Register, $newval$$Register); + __ decode_heap_oop($tmp1$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* addr */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + } + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct loadP_shenandaoh(rRegP dst, memory mem, rRegP tmp, rFlagsReg cr) +%{ + match(Set 
dst (LoadP mem)); + predicate(UseShenandoahGC && n->as_Load()->barrier_data() != 0); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + ins_cost(125); // XXX + format %{ "movq $dst, $mem\t# ptr" %} + ins_encode %{ + __ movq($dst$$Register, $mem$$Address); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* addr */, + $dst$$Register /* pre_val */, + $tmp$$Register /* tmp */); + %} + ins_pipe(ialu_reg_mem); // XXX +%} + +instruct loadN_shenandoah(rRegN dst, memory mem, rRegP tmp1, rRegP tmp2, rFlagsReg cr) +%{ + match(Set dst (LoadN mem)); + predicate(UseShenandoahGC && n->as_Load()->barrier_data() != 0); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(125); // XXX + format %{ "movl $dst, $mem\t# compressed ptr" %} + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ movl($tmp1$$Register, $dst$$Register); + __ decode_heap_oop($tmp1$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + Address() /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + } + %} + ins_pipe(ialu_reg_mem); // XXX +%} diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index de70b73de3318..f9d13456364f0 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -858,7 +858,13 @@ void ShenandoahBarrierStubC2::register_stub() { } ShenandoahSATBBarrierStubC2* ShenandoahSATBBarrierStubC2::create(const MachNode* node, Register addr_reg, Register preval) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahSATBBarrierStubC2(node, addr_reg, preval); + auto* stub = new (Compile::current()->comp_arena()) ShenandoahSATBBarrierStubC2(node, addr_reg, Address(), preval, noreg); + stub->register_stub(); + return stub; +} + +ShenandoahSATBBarrierStubC2* 
ShenandoahSATBBarrierStubC2::create(const MachNode* node, Address addr, Register preval, Register tmp) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahSATBBarrierStubC2(node, noreg, addr, preval, tmp); stub->register_stub(); return stub; } diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index 9e1e42f536e95..0c77b122f260f 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -148,16 +148,19 @@ class ShenandoahBarrierStubC2 : public BarrierStubC2 { class ShenandoahSATBBarrierStubC2 : public ShenandoahBarrierStubC2 { Register _addr_reg; + Address _addr; Register _preval; - ShenandoahSATBBarrierStubC2(const MachNode* node, Register addr, Register preval) : + Register _tmp; + ShenandoahSATBBarrierStubC2(const MachNode* node, Register addr_reg, Address addr, Register preval, Register tmp) : ShenandoahBarrierStubC2(node), - _addr_reg(addr), _preval(preval) {} + _addr_reg(addr_reg), _addr(addr), _preval(preval), _tmp(tmp) {} public: static bool needs_barrier(const MachNode* node) { return (node->barrier_data() & ShenandoahBarrierSATB) != 0; } static ShenandoahSATBBarrierStubC2* create(const MachNode* node, Register addr_reg, Register preval); + static ShenandoahSATBBarrierStubC2* create(const MachNode* node, Address addr, Register preval, Register tmp); void emit_code(MacroAssembler& masm) override; }; From 4ada17d94a99614ff3b029dd1594736f5280ef5e Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Thu, 28 Aug 2025 17:53:31 +0000 Subject: [PATCH 7/9] Some aarch64 adjustments --- .../shenandoahBarrierSetAssembler_aarch64.cpp | 8 +- .../shenandoahBarrierSetAssembler_aarch64.hpp | 2 +- .../gc/shenandoah/shenandoah_aarch64.ad | 157 ++++++------------ 3 files changed, 56 insertions(+), 111 deletions(-) diff --git 
a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index e923e06e099f7..9722d632f26a1 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -611,8 +611,14 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } #ifdef COMPILER2 -void ShenandoahBarrierSetAssembler::satb_barrier_c2(MacroAssembler* masm, Register addr, Register pre_val, ShenandoahSATBBarrierStubC2* stub) { +void ShenandoahBarrierSetAssembler::satb_barrier_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register pre_val) { assert_different_registers(addr, pre_val); + if (!ShenandoahSATBBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + ShenandoahSATBBarrierStubC2* const stub = ShenandoahSATBBarrierStubC2::create(node, addr, pre_val); + // Check if GC marking is in progress, otherwise we don't have to do anything. 
Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); __ ldrb(rscratch1, gc_state); diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index bf01bbe547249..dca4bc0e0dda1 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -95,7 +95,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { bool acquire, bool release, bool is_cae, Register result); #ifdef COMPILER2 - void satb_barrier_c2(MacroAssembler* masm, Register obj, Register pre_val, ShenandoahSATBBarrierStubC2* stub); + void satb_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register pre_val); void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register expected, Register new_val, Register result, bool acquire, bool release, bool weak, bool is_cae); #endif diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad index 314d65a2816b6..884b791897bd0 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad @@ -22,37 +22,10 @@ // // -source_hpp %{ -#include "gc/shenandoah/shenandoahBarrierSet.hpp" -#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -%} - source %{ - +#include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp" #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" - -static void satb_barrier(MacroAssembler* masm, - const MachNode* node, - Register addr, - Register pre_val, - RegSet preserve = RegSet(), - RegSet no_preserve = RegSet()) { - if (!ShenandoahSATBBarrierStubC2::needs_barrier(node)) { - return; - } - 
Assembler::InlineSkippedInstructionsCounter skip_counter(masm); - ShenandoahBarrierSetAssembler* bsasm = ShenandoahBarrierSet::barrier_set()->assembler(); - ShenandoahSATBBarrierStubC2* const stub = ShenandoahSATBBarrierStubC2::create(node, addr, pre_val); - for (RegSetIterator reg = preserve.begin(); *reg != noreg; ++reg) { - stub->preserve(*reg); - } - for (RegSetIterator reg = no_preserve.begin(); *reg != noreg; ++reg) { - stub->dont_preserve(*reg); - } - bsasm->satb_barrier_c2(masm, addr, pre_val, stub); -} - %} instruct storeP_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr) @@ -63,10 +36,9 @@ instruct storeP_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr) ins_cost(INSN_COST); format %{ "str $src, $mem\t# ptr" %} ins_encode %{ - satb_barrier(masm, this, - $mem$$Register /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $src$$Register) /* preserve */); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + $mem$$Register /* obj */, + $tmp$$Register /* pre_val */); __ str($src$$Register, $mem$$Register); %} ins_pipe(istore_reg_mem); @@ -80,10 +52,9 @@ instruct storePVolatile_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlag ins_cost(VOLATILE_REF_COST); format %{ "stlr $src, $mem\t# ptr" %} ins_encode %{ - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, $mem$$Register /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $src$$Register) /* preserve */); + $tmp$$Register /* pre_val */); __ stlr($src$$Register, $mem$$Register); %} ins_pipe(pipe_class_memory); @@ -97,10 +68,9 @@ instruct storeN_shenandoah(indirect mem, iRegN src, iRegPNoSp tmp, rFlagsReg cr) ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed ptr" %} ins_encode %{ - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, $mem$$Register /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $src$$Register) /* 
preserve */); + $tmp$$Register /* pre_val */); __ strw($src$$Register, $mem$$Register); %} ins_pipe(istore_reg_mem); @@ -114,10 +84,9 @@ instruct storeNVolatile_shenandoah(indirect mem, iRegN src, iRegPNoSp tmp, rFlag ins_cost(VOLATILE_REF_COST); format %{ "stlrw $src, $mem\t# compressed ptr" %} ins_encode %{ - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, $mem$$Register /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $src$$Register) /* preserve */); + $tmp$$Register /* pre_val */); __ stlrw($src$$Register, $mem$$Register); %} ins_pipe(pipe_class_memory); @@ -132,10 +101,9 @@ instruct encodePAndStoreN_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFl format %{ "encode_heap_oop $tmp, $src\n\t" "strw $tmp, $mem\t# compressed ptr" %} ins_encode %{ - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, $mem$$Register /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $src$$Register) /* preserve */); + $tmp$$Register /* pre_val */); if ((barrier_data() & ShenandoahBarrierCardMarkNotNull) == 0) { __ encode_heap_oop($tmp$$Register, $src$$Register); } else { @@ -155,10 +123,9 @@ instruct encodePAndStoreNVolatile_shenandoah(indirect mem, iRegP src, iRegPNoSp format %{ "encode_heap_oop $tmp, $src\n\t" "stlrw $tmp, $mem\t# compressed ptr" %} ins_encode %{ - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, $mem$$Register /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $src$$Register) /* preserve */); + $tmp$$Register /* pre_val */); if ((barrier_data() & ShenandoahBarrierCardMarkNotNull) == 0) { __ encode_heap_oop($tmp$$Register, $src$$Register); } else { @@ -183,11 +150,9 @@ instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, i ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - satb_barrier(masm, this, + 
ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $oldval$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $oldval$$Register /* pre_val */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} @@ -212,11 +177,9 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i guarantee(UseCompressedOops, "must be compressed oops"); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $oldval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $tmp$$Register /* pre_val */); } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false); @@ -240,11 +203,9 @@ instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $oldval$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $oldval$$Register /* pre_val */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false); %} @@ 
-270,11 +231,9 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval guarantee(UseCompressedOops, "must be compressed oops"); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $oldval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $tmp$$Register /* pre_val */); } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false); @@ -296,11 +255,9 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva guarantee(UseCompressedOops, "must be compressed oops"); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $oldval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $tmp$$Register /* pre_val */); } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true); @@ -318,11 +275,9 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $oldval$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - 
RegSet::of($res$$Register) /* no_preserve */); + $oldval$$Register /* pre_val */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} @@ -343,11 +298,9 @@ instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN ol guarantee(UseCompressedOops, "must be compressed oops"); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $oldval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $tmp$$Register /* pre_val */); } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true); @@ -366,11 +319,9 @@ instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP ol %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $oldval$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $oldval$$Register /* pre_val */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true); %} @@ -391,11 +342,9 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva guarantee(UseCompressedOops, "must be compressed oops"); if 
(ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $oldval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $tmp$$Register /* pre_val */); } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false); @@ -413,11 +362,9 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $oldval$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $oldval$$Register /* pre_val */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} @@ -439,11 +386,9 @@ instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN ol guarantee(UseCompressedOops, "must be compressed oops"); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $oldval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $tmp$$Register /* pre_val */); } ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, 
$mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false); @@ -463,11 +408,9 @@ instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP ol %} ins_encode %{ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $oldval$$Register /* pre_val */, - RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, - RegSet::of($res$$Register) /* no_preserve */); + $oldval$$Register /* pre_val */); ShenandoahBarrierSet::assembler()->cmpxchg_oop_c2(this, masm, $mem$$base$$Register, $oldval$$Register, $newval$$Register, $res$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false); %} @@ -484,10 +427,9 @@ instruct getAndSetP_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, rFl ins_encode %{ assert_different_registers($mem$$Register, $newval$$Register); __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $preval$$Register /* pre_val */, - RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + $preval$$Register /* pre_val */); %} ins_pipe(pipe_serial); %} @@ -502,10 +444,9 @@ instruct getAndSetPAcq_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, ins_encode %{ assert_different_registers($mem$$Register, $newval$$Register); __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $preval$$Register /* pre_val */, - RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + $preval$$Register /* pre_val */); %} ins_pipe(pipe_serial); %} @@ -522,10 +463,9 @@ instruct 
getAndSetN_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRe __ atomic_xchgw($preval$$Register, $newval$$Register, $mem$$Register); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $preval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + $tmp$$Register /* pre_val */); } %} ins_pipe(pipe_serial); @@ -543,10 +483,9 @@ instruct getAndSetNAcq_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, __ atomic_xchgalw($preval$$Register, $newval$$Register, $mem$$Register); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $preval$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, - $tmp$$Register /* pre_val */, - RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + $tmp$$Register /* pre_val */); } %} ins_pipe(pipe_serial); @@ -563,7 +502,7 @@ instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) format %{ "ldr $dst, $mem\t# ptr" %} ins_encode %{ __ ldr($dst$$Register, $mem$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, $dst$$Register /* pre_val */); %} @@ -581,7 +520,7 @@ instruct loadN_shenandoah(iRegNNoSp dst, indirect mem, iRegPNoSp tmp, rFlagsReg __ ldrw($dst$$Register, $mem$$Register); if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { __ decode_heap_oop($tmp$$Register, $dst$$Register); - satb_barrier(masm, this, + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, $tmp$$Register /* pre_val */); } From 153fe1df7b011439a2ad623c57ef777a2cd59242 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Thu, 28 Aug 2025 22:51:31 +0200 Subject: [PATCH 8/9] LRB late expanded, first 
attempts --- src/hotspot/cpu/aarch64/aarch64.ad | 2 + .../shenandoahBarrierSetAssembler_aarch64.cpp | 64 +++++++++++++++ .../shenandoahBarrierSetAssembler_aarch64.hpp | 1 + .../gc/shenandoah/shenandoah_aarch64.ad | 81 ++++++++++++++++--- .../shenandoah/c2/shenandoahBarrierSetC2.cpp | 77 +++++++++--------- .../shenandoah/c2/shenandoahBarrierSetC2.hpp | 14 ++++ src/hotspot/share/opto/memnode.cpp | 6 +- src/hotspot/share/opto/narrowptrnode.hpp | 9 ++- 8 files changed, 199 insertions(+), 55 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 7a3fc930fc504..61c6fe81d8a44 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -8200,6 +8200,7 @@ instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{ instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + predicate(!UseShenandoahGC || n->as_DecodeN()->barrier_data() == 0); match(Set dst (DecodeN src)); ins_cost(INSN_COST * 3); format %{ "decode_heap_oop $dst, $src" %} @@ -8214,6 +8215,7 @@ instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + predicate(!UseShenandoahGC || n->as_DecodeN()->barrier_data() == 0); match(Set dst (DecodeN src)); ins_cost(INSN_COST * 3); format %{ "decode_heap_oop_not_null $dst, $src" %} diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 9722d632f26a1..44f9b0ac1319d 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ 
b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -611,6 +611,27 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } #ifdef COMPILER2 +void ShenandoahBarrierSetAssembler::load_ref_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register addr, bool narrow, bool maybe_null) { + if (!ShenandoahLoadRefBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label done; + if (maybe_null) { + __ cbz(obj, done); + } + ShenandoahLoadRefBarrierStubC2* const stub = ShenandoahLoadRefBarrierStubC2::create(node, obj, addr, narrow); + // Don't preserve the obj across the runtime call, we override it from the return value anyway. + stub->dont_preserve(obj); + // Check if GC marking is in progress, otherwise we don't have to do anything. + Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ ldrb(rscratch1, gc_state); + __ tstw(rscratch1, ShenandoahHeap::HAS_FORWARDED); + __ br(Assembler::NE, *stub->entry()); + __ bind(*stub->continuation()); + __ bind(done); +} + void ShenandoahBarrierSetAssembler::satb_barrier_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register pre_val) { assert_different_registers(addr, pre_val); if (!ShenandoahSATBBarrierStubC2::needs_barrier(node)) { @@ -666,6 +687,49 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, #undef __ #define __ masm. +void ShenandoahLoadRefBarrierStubC2::emit_code(MacroAssembler& masm) { + __ bind(*entry()); + // Weak/phantom loads always need to go to runtime. + if ((_node->barrier_data() & ShenandoahBarrierStrong) != 0) { + // Check for object in cset. 
+ __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); + __ lsr(rscratch1, _obj, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ ldrb(rscratch2, Address(rscratch2, rscratch1)); + __ cbz(rscratch2, *continuation()); + } + { + SaveLiveRegisters save_registers(&masm, this); + if (c_rarg0 != _obj) { + if (c_rarg0 == _addr) { + __ mov(rscratch1, _addr); + _addr = rscratch1; + } + __ mov(c_rarg0, _obj); + } + __ mov(c_rarg1, _addr); + + if (_narrow) { + if ((_node->barrier_data() & ShenandoahBarrierStrong) != 0) { + __ mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)); + } else if ((_node->barrier_data() & ShenandoahBarrierWeak) != 0) { + __ mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)); + } else if ((_node->barrier_data() & ShenandoahBarrierPhantom) != 0) { + __ mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow)); + } + } else { + if ((_node->barrier_data() & ShenandoahBarrierStrong) != 0) { + __ mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)); + } else if ((_node->barrier_data() & ShenandoahBarrierWeak) != 0) { + __ mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); + } else if ((_node->barrier_data() & ShenandoahBarrierPhantom) != 0) { + __ mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)); + } + } + __ blr(rscratch1); + __ mov(_obj, r0); + } +} + void ShenandoahSATBBarrierStubC2::emit_code(MacroAssembler& masm) { __ bind(*entry()); // Do we need to load the previous value? 
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index dca4bc0e0dda1..2260431550409 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -95,6 +95,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { bool acquire, bool release, bool is_cae, Register result); #ifdef COMPILER2 + void load_ref_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register addr, bool narrow, bool maybe_null); void satb_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register pre_val); void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register expected, Register new_val, Register result, bool acquire, bool release, bool weak, bool is_cae); diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad index 884b791897bd0..0777f514938fb 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad @@ -494,7 +494,7 @@ instruct getAndSetNAcq_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) %{ // This instruction does not need an acquiring counterpart because it is only - // used for reference loading (Reference::get()). The same holds for g1LoadN. + // used for reference loading (Reference::get()). 
match(Set dst (LoadP mem)); predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); effect(TEMP dst, KILL cr); @@ -505,25 +505,80 @@ instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, noreg /* obj */, $dst$$Register /* pre_val */); + ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, + $dst$$Register /* obj */, + $mem$$Register /* addr */, + false /* narrow */, + true /* maybe_null */); %} ins_pipe(iload_reg_mem); %} -instruct loadN_shenandoah(iRegNNoSp dst, indirect mem, iRegPNoSp tmp, rFlagsReg cr) +instruct loadP_volatile_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) %{ - match(Set dst (LoadN mem)); - predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); - effect(TEMP dst, TEMP tmp, KILL cr); + // Acquiring counterpart of loadP_shenandoah: selected when + // needs_acquiring_load(n) holds, so the load is emitted as ldar.
+ match(Set dst (LoadP mem)); + predicate(UseShenandoahGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + effect(TEMP dst, KILL cr); ins_cost(4 * INSN_COST); - format %{ "ldrw $dst, $mem\t# compressed ptr" %} + format %{ "ldar $dst, $mem\t# ptr" %} ins_encode %{ - __ ldrw($dst$$Register, $mem$$Register); - if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { - __ decode_heap_oop($tmp$$Register, $dst$$Register); - ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, - noreg /* obj */, - $tmp$$Register /* pre_val */); - } + __ ldar($dst$$Register, $mem$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + noreg /* obj */, + $dst$$Register /* pre_val */); + ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, + $dst$$Register /* obj */, + $mem$$Register /* addr */, + false /* narrow */, + true /* maybe_null */); %} ins_pipe(iload_reg_mem); %} + +instruct loadAndDecodeN_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) %{ + match(Set dst (DecodeN (LoadN mem))); + predicate(UseShenandoahGC && !needs_acquiring_load(n->in(1)) && n->in(1)->as_Load()->barrier_data() != 0); + effect(TEMP_DEF dst, KILL cr); + ins_cost(INSN_COST * 6); + format %{ "ldr $dst, $mem\t# ptr" + "decode_heap_oop $dst, $dst" + %} + ins_encode %{ + __ ldrw($dst$$Register, $mem$$Register); + __ decode_heap_oop($dst$$Register, $dst$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + noreg /* obj */, + $dst$$Register /* pre_val */); + ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, + $dst$$Register /* obj */, + $mem$$Register /* addr */, + true /* narrow */, + true /* maybe_null */); + %} + ins_pipe(ialu_reg); +%} + +instruct loadAndDecodeN_volatile_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) %{ + predicate(UseShenandoahGC && needs_acquiring_load(n->in(1)) && n->in(1)->as_Load()->barrier_data() != 0); + match(Set dst (DecodeN (LoadN mem))); + effect(TEMP dst, KILL cr); + 
ins_cost(INSN_COST * 3); + format %{ "ldarw $dst, $mem\t# ptr" + "decode_heap_oop $dst, $dst" + %} + ins_encode %{ + __ ldarw($dst$$Register, $mem$$Register); + __ decode_heap_oop($dst$$Register, $dst$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + noreg /* obj */, + $dst$$Register /* pre_val */); + ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, + $dst$$Register /* obj */, + $mem$$Register /* addr */, + true /* narrow */, + true /* maybe_null */); + %} + ins_pipe(ialu_reg); +%} diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index f9d13456364f0..85e37347215ee 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -301,45 +301,6 @@ Node* ShenandoahBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& return BarrierSetC2::store_at_resolved(access, val); } -Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { - // 0: non-reference load, no additional barrier is needed - if (!access.is_oop()) { - return BarrierSetC2::load_at_resolved(access, val_type); - } - - // 1. If we are reading the value of the referent field of a Reference object, we - // need to record the referent in an SATB log buffer using the pre-barrier - // mechanism. - DecoratorSet decorators = access.decorators(); - bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; - bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; - bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; - // If we are reading the value of the referent field of a Reference object, we - // need to record the referent in an SATB log buffer using the pre-barrier - // mechanism. Also we need to add a memory barrier to prevent commoning reads - // from this field across safepoints, since GC can change its value. 
- bool need_read_barrier = ((on_weak || on_phantom) && !no_keepalive); - if (access.is_oop() && need_read_barrier) { - access.set_barrier_data(ShenandoahBarrierSATB); - } - - Node* load = BarrierSetC2::load_at_resolved(access, val_type); - - BasicType type = access.type(); - - // 2: apply LRB if needed - if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { - load = new ShenandoahLoadReferenceBarrierNode(nullptr, load, decorators); - if (access.is_parse_access()) { - load = static_cast(access).kit()->gvn().transform(load); - } else { - load = static_cast(access).gvn().transform(load); - } - } - - return load; -} - static void set_barrier_data(C2Access& access) { if (!access.is_oop()) { return; @@ -367,6 +328,36 @@ static void set_barrier_data(C2Access& access) { access.set_barrier_data(barrier_data); } +Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { + // 1: non-reference load, no additional barrier is needed + if (!access.is_oop()) { + return BarrierSetC2::load_at_resolved(access, val_type); + } + + // 2. Set barrier data for LRB. + set_barrier_data(access); + + // 3. If we are reading the value of the referent field of a Reference object, we + // need to record the referent in an SATB log buffer using the pre-barrier + // mechanism. + DecoratorSet decorators = access.decorators(); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; + // If we are reading the value of the referent field of a Reference object, we + // need to record the referent in an SATB log buffer using the pre-barrier + // mechanism. Also we need to add a memory barrier to prevent commoning reads + // from this field across safepoints, since GC can change its value. 
+ uint8_t barriers = access.barrier_data(); + bool need_read_barrier = ((on_weak || on_phantom) && !no_keepalive); + if (access.is_oop() && need_read_barrier) { + barriers |= ShenandoahBarrierSATB; + } + access.set_barrier_data(barriers); + + return BarrierSetC2::load_at_resolved(access, val_type); +} + Node* ShenandoahBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const { if (ShenandoahCASBarrier) { @@ -857,6 +848,12 @@ void ShenandoahBarrierStubC2::register_stub() { } } +ShenandoahLoadRefBarrierStubC2* ShenandoahLoadRefBarrierStubC2::create(const MachNode* node, Register obj, Register addr, bool narrow) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahLoadRefBarrierStubC2(node, obj, addr, narrow); + stub->register_stub(); + return stub; +} + ShenandoahSATBBarrierStubC2* ShenandoahSATBBarrierStubC2::create(const MachNode* node, Register addr_reg, Register preval) { auto* stub = new (Compile::current()->comp_arena()) ShenandoahSATBBarrierStubC2(node, addr_reg, Address(), preval, noreg); stub->register_stub(); @@ -891,7 +888,7 @@ bool ShenandoahBarrierSetC2State::needs_liveness_data(const MachNode* mach) cons //assert(mach->barrier_data() != 0, "what else?"); // return mach->barrier_data() != 0; //return (mach->barrier_data() & ShenandoahSATBBarrier) != 0; - return ShenandoahSATBBarrierStubC2::needs_barrier(mach); + return ShenandoahSATBBarrierStubC2::needs_barrier(mach) || ShenandoahLoadRefBarrierStubC2::needs_barrier(mach); } bool ShenandoahBarrierSetC2State::needs_livein_data() const { diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index 0c77b122f260f..d1f82b4edce77 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -146,6 +146,20 @@ class ShenandoahBarrierStubC2 : public 
BarrierStubC2 { virtual void emit_code(MacroAssembler& masm) = 0; }; +class ShenandoahLoadRefBarrierStubC2 : public ShenandoahBarrierStubC2 { + Register _obj; + Register _addr; + bool _narrow; + ShenandoahLoadRefBarrierStubC2(const MachNode* node, Register obj, Register addr, bool narrow) : + ShenandoahBarrierStubC2(node), _obj(obj), _addr(addr), _narrow(narrow) {} +public: + static bool needs_barrier(const MachNode* node) { + return (node->barrier_data() & (ShenandoahBarrierStrong | ShenandoahBarrierWeak | ShenandoahBarrierPhantom | ShenandoahBarrierNative)) != 0; + } + static ShenandoahLoadRefBarrierStubC2* create(const MachNode* node, Register obj, Register addr, bool narrow); + void emit_code(MacroAssembler& masm) override; +}; + class ShenandoahSATBBarrierStubC2 : public ShenandoahBarrierStubC2 { Register _addr_reg; Address _addr; diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index f358729dfb2d0..63a081cfbed9b 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -872,6 +872,8 @@ uint8_t MemNode::barrier_data(const Node* n) { return n->as_LoadStore()->barrier_data(); } else if (n->is_Mem()) { return n->as_Mem()->barrier_data(); + } else if (n->is_DecodeN()) { + return n->as_DecodeN()->barrier_data(); } return 0; } @@ -1000,7 +1002,9 @@ Node* LoadNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypeP load->set_barrier_data(barrier_data); if (load->Opcode() == Op_LoadN) { Node* ld = gvn.transform(load); - return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); + DecodeNNode* decode = new DecodeNNode(ld, ld->bottom_type()->make_ptr()); + decode->set_barrier_data(barrier_data); + return decode; } return load; diff --git a/src/hotspot/share/opto/narrowptrnode.hpp b/src/hotspot/share/opto/narrowptrnode.hpp index e7cd19cb42441..394715111dddc 100644 --- a/src/hotspot/share/opto/narrowptrnode.hpp +++ b/src/hotspot/share/opto/narrowptrnode.hpp @@ -89,14 +89,21 @@ class 
DecodeNarrowPtrNode : public TypeNode { // Takes an extra argument which is the real heap base as a long which // may be useful for code generation in the backend. class DecodeNNode : public DecodeNarrowPtrNode { + uint8_t _barrier_data; public: DecodeNNode(Node* value, const Type* type): - DecodeNarrowPtrNode(value, type) { + DecodeNarrowPtrNode(value, type), _barrier_data(0) { init_class_id(Class_DecodeN); } virtual int Opcode() const; virtual const Type* Value(PhaseGVN* phase) const; virtual Node* Identity(PhaseGVN* phase); + void set_barrier_data(uint8_t barrier_data) { + _barrier_data = barrier_data; + } + uint8_t barrier_data() const { + return _barrier_data; + } }; //------------------------------DecodeNKlass-------------------------------- From 81032a64bbfedddb301b2778c901cb419ee861bc Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Sat, 30 Aug 2025 09:23:10 +0000 Subject: [PATCH 9/9] More late LRB (aarch64) --- src/hotspot/cpu/aarch64/aarch64.ad | 2 - .../shenandoahBarrierSetAssembler_aarch64.cpp | 19 ++++++-- .../shenandoahBarrierSetAssembler_aarch64.hpp | 2 +- .../gc/shenandoah/shenandoah_aarch64.ad | 48 +++++++++++-------- .../shenandoah/c2/shenandoahBarrierSetC2.cpp | 4 +- .../shenandoah/c2/shenandoahBarrierSetC2.hpp | 7 +-- src/hotspot/share/opto/memnode.cpp | 7 +-- src/hotspot/share/opto/narrowptrnode.hpp | 9 +--- 8 files changed, 51 insertions(+), 47 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 61c6fe81d8a44..7a3fc930fc504 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -8200,7 +8200,6 @@ instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{ instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); - predicate(!UseShenandoahGC || n->as_DecodeN()->barrier_data() == 0); match(Set dst (DecodeN src)); 
ins_cost(INSN_COST * 3); format %{ "decode_heap_oop $dst, $src" %} @@ -8215,7 +8214,6 @@ instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); - predicate(!UseShenandoahGC || n->as_DecodeN()->barrier_data() == 0); match(Set dst (DecodeN src)); ins_cost(INSN_COST * 3); format %{ "decode_heap_oop_not_null $dst, $src" %} diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 44f9b0ac1319d..576a84db6cb66 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -611,7 +611,7 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } #ifdef COMPILER2 -void ShenandoahBarrierSetAssembler::load_ref_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register addr, bool narrow, bool maybe_null) { +void ShenandoahBarrierSetAssembler::load_ref_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register addr, Register tmp, bool narrow, bool maybe_null) { if (!ShenandoahLoadRefBarrierStubC2::needs_barrier(node)) { return; } @@ -620,7 +620,7 @@ void ShenandoahBarrierSetAssembler::load_ref_barrier_c2(const MachNode* node, Ma if (maybe_null) { __ cbz(obj, done); } - ShenandoahLoadRefBarrierStubC2* const stub = ShenandoahLoadRefBarrierStubC2::create(node, obj, addr, narrow); + ShenandoahLoadRefBarrierStubC2* const stub = ShenandoahLoadRefBarrierStubC2::create(node, obj, addr, tmp, narrow); // Don't preserve the obj across the runtime call, we override it from the return value anyway. 
stub->dont_preserve(obj); // Check if GC marking is in progress, otherwise we don't have to do anything. @@ -689,22 +689,27 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop_c2(const MachNode* node, void ShenandoahLoadRefBarrierStubC2::emit_code(MacroAssembler& masm) { __ bind(*entry()); + Register obj = _obj; + if (_narrow) { + __ decode_heap_oop(_tmp, _obj); + obj = _tmp; + } // Weak/phantom loads always need to go to runtime. if ((_node->barrier_data() & ShenandoahBarrierStrong) != 0) { // Check for object in cset. __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - __ lsr(rscratch1, _obj, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ lsr(rscratch1, obj, ShenandoahHeapRegion::region_size_bytes_shift_jint()); __ ldrb(rscratch2, Address(rscratch2, rscratch1)); __ cbz(rscratch2, *continuation()); } { SaveLiveRegisters save_registers(&masm, this); - if (c_rarg0 != _obj) { + if (c_rarg0 != obj) { if (c_rarg0 == _addr) { __ mov(rscratch1, _addr); _addr = rscratch1; } - __ mov(c_rarg0, _obj); + __ mov(c_rarg0, obj); } __ mov(c_rarg1, _addr); @@ -728,6 +733,10 @@ void ShenandoahLoadRefBarrierStubC2::emit_code(MacroAssembler& masm) { __ blr(rscratch1); __ mov(_obj, r0); } + if (_narrow) { + __ encode_heap_oop(_obj); + } + __ b(*continuation()); } void ShenandoahSATBBarrierStubC2::emit_code(MacroAssembler& masm) { diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index 2260431550409..d9a1250b90f0e 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -95,7 +95,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { bool acquire, bool release, bool is_cae, Register result); #ifdef COMPILER2 - void load_ref_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register 
addr, bool narrow, bool maybe_null); + void load_ref_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register addr, Register tmp, bool narrow, bool maybe_null); void satb_barrier_c2(const MachNode* node, MacroAssembler* masm, Register obj, Register pre_val); void cmpxchg_oop_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register expected, Register new_val, Register result, bool acquire, bool release, bool weak, bool is_cae); diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad index 0777f514938fb..5614477c25a9c 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad @@ -497,7 +497,7 @@ instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) // used for reference loading (Reference::get()). match(Set dst (LoadP mem)); predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); - effect(TEMP dst, KILL cr); + effect(TEMP_DEF dst, KILL cr); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} ins_encode %{ @@ -508,6 +508,7 @@ instruct loadP_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, $dst$$Register /* obj */, $mem$$Register /* addr */, + noreg /* noreg - not needed */, false /* narrow */, true /* maybe_null */); %} @@ -531,52 +532,57 @@ instruct loadP_volatile_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, $dst$$Register /* obj */, $mem$$Register /* addr */, + noreg /* noreg - not needed */, false /* narrow */, true /* maybe_null */); %} ins_pipe(iload_reg_mem); %} -instruct loadAndDecodeN_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) %{ - match(Set dst (DecodeN (LoadN mem))); - predicate(UseShenandoahGC && !needs_acquiring_load(n->in(1)) && 
n->in(1)->as_Load()->barrier_data() != 0); - effect(TEMP_DEF dst, KILL cr); - ins_cost(INSN_COST * 6); - format %{ "ldr $dst, $mem\t# ptr" - "decode_heap_oop $dst, $dst" - %} +instruct loadN_shenandoah(iRegNNoSp dst, indirect mem, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set dst (LoadN mem)); + predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + ins_cost(INSN_COST * 4); + format %{ "ldrw $dst, $mem\t# compressed ptr" %} ins_encode %{ __ ldrw($dst$$Register, $mem$$Register); - __ decode_heap_oop($dst$$Register, $dst$$Register); - ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, - noreg /* obj */, - $dst$$Register /* pre_val */); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $dst$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + noreg /* obj */, + $tmp$$Register /* pre_val */); + } ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, $dst$$Register /* obj */, $mem$$Register /* addr */, + $tmp$$Register /* tmp */, true /* narrow */, true /* maybe_null */); %} ins_pipe(ialu_reg); %} -instruct loadAndDecodeN_volatile_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr) %{ - predicate(UseShenandoahGC && needs_acquiring_load(n->in(1)) && n->in(1)->as_Load()->barrier_data() != 0); - match(Set dst (DecodeN (LoadN mem))); - effect(TEMP dst, KILL cr); +instruct loadN_volatile_shenandoah(iRegNNoSp dst, indirect mem, iRegPNoSp tmp, rFlagsReg cr) %{ + predicate(UseShenandoahGC && /*needs_acquiring_load(n) && */ n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(INSN_COST * 3); format %{ "ldarw $dst, $mem\t# ptr" "decode_heap_oop $dst, $dst" %} ins_encode %{ __ ldarw($dst$$Register, $mem$$Register); - __ decode_heap_oop($dst$$Register, $dst$$Register); - ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, - noreg 
/* obj */, - $dst$$Register /* pre_val */); + if (ShenandoahSATBBarrierStubC2::needs_barrier(this)) { + __ decode_heap_oop($tmp$$Register, $dst$$Register); + ShenandoahBarrierSet::assembler()->satb_barrier_c2(this, masm, + noreg /* obj */, + $tmp$$Register /* pre_val */); + } ShenandoahBarrierSet::assembler()->load_ref_barrier_c2(this, masm, $dst$$Register /* obj */, $mem$$Register /* addr */, + $tmp$$Register /* tmp */, true /* narrow */, true /* maybe_null */); %} diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index 85e37347215ee..948c42301ca10 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -848,8 +848,8 @@ void ShenandoahBarrierStubC2::register_stub() { } } -ShenandoahLoadRefBarrierStubC2* ShenandoahLoadRefBarrierStubC2::create(const MachNode* node, Register obj, Register addr, bool narrow) { - auto* stub = new (Compile::current()->comp_arena()) ShenandoahLoadRefBarrierStubC2(node, obj, addr, narrow); +ShenandoahLoadRefBarrierStubC2* ShenandoahLoadRefBarrierStubC2::create(const MachNode* node, Register obj, Register addr, Register tmp, bool narrow) { + auto* stub = new (Compile::current()->comp_arena()) ShenandoahLoadRefBarrierStubC2(node, obj, addr, tmp, narrow); stub->register_stub(); return stub; } diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index d1f82b4edce77..5bc639efcd54d 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -149,14 +149,15 @@ class ShenandoahBarrierStubC2 : public BarrierStubC2 { class ShenandoahLoadRefBarrierStubC2 : public ShenandoahBarrierStubC2 { Register _obj; Register _addr; + Register _tmp; bool _narrow; - ShenandoahLoadRefBarrierStubC2(const MachNode* node, 
Register obj, Register addr, bool narrow) : - ShenandoahBarrierStubC2(node), _obj(obj), _addr(addr), _narrow(narrow) {} + ShenandoahLoadRefBarrierStubC2(const MachNode* node, Register obj, Register addr, Register tmp, bool narrow) : + ShenandoahBarrierStubC2(node), _obj(obj), _addr(addr), _tmp(tmp), _narrow(narrow) {} public: static bool needs_barrier(const MachNode* node) { return (node->barrier_data() & (ShenandoahBarrierStrong | ShenandoahBarrierWeak | ShenandoahBarrierPhantom | ShenandoahBarrierNative)) != 0; } - static ShenandoahLoadRefBarrierStubC2* create(const MachNode* node, Register obj, Register addr, bool narrow); + static ShenandoahLoadRefBarrierStubC2* create(const MachNode* node, Register obj, Register addr, Register tmp, bool narrow); void emit_code(MacroAssembler& masm) override; }; diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 63a081cfbed9b..9609b312349fb 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -106,6 +106,7 @@ void MemNode::dump_spec(outputStream *st) const { if (_unsafe_access) { st->print(" unsafe"); } + st->print(" barrier: %u", _barrier_data); } void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) { @@ -872,8 +873,6 @@ uint8_t MemNode::barrier_data(const Node* n) { return n->as_LoadStore()->barrier_data(); } else if (n->is_Mem()) { return n->as_Mem()->barrier_data(); - } else if (n->is_DecodeN()) { - return n->as_DecodeN()->barrier_data(); } return 0; } @@ -1002,9 +1001,7 @@ Node* LoadNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypeP load->set_barrier_data(barrier_data); if (load->Opcode() == Op_LoadN) { Node* ld = gvn.transform(load); - DecodeNNode* decode = new DecodeNNode(ld, ld->bottom_type()->make_ptr()); - decode->set_barrier_data(barrier_data); - return decode; + return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); } return load; diff --git 
a/src/hotspot/share/opto/narrowptrnode.hpp b/src/hotspot/share/opto/narrowptrnode.hpp index 394715111dddc..e7cd19cb42441 100644 --- a/src/hotspot/share/opto/narrowptrnode.hpp +++ b/src/hotspot/share/opto/narrowptrnode.hpp @@ -89,21 +89,14 @@ class DecodeNarrowPtrNode : public TypeNode { // Takes an extra argument which is the real heap base as a long which // may be useful for code generation in the backend. class DecodeNNode : public DecodeNarrowPtrNode { - uint8_t _barrier_data; public: DecodeNNode(Node* value, const Type* type): - DecodeNarrowPtrNode(value, type), _barrier_data(0) { + DecodeNarrowPtrNode(value, type) { init_class_id(Class_DecodeN); } virtual int Opcode() const; virtual const Type* Value(PhaseGVN* phase) const; virtual Node* Identity(PhaseGVN* phase); - void set_barrier_data(uint8_t barrier_data) { - _barrier_data = barrier_data; - } - uint8_t barrier_data() const { - return _barrier_data; - } }; //------------------------------DecodeNKlass--------------------------------