From: Daniel Golle Subject: [PATCH] JavaScriptCore: RISCV64: wire up A-extension atomics in BBQJIT OpenWrt's RISC-V baseline is rv64gc/lp64d, which always includes the standard A-extension. Replace the 32/64-bit UNIMPLEMENTED_METHOD stubs in MacroAssemblerRISCV64.h with real implementations and switch BBQJIT to drive them, so wasm atomic ops compiled in BBQJIT are properly multi-thread-safe. * 32/64-bit primitives map directly to LR.{W,D}.aq / SC.{W,D}.rl and to AMOSWAP.{W,D}, AMOADD.{W,D}, AMOAND/OR/XOR.{W,D} (all .aqrl). atomicXchgClear is "atomic AND NOT"; base A has no AMOANDN, so it is synthesised as XORI with -1 (bitwise NOT) followed by AMOAND. atomicStrongCAS{32,64} is a tight LR.aq / SC.rl retry loop. * 8/16-bit primitives are not provided by base A (Zabha is optional and not in rv64gc), so the 8/16-bit MacroAssembler stubs stay UNIMPLEMENTED. BBQJIT for Width8/Width16 now emits an inline word-aligned LR.W/SC.W byte-mask loop covering all three caller paths (emitAtomicLoadOp, emitAtomicStoreOp, emitAtomicBinaryRMWOp) via a new emitAtomicOpGenericRISCV64ByteMask helper, plus the cmpxchg path in emitAtomicCompareExchange. The helper takes the caller's valueLocation as a ScratchScope preserve arg so its 4 extra scratches never alias an input register that has been consume()-d but is still read inside the loop. * X86-style 5-arg atomicStrongCAS{32,64} overloads are added as RELEASE_ASSERT_NOT_REACHED stubs: BBQJIT's emitStrongCAS returns early on RISCV64 so those call sites are dead at runtime, but the source still needs them to typecheck. The 2-arg atomicXchg* overloads exist for the same reason and simply forward to the 3-arg forms. The wasm threads spec tests (atomic.wast.js, atomic-signed.wast.js, memory.wast.js, wait-large.wast.js) pass in both IPInt+BBQ and BBQ-only modes on a StarFive VisionFive 2. 
Signed-off-by: Daniel Golle --- --- a/Source/JavaScriptCore/assembler/RISCV64Assembler.h +++ b/Source/JavaScriptCore/assembler/RISCV64Assembler.h @@ -1832,6 +1832,37 @@ public: void remwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMW::construct(rd, rs1, rs2)); } void remuwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMUW::construct(rd, rs1, rs2)); } + // RV{32,64}A standard A-extension (always present in rv64gc). + // For sequential consistency pass { Acquire, Release } (.aqrl). + void lr_wInsn(RegisterID rd, RegisterID rs1, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::LR_W::construct(rd, rs1, RegisterID::zero, aqrl)); } + void lr_dInsn(RegisterID rd, RegisterID rs1, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::LR_D::construct(rd, rs1, RegisterID::zero, aqrl)); } + void sc_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::SC_W::construct(rd, rs1, rs2, aqrl)); } + void sc_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::SC_D::construct(rd, rs1, rs2, aqrl)); } + void amoswap_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOSWAP_W::construct(rd, rs1, rs2, aqrl)); } + void amoswap_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOSWAP_D::construct(rd, rs1, rs2, aqrl)); } + void amoadd_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOADD_W::construct(rd, rs1, rs2, aqrl)); } + void amoadd_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOADD_D::construct(rd, rs1, rs2, aqrl)); } + void amoxor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { 
insn(RISCV64Instructions::AMOXOR_W::construct(rd, rs1, rs2, aqrl)); } + void amoxor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOXOR_D::construct(rd, rs1, rs2, aqrl)); } + void amoand_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOAND_W::construct(rd, rs1, rs2, aqrl)); } + void amoand_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOAND_D::construct(rd, rs1, rs2, aqrl)); } + void amoor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOOR_W::construct(rd, rs1, rs2, aqrl)); } + void amoor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<MemoryAccess> aqrl) + { insn(RISCV64Instructions::AMOOR_D::construct(rd, rs1, rs2, aqrl)); } + using FCVTType = RISCV64Instructions::FCVTType; using FMVType = RISCV64Instructions::FMVType; --- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h +++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h @@ -2448,55 +2448,169 @@ public: MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUnzipEven); MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorZipUpper); - // Wasm atomics: the RISC-V A extension is available (the OpenWrt -march - // baseline is rv64gc, i.e. includes A), but the AMO/LR/SC instruction - // emitters in RISCV64Assembler.h have not been added yet. Stub the - // BBQJIT atomic API with hard-fault unimplemented methods: at runtime - // wasm shared memory is gated off via useSharedArrayBuffer = false, so - // wasm atomic opcodes are unreachable, and these stubs only ever exist - // for compile-time completeness. Filling these in (and adding the - // matching RISCV64Assembler.h emitters) is a follow-up that unlocks the - // wasm threads proposal on RISCV64. 
+ // RV64A standard A-extension (always present in rv64gc): real impls + // for 32/64-bit primitives. 8/16-bit primitives stay UNIMPLEMENTED + // because base RV64A has no byte/half AMOs (Zabha is optional, not + // in rv64gc); BBQJIT instead emits an inline word-aligned LR.W/SC.W + // byte-mask loop for Width8/Width16 accesses (see + // emitAtomicOpGenericRISCV64ByteMask in WasmBBQJIT64.cpp). MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq64); + void loadLinkAcq32(Address address, RegisterID dest) + { + ASSERT(!address.offset); + m_assembler.lr_wInsn(dest, address.base, { Assembler::MemoryAccess::Acquire }); + } + void loadLinkAcq64(Address address, RegisterID dest) + { + ASSERT(!address.offset); + m_assembler.lr_dInsn(dest, address.base, { Assembler::MemoryAccess::Acquire }); + } MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel64); + void storeCondRel32(RegisterID value, Address address, RegisterID status) + { + ASSERT(!address.offset); + m_assembler.sc_wInsn(status, address.base, value, { Assembler::MemoryAccess::Release }); + } + void storeCondRel64(RegisterID value, Address address, RegisterID status) + { + ASSERT(!address.offset); + m_assembler.sc_dInsn(status, address.base, value, { Assembler::MemoryAccess::Release }); + } MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS8, Jump); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS16, Jump); 
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS32, Jump); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS64, Jump); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg64); + void atomicXchg32(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoswap_wInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchg64(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoswap_dInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + // 2-arg X86-style overloads (input-and-result in the same register). + // Live only in BBQJIT's isX86_64() branch, which is never taken at + // runtime on RISC-V; provided so the source still compiles. 
+ void atomicXchg32(RegisterID valueAndResult, Address address) { atomicXchg32(valueAndResult, address, valueAndResult); } + void atomicXchg64(RegisterID valueAndResult, Address address) { atomicXchg64(valueAndResult, address, valueAndResult); } MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd64); + void atomicXchgAdd32(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoadd_wInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgAdd64(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoadd_dInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgAdd32(RegisterID valueAndResult, Address address) { atomicXchgAdd32(valueAndResult, address, valueAndResult); } + void atomicXchgAdd64(RegisterID valueAndResult, Address address) { atomicXchgAdd64(valueAndResult, address, valueAndResult); } MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear64); + // atomicXchgClear is "atomic AND NOT": no AMOANDN in base A; xori-1 + AMOAND. 
+ void atomicXchgClear32(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + auto t = temps(); + m_assembler.xoriInsn(t.data(), value, Imm::I<-1>()); + m_assembler.amoand_wInsn(result, address.base, t.data(), + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgClear64(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + auto t = temps(); + m_assembler.xoriInsn(t.data(), value, Imm::I<-1>()); + m_assembler.amoand_dInsn(result, address.base, t.data(), + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgClear32(RegisterID valueAndResult, Address address) { atomicXchgClear32(valueAndResult, address, valueAndResult); } + void atomicXchgClear64(RegisterID valueAndResult, Address address) { atomicXchgClear64(valueAndResult, address, valueAndResult); } MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr64); + void atomicXchgOr32(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoor_wInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgOr64(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoor_dInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgOr32(RegisterID valueAndResult, Address address) { atomicXchgOr32(valueAndResult, address, valueAndResult); } + void atomicXchgOr64(RegisterID valueAndResult, Address address) { atomicXchgOr64(valueAndResult, address, valueAndResult); } 
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor64); - // atomicStrongCAS{N}: the non-branching CAS overloads used by BBQJIT - // when the caller only needs success/failure in resultGPR (rather - // than a JIT-emitted branch). Same runtime-unreachable rationale as - // branchAtomicStrongCAS{N} above. + void atomicXchgXor32(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoxor_wInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgXor64(RegisterID value, Address address, RegisterID result) + { + ASSERT(!address.offset); + m_assembler.amoxor_dInsn(result, address.base, value, + { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); + } + void atomicXchgXor32(RegisterID valueAndResult, Address address) { atomicXchgXor32(valueAndResult, address, valueAndResult); } + void atomicXchgXor64(RegisterID valueAndResult, Address address) { atomicXchgXor64(valueAndResult, address, valueAndResult); } + // atomicStrongCAS{32,64}(expectedAndResult, newValue, address): + // Loads *address into expectedAndResult; if old == caller's expected, + // stores newValue. Same external contract as ARM64-LSE casa. Caller + // checks expectedAndResult == old-expected to detect success. 
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS8); MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS16); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS32); - MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS64); + void atomicStrongCAS32(RegisterID expectedAndResult, RegisterID newValue, Address address) + { + ASSERT(!address.offset); + auto t = temps(); + Label loop = label(); + m_assembler.lr_wInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire }); + m_assembler.addiwInsn(t.memory(), expectedAndResult, Imm::I<0>()); + Jump mismatch = makeBranch(NotEqual, t.data(), t.memory()); + m_assembler.sc_wInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release }); + Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero); + scFail.linkTo(loop, this); + mismatch.link(this); + m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>()); + } + void atomicStrongCAS64(RegisterID expectedAndResult, RegisterID newValue, Address address) + { + ASSERT(!address.offset); + auto t = temps(); + Label loop = label(); + m_assembler.lr_dInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire }); + Jump mismatch = makeBranch(NotEqual, t.data(), expectedAndResult); + m_assembler.sc_dInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release }); + Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero); + scFail.linkTo(loop, this); + mismatch.link(this); + m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>()); + } + // 5-arg StatusCondition form (X86-style). Live only in BBQJIT's + // isX86_64() branch -- on RISC-V the surrounding code exits via + // an earlier `return;` so this never runs at runtime. Provide a + // viable overload so the source still compiles. 
+ void atomicStrongCAS32(StatusCondition, RegisterID, RegisterID, Address, RegisterID) + { RELEASE_ASSERT_NOT_REACHED(); } + void atomicStrongCAS64(StatusCondition, RegisterID, RegisterID, Address, RegisterID) + { RELEASE_ASSERT_NOT_REACHED(); } // Additional SIMD vector noop stubs uncovered by enabling BBQJIT. MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSplat); MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshl8); --- a/Source/JavaScriptCore/wasm/WasmBBQJIT.h +++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.h @@ -1335,6 +1335,11 @@ public: template<typename Functor> void emitAtomicOpGeneric(ExtAtomicOpType op, Address address, Location old, Location cur, const Functor& functor); +#if CPU(RISCV64) && USE(JSVALUE64) + template<typename Functor> + void emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor); +#endif + [[nodiscard]] Value emitAtomicLoadOp(ExtAtomicOpType loadOp, Type valueType, Location pointer, uint32_t uoffset); [[nodiscard]] PartialResult atomicLoad(ExtAtomicOpType loadOp, Type valueType, ExpressionType pointer, ExpressionType& result, uint32_t uoffset); --- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp +++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp @@ -540,6 +540,47 @@ void BBQJIT::emitSanitizeAtomicResult(Ex emitSanitizeAtomicResult(op, resultType, result, result); } +#if CPU(RISCV64) +template<typename Functor> +void BBQJIT::emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor) +{ + Width accessWidth = this->accessWidth(op); + ASSERT(accessWidth == Width8 || accessWidth == Width16); + + ScratchScope<4, 0> rvScratches(*this, Location::fromGPR(oldGPR), Location::fromGPR(scratchGPR), valueLocation); + GPRReg alignedAddr = rvScratches.gpr(0); + GPRReg shift = rvScratches.gpr(1); + GPRReg invMask = rvScratches.gpr(2); + GPRReg rawOld = rvScratches.gpr(3); + int32_t byteMask = (accessWidth == 
Width8) ? 0xFF : 0xFFFF; + + m_jit.move(address.base, alignedAddr); + m_jit.and64(TrustedImm32(-4), alignedAddr); + m_jit.move(address.base, shift); + m_jit.and64(TrustedImm32(3), shift); + m_jit.lshift64(TrustedImm32(3), shift); + + m_jit.move(TrustedImm32(byteMask), invMask); + m_jit.lshift64(shift, invMask); + m_jit.not64(invMask); + + auto reloopLabel = m_jit.label(); + m_jit.loadLinkAcq32(Address(alignedAddr), rawOld); + m_jit.urshift64(rawOld, shift, oldGPR); + m_jit.and64(TrustedImm32(byteMask), oldGPR); + + functor(oldGPR, scratchGPR); + + m_jit.and64(TrustedImm32(byteMask), scratchGPR); + m_jit.lshift64(shift, scratchGPR); + m_jit.and64(invMask, rawOld); + m_jit.or64(scratchGPR, rawOld); + + m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR); + m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit); +} +#endif + template<typename Functor> void BBQJIT::emitAtomicOpGeneric(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, const Functor& functor) { @@ -573,14 +614,14 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom #endif break; case Width32: -#if CPU(ARM64) +#if CPU(ARM64) || CPU(RISCV64) m_jit.loadLinkAcq32(address, oldGPR); #else m_jit.load32(address, oldGPR); #endif break; case Width64: -#if CPU(ARM64) +#if CPU(ARM64) || CPU(RISCV64) m_jit.loadLinkAcq64(address, oldGPR); #else m_jit.load64(address, oldGPR); @@ -629,28 +670,25 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom } m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit); #elif CPU(RISCV64) - // Slow path: plain load+store (no LR/SC). rv64gc does include the - // A-extension, but MacroAssemblerRISCV64.h's loadLinkAcq/storeCondRel - // primitives are still stubs. This is single-threaded-correct only; - // multi-threaded code would race. TODO: emit amo.* / lr.d+sc.d for - // a truly atomic version. 
switch (accessWidth) { case Width8: m_jit.store8(scratchGPR, address); + m_jit.move(TrustedImm32(0), scratchGPR); break; case Width16: m_jit.store16(scratchGPR, address); + m_jit.move(TrustedImm32(0), scratchGPR); break; case Width32: - m_jit.store32(scratchGPR, address); + m_jit.storeCondRel32(scratchGPR, address, scratchGPR); break; case Width64: - m_jit.store64(scratchGPR, address); + m_jit.storeCondRel64(scratchGPR, address, scratchGPR); break; case Width128: RELEASE_ASSERT_NOT_REACHED(); } - UNUSED_PARAM(reloopLabel); + m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit); #endif } @@ -671,9 +709,16 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom if (!(isARM64_LSE() || isX86_64())) { ScratchScope<1, 0> scratches(*this); - emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), [&](GPRReg oldGPR, GPRReg newGPR) { + auto opFunctor = [&](GPRReg oldGPR, GPRReg newGPR) { emitSanitizeAtomicResult(loadOp, canonicalWidth(accessWidth(loadOp)) == Width64 ? 
TypeKind::I64 : TypeKind::I32, oldGPR, newGPR); - }); + }; +#if CPU(RISCV64) + Width w = accessWidth(loadOp); + if (w == Width8 || w == Width16) + emitAtomicOpGenericRISCV64ByteMask(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), Location(), opFunctor); + else +#endif + emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), opFunctor); emitSanitizeAtomicResult(loadOp, valueType.kind, resultLocation.asGPR()); return result; } @@ -778,9 +823,16 @@ void BBQJIT::emitAtomicStoreOp(ExtAtomic consume(value); if (!(isARM64_LSE() || isX86_64())) { - emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, [&](GPRReg, GPRReg newGPR) { + auto opFunctor = [&](GPRReg, GPRReg newGPR) { m_jit.move(valueLocation.asGPR(), newGPR); - }); + }; +#if CPU(RISCV64) + Width w = accessWidth(storeOp); + if (w == Width8 || w == Width16) + emitAtomicOpGenericRISCV64ByteMask(storeOp, address, scratch1GPR, scratch2GPR, valueLocation, opFunctor); + else +#endif + emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, opFunctor); return; } @@ -1135,7 +1187,7 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA break; } - emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, [&](GPRReg oldGPR, GPRReg newGPR) { + auto rmwFunctor = [&](GPRReg oldGPR, GPRReg newGPR) { switch (op) { case ExtAtomicOpType::I32AtomicRmw16AddU: case ExtAtomicOpType::I32AtomicRmw8AddU: @@ -1205,7 +1257,13 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA RELEASE_ASSERT_NOT_REACHED(); break; } - }); + }; +#if CPU(RISCV64) + if (accessWidth(op) == Width8 || accessWidth(op) == Width16) + emitAtomicOpGenericRISCV64ByteMask(op, address, resultLocation.asGPR(), scratchGPR, valueLocation, rmwFunctor); + else +#endif + emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, rmwFunctor); emitSanitizeAtomicResult(op, valueType.kind, resultLocation.asGPR()); return result; } @@ -1285,46 +1343,55 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA } #if CPU(RISCV64) - // Slow 
path: non-atomic CAS. Load current into resultGPR, compare - // to expectedGPR, store valueGPR only on equality. Single-threaded - // correct only. TODO: emit lr.{d,w}+sc.{d,w} for a truly atomic - // version using the rv64gc A-extension. - switch (accessWidth) { - case Width8: - m_jit.load8(address, resultGPR); - break; - case Width16: - m_jit.load16(address, resultGPR); - break; - case Width32: - m_jit.load32(address, resultGPR); - break; - case Width64: - m_jit.load64(address, resultGPR); - break; - default: - RELEASE_ASSERT_NOT_REACHED(); - break; + // rv64gc A-extension. For 32/64 atomicStrongCAS is an LR.aq/SC.rl loop. + // For 8/16 the base A-ext has no byte/half AMOs (Zabha is + // optional, not in rv64gc); emit a word-aligned LR.W/SC.W + // byte-mask CAS loop -- properly atomic. + if (accessWidth == Width8 || accessWidth == Width16) { + ScratchScope<4, 0> rvScratches(*this, valueLocation, expectedLocation, resultLocation); + GPRReg alignedAddr = rvScratches.gpr(0); + GPRReg shift = rvScratches.gpr(1); + GPRReg invMask = rvScratches.gpr(2); + GPRReg rawOld = rvScratches.gpr(3); + int32_t byteMask = (accessWidth == Width8) ? 
0xFF : 0xFFFF; + + m_jit.move(address.base, alignedAddr); + m_jit.and64(TrustedImm32(-4), alignedAddr); + m_jit.move(address.base, shift); + m_jit.and64(TrustedImm32(3), shift); + m_jit.lshift64(TrustedImm32(3), shift); + + m_jit.move(TrustedImm32(byteMask), invMask); + m_jit.lshift64(shift, invMask); + m_jit.not64(invMask); + + auto loop = m_jit.label(); + m_jit.loadLinkAcq32(Address(alignedAddr), rawOld); + m_jit.urshift64(rawOld, shift, resultGPR); + m_jit.and64(TrustedImm32(byteMask), resultGPR); + Jump mismatch = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR); + m_jit.and64(TrustedImm32(byteMask), valueGPR, scratchGPR); + m_jit.lshift64(shift, scratchGPR); + m_jit.and64(invMask, rawOld); + m_jit.or64(scratchGPR, rawOld); + m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR); + m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(loop, &m_jit); + mismatch.link(&m_jit); + return; } - auto notEqual = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR); switch (accessWidth) { - case Width8: - m_jit.store8(valueGPR, address); - break; - case Width16: - m_jit.store16(valueGPR, address); - break; case Width32: - m_jit.store32(valueGPR, address); + m_jit.move(expectedGPR, resultGPR); + m_jit.atomicStrongCAS32(resultGPR, valueGPR, address); break; case Width64: - m_jit.store64(valueGPR, address); + m_jit.move(expectedGPR, resultGPR); + m_jit.atomicStrongCAS64(resultGPR, valueGPR, address); break; default: RELEASE_ASSERT_NOT_REACHED(); break; } - notEqual.link(&m_jit); UNUSED_PARAM(scratchGPR); return; #endif