Files
video/libs/wpewebkit/patches/148-JavaScriptCore-RISCV64-A-extension-atomics.patch
T
Daniel Golle df0b899123 wpewebkit: update to version 2.52.3
Update WPEWebKit to the 2.52 stable major release branch.

Includes a pending patchset to get WASM BBQJIT working on RISCV64,
upstream PR https://github.com/WebKit/WebKit/pull/65621

Altogether this brings acceptable performance (even with LLVMPipe
Mesa software renderer) on RISCV64.

Link: https://wpewebkit.org/release/wpewebkit-2.52.0.html
Link: https://wpewebkit.org/release/wpewebkit-2.52.1.html
Link: https://wpewebkit.org/release/wpewebkit-2.52.2.html
Link: https://wpewebkit.org/release/wpewebkit-2.52.3.html
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
2026-05-27 19:27:26 +01:00

540 lines
28 KiB
Diff

From: Daniel Golle <daniel@makrotopia.org>
Subject: [PATCH] JavaScriptCore: RISCV64: wire up A-extension atomics in BBQJIT
OpenWrt's RISC-V baseline is rv64gc/lp64d, which always includes the
standard A-extension. Replace the UNIMPLEMENTED_METHOD stubs in
MacroAssemblerRISCV64.h with real implementations and switch BBQJIT to
drive them, so wasm atomic ops compiled in BBQJIT are properly
multi-thread-safe.
* 32/64-bit primitives map directly to LR.{W,D}.aq / SC.{W,D}.rl,
AMOSWAP.{W,D}, AMOADD.{W,D}, AMOAND/OR/XOR.{W,D}. atomicXchgClear
is "atomic AND NOT"; base A has no AMOANDN, so synthesise as
xori-1 + AMOAND. atomicStrongCAS{32,64} is a tight LR.aq/SC.rl loop.
* 8/16-bit primitives are not provided by base A (Zabha is optional
and not in rv64gc). BBQJIT for Width8/Width16 now emits an inline
word-aligned LR.W/SC.W byte-mask loop covering all three caller
paths (emitAtomicLoadOp, emitAtomicStoreOp, emitAtomicBinaryRMWOp)
via a new emitAtomicOpGenericRISCV64ByteMask helper, plus the
cmpxchg path in emitAtomicCompareExchange. The helper takes the
caller's valueLocation as a ScratchScope preserve arg so its 4
extra scratches never alias an input register that has been
consume()-d but is still read inside the loop.
* X86-style 5-arg atomicStrongCAS{32,64} overloads are added as
stubs: BBQJIT's emitStrongCAS returns early on RISCV64 so those
call sites are dead at runtime, but the source still needs them
to typecheck.
The wasm threads spec tests (atomic.wast.js, atomic-signed.wast.js,
memory.wast.js, wait-large.wast.js) pass in both IPInt+BBQ and BBQ-only
modes on a StarFive VisionFive 2.
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
---
--- a/Source/JavaScriptCore/assembler/RISCV64Assembler.h
+++ b/Source/JavaScriptCore/assembler/RISCV64Assembler.h
@@ -1832,6 +1832,37 @@ public:
void remwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMW::construct(rd, rs1, rs2)); }
void remuwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMUW::construct(rd, rs1, rs2)); }
+ // RV{32,64}A standard A-extension emitters (the extension is always present in rv64gc).
+ // aqrl selects the ordering bits; for sequential consistency pass { Acquire, Release } (.aqrl).
+ void lr_wInsn(RegisterID rd, RegisterID rs1, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::LR_W::construct(rd, rs1, RegisterID::zero, aqrl)); } // No rs2 parameter: the slot is filled with the zero register.
+ void lr_dInsn(RegisterID rd, RegisterID rs1, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::LR_D::construct(rd, rs1, RegisterID::zero, aqrl)); } // Same as lr_wInsn, for the doubleword form.
+ void sc_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::SC_W::construct(rd, rs1, rs2, aqrl)); } // Callers pass the status register as rd and the value to store as rs2.
+ void sc_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::SC_D::construct(rd, rs1, rs2, aqrl)); } // Same operand roles as sc_wInsn.
+ void amoswap_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOSWAP_W::construct(rd, rs1, rs2, aqrl)); } // For every AMO below, callers pass result as rd, the base address as rs1 and the operand as rs2.
+ void amoswap_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOSWAP_D::construct(rd, rs1, rs2, aqrl)); }
+ void amoadd_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOADD_W::construct(rd, rs1, rs2, aqrl)); }
+ void amoadd_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOADD_D::construct(rd, rs1, rs2, aqrl)); }
+ void amoxor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOXOR_W::construct(rd, rs1, rs2, aqrl)); }
+ void amoxor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOXOR_D::construct(rd, rs1, rs2, aqrl)); }
+ void amoand_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOAND_W::construct(rd, rs1, rs2, aqrl)); }
+ void amoand_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOAND_D::construct(rd, rs1, rs2, aqrl)); }
+ void amoor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOOR_W::construct(rd, rs1, rs2, aqrl)); }
+ void amoor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
+ { insn(RISCV64Instructions::AMOOR_D::construct(rd, rs1, rs2, aqrl)); }
+
using FCVTType = RISCV64Instructions::FCVTType;
using FMVType = RISCV64Instructions::FMVType;
--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
@@ -2448,55 +2448,169 @@ public:
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUnzipEven);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorZipUpper);
- // Wasm atomics: the RISC-V A extension is available (the OpenWrt -march
- // baseline is rv64gc, i.e. includes A), but the AMO/LR/SC instruction
- // emitters in RISCV64Assembler.h have not been added yet. Stub the
- // BBQJIT atomic API with hard-fault unimplemented methods: at runtime
- // wasm shared memory is gated off via useSharedArrayBuffer = false, so
- // wasm atomic opcodes are unreachable, and these stubs only ever exist
- // for compile-time completeness. Filling these in (and adding the
- // matching RISCV64Assembler.h emitters) is a follow-up that unlocks the
- // wasm threads proposal on RISCV64.
+ // RV64A standard A-extension (always present in rv64gc): real impls
+ // for 32/64-bit primitives. 8/16-bit primitives stay UNIMPLEMENTED
+ // because base RV64A has no byte/half AMOs (Zabha is optional, not
+ // in rv64gc); BBQJIT instead emits an inline word-aligned
+ // LR.W/SC.W byte-mask loop for 8/16-bit atomic ops (see
+ // emitAtomicOpGenericRISCV64ByteMask in WasmBBQJIT64.cpp).
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq64);
+ void loadLinkAcq32(Address address, RegisterID dest) // Emits LR.W with the Acquire bit: 32-bit load-reserved into dest.
+ {
+ ASSERT(!address.offset); // lr_wInsn takes only a base register, so a displacement cannot be honoured.
+ m_assembler.lr_wInsn(dest, address.base, { Assembler::MemoryAccess::Acquire });
+ }
+ void loadLinkAcq64(Address address, RegisterID dest) // Emits LR.D with the Acquire bit: 64-bit load-reserved into dest.
+ {
+ ASSERT(!address.offset); // lr_dInsn takes only a base register, so a displacement cannot be honoured.
+ m_assembler.lr_dInsn(dest, address.base, { Assembler::MemoryAccess::Acquire });
+ }
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel64);
+ void storeCondRel32(RegisterID value, Address address, RegisterID status) // Emits SC.W with the Release bit: conditional 32-bit store of value.
+ {
+ ASSERT(!address.offset); // sc_wInsn takes only a base register.
+ m_assembler.sc_wInsn(status, address.base, value, { Assembler::MemoryAccess::Release }); // status is the SC destination; BBQJIT callers retry while it is non-zero.
+ }
+ void storeCondRel64(RegisterID value, Address address, RegisterID status) // Emits SC.D with the Release bit: conditional 64-bit store of value.
+ {
+ ASSERT(!address.offset); // sc_dInsn takes only a base register.
+ m_assembler.sc_dInsn(status, address.base, value, { Assembler::MemoryAccess::Release }); // status is the SC destination; BBQJIT callers retry while it is non-zero.
+ }
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS8, Jump);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS16, Jump);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS32, Jump);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS64, Jump);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg64);
+ void atomicXchg32(RegisterID value, Address address, RegisterID result) // 32-bit atomic exchange: a single AMOSWAP.W with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoswap_wInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchg64(RegisterID value, Address address, RegisterID result) // 64-bit atomic exchange: a single AMOSWAP.D with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoswap_dInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ // 2-arg X86-style overloads (input-and-result in the same register).
+ // Live only in BBQJIT's isX86_64() branch, which is never taken at
+ // runtime on RISC-V; provided so the source still compiles.
+ void atomicXchg32(RegisterID valueAndResult, Address address) { atomicXchg32(valueAndResult, address, valueAndResult); } // Forwards with value == result.
+ void atomicXchg64(RegisterID valueAndResult, Address address) { atomicXchg64(valueAndResult, address, valueAndResult); } // Forwards with value == result.
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd64);
+ void atomicXchgAdd32(RegisterID value, Address address, RegisterID result) // 32-bit atomic fetch-and-add: a single AMOADD.W with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoadd_wInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgAdd64(RegisterID value, Address address, RegisterID result) // 64-bit atomic fetch-and-add: a single AMOADD.D with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoadd_dInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgAdd32(RegisterID valueAndResult, Address address) { atomicXchgAdd32(valueAndResult, address, valueAndResult); } // 2-arg form: forwards with value == result.
+ void atomicXchgAdd64(RegisterID valueAndResult, Address address) { atomicXchgAdd64(valueAndResult, address, valueAndResult); } // 2-arg form: forwards with value == result.
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear64);
+ // atomicXchgClear is "atomic AND NOT": no AMOANDN in base A; xori-1 + AMOAND.
+ void atomicXchgClear32(RegisterID value, Address address, RegisterID result)
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ auto t = temps<Data>(); // The data temp holds ~value so that value itself is left untouched.
+ m_assembler.xoriInsn(t.data(), value, Imm::I<-1>()); // XOR with -1 (all ones) is bitwise NOT.
+ m_assembler.amoand_wInsn(result, address.base, t.data(),
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgClear64(RegisterID value, Address address, RegisterID result)
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ auto t = temps<Data>(); // The data temp holds ~value so that value itself is left untouched.
+ m_assembler.xoriInsn(t.data(), value, Imm::I<-1>()); // XOR with -1 (all ones) is bitwise NOT.
+ m_assembler.amoand_dInsn(result, address.base, t.data(),
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgClear32(RegisterID valueAndResult, Address address) { atomicXchgClear32(valueAndResult, address, valueAndResult); } // value == result is fine: the complement is taken into the temp before the AMO writes result.
+ void atomicXchgClear64(RegisterID valueAndResult, Address address) { atomicXchgClear64(valueAndResult, address, valueAndResult); } // value == result is fine: the complement is taken into the temp before the AMO writes result.
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr64);
+ void atomicXchgOr32(RegisterID value, Address address, RegisterID result) // 32-bit atomic fetch-and-or: a single AMOOR.W with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoor_wInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgOr64(RegisterID value, Address address, RegisterID result) // 64-bit atomic fetch-and-or: a single AMOOR.D with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoor_dInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgOr32(RegisterID valueAndResult, Address address) { atomicXchgOr32(valueAndResult, address, valueAndResult); } // 2-arg form: forwards with value == result.
+ void atomicXchgOr64(RegisterID valueAndResult, Address address) { atomicXchgOr64(valueAndResult, address, valueAndResult); } // 2-arg form: forwards with value == result.
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor64);
- // atomicStrongCAS{N}: the non-branching CAS overloads used by BBQJIT
- // when the caller only needs success/failure in resultGPR (rather
- // than a JIT-emitted branch). Same runtime-unreachable rationale as
- // branchAtomicStrongCAS{N} above.
+ void atomicXchgXor32(RegisterID value, Address address, RegisterID result) // 32-bit atomic fetch-and-xor: a single AMOXOR.W with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoxor_wInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgXor64(RegisterID value, Address address, RegisterID result) // 64-bit atomic fetch-and-xor: a single AMOXOR.D with both ordering bits set.
+ {
+ ASSERT(!address.offset); // The AMO emitter takes only a base register.
+ m_assembler.amoxor_dInsn(result, address.base, value,
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
+ }
+ void atomicXchgXor32(RegisterID valueAndResult, Address address) { atomicXchgXor32(valueAndResult, address, valueAndResult); } // 2-arg form: forwards with value == result.
+ void atomicXchgXor64(RegisterID valueAndResult, Address address) { atomicXchgXor64(valueAndResult, address, valueAndResult); } // 2-arg form: forwards with value == result.
+ // atomicStrongCAS{32,64}(expectedAndResult, newValue, address):
+ // Loads *address into expectedAndResult; if old == caller's expected,
+ // stores newValue. Same external contract as ARM64-LSE casa. Caller
+ // checks expectedAndResult == old-expected to detect success.
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS8);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS16);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS64);
+ void atomicStrongCAS32(RegisterID expectedAndResult, RegisterID newValue, Address address) // 32-bit compare-and-swap built from an LR.W (Acquire) / SC.W (Release) retry loop.
+ {
+ ASSERT(!address.offset); // The LR/SC emitters take only a base register.
+ auto t = temps<Data, Memory>(); // data: the loaded word. memory: first the sign-extended expected value, then the SC status.
+ Label loop = label(); // Retry target when the store-conditional reports failure.
+ m_assembler.lr_wInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire });
+ m_assembler.addiwInsn(t.memory(), expectedAndResult, Imm::I<0>()); // addiw +0 sign-extends the low 32 bits of expected, matching LR.W's sign-extended result on RV64.
+ Jump mismatch = makeBranch(NotEqual, t.data(), t.memory()); // Memory differs from expected: skip the store.
+ m_assembler.sc_wInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release }); // The status overwrites the expected copy, which is recomputed on every retry.
+ Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero); // Non-zero status: go round again.
+ scFail.linkTo(loop, this);
+ mismatch.link(this);
+ m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>()); // Both exits copy the loaded value into expectedAndResult (addi +0 acts as a move).
+ }
+ void atomicStrongCAS64(RegisterID expectedAndResult, RegisterID newValue, Address address) // 64-bit compare-and-swap built from an LR.D (Acquire) / SC.D (Release) retry loop.
+ {
+ ASSERT(!address.offset); // The LR/SC emitters take only a base register.
+ auto t = temps<Data, Memory>(); // data: the loaded doubleword. memory: the SC status.
+ Label loop = label(); // Retry target when the store-conditional reports failure.
+ m_assembler.lr_dInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire });
+ Jump mismatch = makeBranch(NotEqual, t.data(), expectedAndResult); // Full 64-bit compare, so no sign-extension step is needed here.
+ m_assembler.sc_dInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release });
+ Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero); // Non-zero status: go round again.
+ scFail.linkTo(loop, this);
+ mismatch.link(this);
+ m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>()); // Both exits copy the loaded value into expectedAndResult (addi +0 acts as a move).
+ }
+ // 5-arg StatusCondition form (X86-style). Used only by BBQJIT's
+ // isX86_64() branch -- on RISC-V the surrounding code exits via
+ // an earlier `return;` so these are never expected to run. They exist
+ // only so the shared source still compiles, and crash if reached.
+ void atomicStrongCAS32(StatusCondition, RegisterID, RegisterID, Address, RegisterID)
+ { RELEASE_ASSERT_NOT_REACHED(); }
+ void atomicStrongCAS64(StatusCondition, RegisterID, RegisterID, Address, RegisterID)
+ { RELEASE_ASSERT_NOT_REACHED(); }
// Additional SIMD vector noop stubs uncovered by enabling BBQJIT.
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSplat);
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshl8);
--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.h
+++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.h
@@ -1335,6 +1335,11 @@ public:
template<typename Functor>
void emitAtomicOpGeneric(ExtAtomicOpType op, Address address, Location old, Location cur, const Functor& functor);
+#if CPU(RISCV64) && USE(JSVALUE64)
+ template<typename Functor>
+ void emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor);
+#endif
+
[[nodiscard]] Value emitAtomicLoadOp(ExtAtomicOpType loadOp, Type valueType, Location pointer, uint32_t uoffset);
[[nodiscard]] PartialResult atomicLoad(ExtAtomicOpType loadOp, Type valueType, ExpressionType pointer, ExpressionType& result, uint32_t uoffset);
--- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp
+++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp
@@ -540,6 +540,47 @@ void BBQJIT::emitSanitizeAtomicResult(Ex
emitSanitizeAtomicResult(op, resultType, result, result);
}
+#if CPU(RISCV64)
+template<typename Functor>
+void BBQJIT::emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor) // Emits an 8/16-bit atomic read-modify-write as an LR.W/SC.W loop on the containing aligned 32-bit word.
+{
+ Width accessWidth = this->accessWidth(op);
+ ASSERT(accessWidth == Width8 || accessWidth == Width16); // Callers send 32/64-bit accesses to emitAtomicOpGeneric instead.
+
+ ScratchScope<4, 0> rvScratches(*this, Location::fromGPR(oldGPR), Location::fromGPR(scratchGPR), valueLocation); // The extra locations are passed so that the four scratches do not alias registers still read inside the loop.
+ GPRReg alignedAddr = rvScratches.gpr(0); // address.base rounded down to a 4-byte boundary
+ GPRReg shift = rvScratches.gpr(1); // bit position of the target lane in that word: (address & 3) * 8
+ GPRReg invMask = rvScratches.gpr(2); // ~(byteMask << shift): selects the neighbouring bytes to preserve
+ GPRReg rawOld = rvScratches.gpr(3); // the whole word returned by the load-reserved
+ int32_t byteMask = (accessWidth == Width8) ? 0xFF : 0xFFFF;
+
+ m_jit.move(address.base, alignedAddr); // NOTE(review): only address.base is read; address.offset is assumed to be zero -- confirm against callers.
+ m_jit.and64(TrustedImm32(-4), alignedAddr);
+ m_jit.move(address.base, shift);
+ m_jit.and64(TrustedImm32(3), shift);
+ m_jit.lshift64(TrustedImm32(3), shift); // byte offset -> bit offset
+
+ m_jit.move(TrustedImm32(byteMask), invMask);
+ m_jit.lshift64(shift, invMask);
+ m_jit.not64(invMask);
+
+ auto reloopLabel = m_jit.label(); // Retry point: everything below is re-emitted work for each failed store-conditional.
+ m_jit.loadLinkAcq32(Address(alignedAddr), rawOld);
+ m_jit.urshift64(rawOld, shift, oldGPR);
+ m_jit.and64(TrustedImm32(byteMask), oldGPR); // oldGPR = current lane value, zero-extended
+
+ functor(oldGPR, scratchGPR); // The caller's functor produces the new lane value in scratchGPR.
+
+ m_jit.and64(TrustedImm32(byteMask), scratchGPR); // Truncate so the new value cannot spill into neighbouring bytes.
+ m_jit.lshift64(shift, scratchGPR);
+ m_jit.and64(invMask, rawOld); // Clear the lane in the loaded word...
+ m_jit.or64(scratchGPR, rawOld); // ...and splice the new value in.
+
+ m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR); // scratchGPR is reused as the SC status register.
+ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit);
+}
+#endif
+
template<typename Functor>
void BBQJIT::emitAtomicOpGeneric(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, const Functor& functor)
{
@@ -573,14 +614,14 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom
#endif
break;
case Width32:
-#if CPU(ARM64)
+#if CPU(ARM64) || CPU(RISCV64)
m_jit.loadLinkAcq32(address, oldGPR);
#else
m_jit.load32(address, oldGPR);
#endif
break;
case Width64:
-#if CPU(ARM64)
+#if CPU(ARM64) || CPU(RISCV64)
m_jit.loadLinkAcq64(address, oldGPR);
#else
m_jit.load64(address, oldGPR);
@@ -629,28 +670,25 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom
}
m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit);
#elif CPU(RISCV64)
- // Slow path: plain load+store (no LR/SC). rv64gc does include the
- // A-extension, but MacroAssemblerRISCV64.h's loadLinkAcq/storeCondRel
- // primitives are still stubs. This is single-threaded-correct only;
- // multi-threaded code would race. TODO: emit amo.* / lr.d+sc.d for
- // a truly atomic version.
switch (accessWidth) {
case Width8:
m_jit.store8(scratchGPR, address);
+ m_jit.move(TrustedImm32(0), scratchGPR);
break;
case Width16:
m_jit.store16(scratchGPR, address);
+ m_jit.move(TrustedImm32(0), scratchGPR);
break;
case Width32:
- m_jit.store32(scratchGPR, address);
+ m_jit.storeCondRel32(scratchGPR, address, scratchGPR);
break;
case Width64:
- m_jit.store64(scratchGPR, address);
+ m_jit.storeCondRel64(scratchGPR, address, scratchGPR);
break;
case Width128:
RELEASE_ASSERT_NOT_REACHED();
}
- UNUSED_PARAM(reloopLabel);
+ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit);
#endif
}
@@ -671,9 +709,16 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom
if (!(isARM64_LSE() || isX86_64())) {
ScratchScope<1, 0> scratches(*this);
- emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), [&](GPRReg oldGPR, GPRReg newGPR) {
+ auto opFunctor = [&](GPRReg oldGPR, GPRReg newGPR) {
emitSanitizeAtomicResult(loadOp, canonicalWidth(accessWidth(loadOp)) == Width64 ? TypeKind::I64 : TypeKind::I32, oldGPR, newGPR);
- });
+ };
+#if CPU(RISCV64)
+ Width w = accessWidth(loadOp);
+ if (w == Width8 || w == Width16)
+ emitAtomicOpGenericRISCV64ByteMask(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), Location(), opFunctor);
+ else
+#endif
+ emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), opFunctor);
emitSanitizeAtomicResult(loadOp, valueType.kind, resultLocation.asGPR());
return result;
}
@@ -778,9 +823,16 @@ void BBQJIT::emitAtomicStoreOp(ExtAtomic
consume(value);
if (!(isARM64_LSE() || isX86_64())) {
- emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, [&](GPRReg, GPRReg newGPR) {
+ auto opFunctor = [&](GPRReg, GPRReg newGPR) {
m_jit.move(valueLocation.asGPR(), newGPR);
- });
+ };
+#if CPU(RISCV64)
+ Width w = accessWidth(storeOp);
+ if (w == Width8 || w == Width16)
+ emitAtomicOpGenericRISCV64ByteMask(storeOp, address, scratch1GPR, scratch2GPR, valueLocation, opFunctor);
+ else
+#endif
+ emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, opFunctor);
return;
}
@@ -1135,7 +1187,7 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA
break;
}
- emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, [&](GPRReg oldGPR, GPRReg newGPR) {
+ auto rmwFunctor = [&](GPRReg oldGPR, GPRReg newGPR) {
switch (op) {
case ExtAtomicOpType::I32AtomicRmw16AddU:
case ExtAtomicOpType::I32AtomicRmw8AddU:
@@ -1205,7 +1257,13 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA
RELEASE_ASSERT_NOT_REACHED();
break;
}
- });
+ };
+#if CPU(RISCV64)
+ if (accessWidth(op) == Width8 || accessWidth(op) == Width16)
+ emitAtomicOpGenericRISCV64ByteMask(op, address, resultLocation.asGPR(), scratchGPR, valueLocation, rmwFunctor);
+ else
+#endif
+ emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, rmwFunctor);
emitSanitizeAtomicResult(op, valueType.kind, resultLocation.asGPR());
return result;
}
@@ -1285,46 +1343,55 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA
}
#if CPU(RISCV64)
- // Slow path: non-atomic CAS. Load current into resultGPR, compare
- // to expectedGPR, store valueGPR only on equality. Single-threaded
- // correct only. TODO: emit lr.{d,w}+sc.{d,w} for a truly atomic
- // version using the rv64gc A-extension.
- switch (accessWidth) {
- case Width8:
- m_jit.load8(address, resultGPR);
- break;
- case Width16:
- m_jit.load16(address, resultGPR);
- break;
- case Width32:
- m_jit.load32(address, resultGPR);
- break;
- case Width64:
- m_jit.load64(address, resultGPR);
- break;
- default:
- RELEASE_ASSERT_NOT_REACHED();
- break;
+ // rv64gc A-extension. For 32/64 atomicStrongCAS uses an LR.aq/SC.rl
+ // loop. For 8/16 the base A-ext has no byte/half AMOs (Zabha is
+ // optional, not in rv64gc); emit a word-aligned LR.W/SC.W
+ // byte-mask CAS loop -- properly atomic.
+ if (accessWidth == Width8 || accessWidth == Width16) {
+ ScratchScope<4, 0> rvScratches(*this, valueLocation, expectedLocation, resultLocation);
+ GPRReg alignedAddr = rvScratches.gpr(0);
+ GPRReg shift = rvScratches.gpr(1);
+ GPRReg invMask = rvScratches.gpr(2);
+ GPRReg rawOld = rvScratches.gpr(3);
+ int32_t byteMask = (accessWidth == Width8) ? 0xFF : 0xFFFF;
+
+ m_jit.move(address.base, alignedAddr);
+ m_jit.and64(TrustedImm32(-4), alignedAddr);
+ m_jit.move(address.base, shift);
+ m_jit.and64(TrustedImm32(3), shift);
+ m_jit.lshift64(TrustedImm32(3), shift);
+
+ m_jit.move(TrustedImm32(byteMask), invMask);
+ m_jit.lshift64(shift, invMask);
+ m_jit.not64(invMask);
+
+ auto loop = m_jit.label();
+ m_jit.loadLinkAcq32(Address(alignedAddr), rawOld);
+ m_jit.urshift64(rawOld, shift, resultGPR);
+ m_jit.and64(TrustedImm32(byteMask), resultGPR);
+ Jump mismatch = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR);
+ m_jit.and64(TrustedImm32(byteMask), valueGPR, scratchGPR);
+ m_jit.lshift64(shift, scratchGPR);
+ m_jit.and64(invMask, rawOld);
+ m_jit.or64(scratchGPR, rawOld);
+ m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR);
+ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(loop, &m_jit);
+ mismatch.link(&m_jit);
+ return;
}
- auto notEqual = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR);
switch (accessWidth) {
- case Width8:
- m_jit.store8(valueGPR, address);
- break;
- case Width16:
- m_jit.store16(valueGPR, address);
- break;
case Width32:
- m_jit.store32(valueGPR, address);
+ m_jit.move(expectedGPR, resultGPR);
+ m_jit.atomicStrongCAS32(resultGPR, valueGPR, address);
break;
case Width64:
- m_jit.store64(valueGPR, address);
+ m_jit.move(expectedGPR, resultGPR);
+ m_jit.atomicStrongCAS64(resultGPR, valueGPR, address);
break;
default:
RELEASE_ASSERT_NOT_REACHED();
break;
}
- notEqual.link(&m_jit);
UNUSED_PARAM(scratchGPR);
return;
#endif