mirror of
https://github.com/openwrt/video.git
synced 2026-05-31 06:51:54 +08:00
df0b899123
Update WPEWebKit to the 2.52 stable major release branch. Includes a pending patchset to get WASM BBQJIT working on RISCV64 (upstream PR https://github.com/WebKit/WebKit/pull/65621 ). Altogether, this brings acceptable performance (even with the LLVMPipe Mesa software renderer) on RISCV64. Link: https://wpewebkit.org/release/wpewebkit-2.52.0.html Link: https://wpewebkit.org/release/wpewebkit-2.52.1.html Link: https://wpewebkit.org/release/wpewebkit-2.52.2.html Link: https://wpewebkit.org/release/wpewebkit-2.52.3.html Signed-off-by: Daniel Golle <daniel@makrotopia.org>
540 lines
28 KiB
Diff
540 lines
28 KiB
Diff
From: Daniel Golle <daniel@makrotopia.org>
|
|
Subject: [PATCH] JavaScriptCore: RISCV64: wire up A-extension atomics in BBQJIT
|
|
|
|
OpenWrt's RISC-V baseline is rv64gc/lp64d, which always includes the
|
|
standard A-extension. Replace the UNIMPLEMENTED_METHOD stubs in
|
|
MacroAssemblerRISCV64.h with real implementations and switch BBQJIT to
|
|
drive them, so wasm atomic ops compiled in BBQJIT are properly
|
|
multi-thread-safe.
|
|
|
|
* 32/64-bit primitives map directly to LR.{W,D}.aq / SC.{W,D}.rl,
|
|
AMOSWAP.{W,D}, AMOADD.{W,D}, AMOAND/OR/XOR.{W,D}. atomicXchgClear
|
|
is "atomic AND NOT"; base A has no AMOANDN, so synthesise as
|
|
xori-1 + AMOAND. atomicStrongCAS{32,64} is a tight LR.aq/SC.rl loop.
|
|
|
|
* 8/16-bit primitives are not provided by base A (Zabha is optional
|
|
and not in rv64gc). BBQJIT for Width8/Width16 now emits an inline
|
|
word-aligned LR.W/SC.W byte-mask loop covering all three caller
|
|
paths (emitAtomicLoadOp, emitAtomicStoreOp, emitAtomicBinaryRMWOp)
|
|
via a new emitAtomicOpGenericRISCV64ByteMask helper, plus the
|
|
cmpxchg path in emitAtomicCompareExchange. The helper takes the
|
|
caller's valueLocation as a ScratchScope preserve arg so its 4
|
|
extra scratches never alias an input register that has been
|
|
consume()-d but is still read inside the loop.
|
|
|
|
* X86-style 5-arg atomicStrongCAS{32,64} overloads are added as
|
|
stubs: BBQJIT's emitStrongCAS returns early on RISCV64 so those
|
|
call sites are dead at runtime, but the source still needs them
|
|
to typecheck.
|
|
|
|
The wasm threads spec tests (atomic.wast.js, atomic-signed.wast.js,
|
|
memory.wast.js, wait-large.wast.js) pass in both IPInt+BBQ and BBQ-only
|
|
modes on a StarFive VisionFive 2.
|
|
|
|
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
|
|
---
|
|
--- a/Source/JavaScriptCore/assembler/RISCV64Assembler.h
|
|
+++ b/Source/JavaScriptCore/assembler/RISCV64Assembler.h
|
|
@@ -1832,6 +1832,37 @@ public:
|
|
void remwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMW::construct(rd, rs1, rs2)); }
|
|
void remuwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMUW::construct(rd, rs1, rs2)); }
|
|
|
|
+ // RV{32,64}A standard A-extension (always present in rv64gc).
|
|
+ // For sequential consistency pass { Acquire, Release } (.aqrl).
|
|
+ void lr_wInsn(RegisterID rd, RegisterID rs1, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::LR_W::construct(rd, rs1, RegisterID::zero, aqrl)); }
|
|
+ void lr_dInsn(RegisterID rd, RegisterID rs1, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::LR_D::construct(rd, rs1, RegisterID::zero, aqrl)); }
|
|
+ void sc_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::SC_W::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void sc_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::SC_D::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoswap_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOSWAP_W::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoswap_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOSWAP_D::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoadd_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOADD_W::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoadd_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOADD_D::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoxor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOXOR_W::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoxor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOXOR_D::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoand_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOAND_W::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoand_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOAND_D::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOOR_W::construct(rd, rs1, rs2, aqrl)); }
|
|
+ void amoor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list<RISCV64Instructions::MemoryAccess> aqrl)
|
|
+ { insn(RISCV64Instructions::AMOOR_D::construct(rd, rs1, rs2, aqrl)); }
|
|
+
|
|
using FCVTType = RISCV64Instructions::FCVTType;
|
|
using FMVType = RISCV64Instructions::FMVType;
|
|
|
|
--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
|
|
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
|
|
@@ -2448,55 +2448,169 @@ public:
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUnzipEven);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorZipUpper);
|
|
|
|
- // Wasm atomics: the RISC-V A extension is available (the OpenWrt -march
|
|
- // baseline is rv64gc, i.e. includes A), but the AMO/LR/SC instruction
|
|
- // emitters in RISCV64Assembler.h have not been added yet. Stub the
|
|
- // BBQJIT atomic API with hard-fault unimplemented methods: at runtime
|
|
- // wasm shared memory is gated off via useSharedArrayBuffer = false, so
|
|
- // wasm atomic opcodes are unreachable, and these stubs only ever exist
|
|
- // for compile-time completeness. Filling these in (and adding the
|
|
- // matching RISCV64Assembler.h emitters) is a follow-up that unlocks the
|
|
- // wasm threads proposal on RISCV64.
|
|
+ // RV64A standard A-extension (always present in rv64gc): real impls
|
|
+ // for 32/64-bit primitives. 8/16-bit primitives stay UNIMPLEMENTED
|
|
+ // because base RV64A has no byte/half AMOs (Zabha is optional, not
|
|
+ // in rv64gc); BBQJIT instead routes Width8/Width16 atomic ops through
|
|
+ // an inline word-aligned LR.W/SC.W byte-mask loop built on the 32-bit
|
|
+ // primitives (emitAtomicOpGenericRISCV64ByteMask in WasmBBQJIT64.cpp).
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq64);
|
|
+ void loadLinkAcq32(Address address, RegisterID dest)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.lr_wInsn(dest, address.base, { Assembler::MemoryAccess::Acquire });
|
|
+ }
|
|
+ void loadLinkAcq64(Address address, RegisterID dest)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.lr_dInsn(dest, address.base, { Assembler::MemoryAccess::Acquire });
|
|
+ }
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel64);
|
|
+ void storeCondRel32(RegisterID value, Address address, RegisterID status)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.sc_wInsn(status, address.base, value, { Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void storeCondRel64(RegisterID value, Address address, RegisterID status)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.sc_dInsn(status, address.base, value, { Assembler::MemoryAccess::Release });
|
|
+ }
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS8, Jump);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS16, Jump);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS32, Jump);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS64, Jump);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg64);
|
|
+ void atomicXchg32(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoswap_wInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchg64(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoswap_dInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ // 2-arg X86-style overloads (input-and-result in the same register).
|
|
+ // Live only in BBQJIT's isX86_64() branch, which is never taken at
|
|
+ // runtime on RISC-V; provided so the source still compiles.
|
|
+ void atomicXchg32(RegisterID valueAndResult, Address address) { atomicXchg32(valueAndResult, address, valueAndResult); }
|
|
+ void atomicXchg64(RegisterID valueAndResult, Address address) { atomicXchg64(valueAndResult, address, valueAndResult); }
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd64);
|
|
+ void atomicXchgAdd32(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoadd_wInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgAdd64(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoadd_dInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgAdd32(RegisterID valueAndResult, Address address) { atomicXchgAdd32(valueAndResult, address, valueAndResult); }
|
|
+ void atomicXchgAdd64(RegisterID valueAndResult, Address address) { atomicXchgAdd64(valueAndResult, address, valueAndResult); }
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear64);
|
|
+ // atomicXchgClear is "atomic AND NOT": no AMOANDN in base A; xori-1 + AMOAND.
|
|
+ void atomicXchgClear32(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ auto t = temps<Data>();
|
|
+ m_assembler.xoriInsn(t.data(), value, Imm::I<-1>());
|
|
+ m_assembler.amoand_wInsn(result, address.base, t.data(),
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgClear64(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ auto t = temps<Data>();
|
|
+ m_assembler.xoriInsn(t.data(), value, Imm::I<-1>());
|
|
+ m_assembler.amoand_dInsn(result, address.base, t.data(),
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgClear32(RegisterID valueAndResult, Address address) { atomicXchgClear32(valueAndResult, address, valueAndResult); }
|
|
+ void atomicXchgClear64(RegisterID valueAndResult, Address address) { atomicXchgClear64(valueAndResult, address, valueAndResult); }
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr64);
|
|
+ void atomicXchgOr32(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoor_wInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgOr64(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoor_dInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgOr32(RegisterID valueAndResult, Address address) { atomicXchgOr32(valueAndResult, address, valueAndResult); }
|
|
+ void atomicXchgOr64(RegisterID valueAndResult, Address address) { atomicXchgOr64(valueAndResult, address, valueAndResult); }
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor64);
|
|
- // atomicStrongCAS{N}: the non-branching CAS overloads used by BBQJIT
|
|
- // when the caller only needs success/failure in resultGPR (rather
|
|
- // than a JIT-emitted branch). Same runtime-unreachable rationale as
|
|
- // branchAtomicStrongCAS{N} above.
|
|
+ void atomicXchgXor32(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoxor_wInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgXor64(RegisterID value, Address address, RegisterID result)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ m_assembler.amoxor_dInsn(result, address.base, value,
|
|
+ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release });
|
|
+ }
|
|
+ void atomicXchgXor32(RegisterID valueAndResult, Address address) { atomicXchgXor32(valueAndResult, address, valueAndResult); }
|
|
+ void atomicXchgXor64(RegisterID valueAndResult, Address address) { atomicXchgXor64(valueAndResult, address, valueAndResult); }
|
|
+ // atomicStrongCAS{32,64}(expectedAndResult, newValue, address):
|
|
+ // Loads *address into expectedAndResult; if old == caller's expected,
|
|
+ // stores newValue. Same external contract as ARM64-LSE casa. Caller
|
|
+ // checks expectedAndResult == old-expected to detect success.
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS8);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS16);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS64);
|
|
+ void atomicStrongCAS32(RegisterID expectedAndResult, RegisterID newValue, Address address)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ auto t = temps<Data, Memory>();
|
|
+ Label loop = label();
|
|
+ m_assembler.lr_wInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire });
|
|
+ m_assembler.addiwInsn(t.memory(), expectedAndResult, Imm::I<0>());
|
|
+ Jump mismatch = makeBranch(NotEqual, t.data(), t.memory());
|
|
+ m_assembler.sc_wInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release });
|
|
+ Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero);
|
|
+ scFail.linkTo(loop, this);
|
|
+ mismatch.link(this);
|
|
+ m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>());
|
|
+ }
|
|
+ void atomicStrongCAS64(RegisterID expectedAndResult, RegisterID newValue, Address address)
|
|
+ {
|
|
+ ASSERT(!address.offset);
|
|
+ auto t = temps<Data, Memory>();
|
|
+ Label loop = label();
|
|
+ m_assembler.lr_dInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire });
|
|
+ Jump mismatch = makeBranch(NotEqual, t.data(), expectedAndResult);
|
|
+ m_assembler.sc_dInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release });
|
|
+ Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero);
|
|
+ scFail.linkTo(loop, this);
|
|
+ mismatch.link(this);
|
|
+ m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>());
|
|
+ }
|
|
+ // 5-arg StatusCondition form (X86-style). Live only in BBQJIT's
|
|
+ // isX86_64() branch -- on RISC-V the surrounding code exits via
|
|
+ // an earlier `return;` so this never runs at runtime. Provide a
|
|
+ // viable overload so the source still compiles.
|
|
+ void atomicStrongCAS32(StatusCondition, RegisterID, RegisterID, Address, RegisterID)
|
|
+ { RELEASE_ASSERT_NOT_REACHED(); }
|
|
+ void atomicStrongCAS64(StatusCondition, RegisterID, RegisterID, Address, RegisterID)
|
|
+ { RELEASE_ASSERT_NOT_REACHED(); }
|
|
// Additional SIMD vector noop stubs uncovered by enabling BBQJIT.
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSplat);
|
|
MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshl8);
|
|
--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.h
|
|
+++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.h
|
|
@@ -1335,6 +1335,11 @@ public:
|
|
template<typename Functor>
|
|
void emitAtomicOpGeneric(ExtAtomicOpType op, Address address, Location old, Location cur, const Functor& functor);
|
|
|
|
+#if CPU(RISCV64) && USE(JSVALUE64)
|
|
+ template<typename Functor>
|
|
+ void emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor);
|
|
+#endif
|
|
+
|
|
[[nodiscard]] Value emitAtomicLoadOp(ExtAtomicOpType loadOp, Type valueType, Location pointer, uint32_t uoffset);
|
|
|
|
[[nodiscard]] PartialResult atomicLoad(ExtAtomicOpType loadOp, Type valueType, ExpressionType pointer, ExpressionType& result, uint32_t uoffset);
|
|
--- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp
|
|
+++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp
|
|
@@ -540,6 +540,47 @@ void BBQJIT::emitSanitizeAtomicResult(Ex
|
|
emitSanitizeAtomicResult(op, resultType, result, result);
|
|
}
|
|
|
|
+#if CPU(RISCV64)
|
|
+template<typename Functor>
|
|
+void BBQJIT::emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor)
|
|
+{
|
|
+ Width accessWidth = this->accessWidth(op);
|
|
+ ASSERT(accessWidth == Width8 || accessWidth == Width16);
|
|
+
|
|
+ ScratchScope<4, 0> rvScratches(*this, Location::fromGPR(oldGPR), Location::fromGPR(scratchGPR), valueLocation);
|
|
+ GPRReg alignedAddr = rvScratches.gpr(0);
|
|
+ GPRReg shift = rvScratches.gpr(1);
|
|
+ GPRReg invMask = rvScratches.gpr(2);
|
|
+ GPRReg rawOld = rvScratches.gpr(3);
|
|
+ int32_t byteMask = (accessWidth == Width8) ? 0xFF : 0xFFFF;
|
|
+
|
|
+ m_jit.move(address.base, alignedAddr);
|
|
+ m_jit.and64(TrustedImm32(-4), alignedAddr);
|
|
+ m_jit.move(address.base, shift);
|
|
+ m_jit.and64(TrustedImm32(3), shift);
|
|
+ m_jit.lshift64(TrustedImm32(3), shift);
|
|
+
|
|
+ m_jit.move(TrustedImm32(byteMask), invMask);
|
|
+ m_jit.lshift64(shift, invMask);
|
|
+ m_jit.not64(invMask);
|
|
+
|
|
+ auto reloopLabel = m_jit.label();
|
|
+ m_jit.loadLinkAcq32(Address(alignedAddr), rawOld);
|
|
+ m_jit.urshift64(rawOld, shift, oldGPR);
|
|
+ m_jit.and64(TrustedImm32(byteMask), oldGPR);
|
|
+
|
|
+ functor(oldGPR, scratchGPR);
|
|
+
|
|
+ m_jit.and64(TrustedImm32(byteMask), scratchGPR);
|
|
+ m_jit.lshift64(shift, scratchGPR);
|
|
+ m_jit.and64(invMask, rawOld);
|
|
+ m_jit.or64(scratchGPR, rawOld);
|
|
+
|
|
+ m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR);
|
|
+ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit);
|
|
+}
|
|
+#endif
|
|
+
|
|
template<typename Functor>
|
|
void BBQJIT::emitAtomicOpGeneric(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, const Functor& functor)
|
|
{
|
|
@@ -573,14 +614,14 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom
|
|
#endif
|
|
break;
|
|
case Width32:
|
|
-#if CPU(ARM64)
|
|
+#if CPU(ARM64) || CPU(RISCV64)
|
|
m_jit.loadLinkAcq32(address, oldGPR);
|
|
#else
|
|
m_jit.load32(address, oldGPR);
|
|
#endif
|
|
break;
|
|
case Width64:
|
|
-#if CPU(ARM64)
|
|
+#if CPU(ARM64) || CPU(RISCV64)
|
|
m_jit.loadLinkAcq64(address, oldGPR);
|
|
#else
|
|
m_jit.load64(address, oldGPR);
|
|
@@ -629,28 +670,25 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom
|
|
}
|
|
m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit);
|
|
#elif CPU(RISCV64)
|
|
- // Slow path: plain load+store (no LR/SC). rv64gc does include the
|
|
- // A-extension, but MacroAssemblerRISCV64.h's loadLinkAcq/storeCondRel
|
|
- // primitives are still stubs. This is single-threaded-correct only;
|
|
- // multi-threaded code would race. TODO: emit amo.* / lr.d+sc.d for
|
|
- // a truly atomic version.
|
|
switch (accessWidth) {
|
|
case Width8:
|
|
m_jit.store8(scratchGPR, address);
|
|
+ m_jit.move(TrustedImm32(0), scratchGPR);
|
|
break;
|
|
case Width16:
|
|
m_jit.store16(scratchGPR, address);
|
|
+ m_jit.move(TrustedImm32(0), scratchGPR);
|
|
break;
|
|
case Width32:
|
|
- m_jit.store32(scratchGPR, address);
|
|
+ m_jit.storeCondRel32(scratchGPR, address, scratchGPR);
|
|
break;
|
|
case Width64:
|
|
- m_jit.store64(scratchGPR, address);
|
|
+ m_jit.storeCondRel64(scratchGPR, address, scratchGPR);
|
|
break;
|
|
case Width128:
|
|
RELEASE_ASSERT_NOT_REACHED();
|
|
}
|
|
- UNUSED_PARAM(reloopLabel);
|
|
+ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit);
|
|
#endif
|
|
}
|
|
|
|
@@ -671,9 +709,16 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom
|
|
|
|
if (!(isARM64_LSE() || isX86_64())) {
|
|
ScratchScope<1, 0> scratches(*this);
|
|
- emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), [&](GPRReg oldGPR, GPRReg newGPR) {
|
|
+ auto opFunctor = [&](GPRReg oldGPR, GPRReg newGPR) {
|
|
emitSanitizeAtomicResult(loadOp, canonicalWidth(accessWidth(loadOp)) == Width64 ? TypeKind::I64 : TypeKind::I32, oldGPR, newGPR);
|
|
- });
|
|
+ };
|
|
+#if CPU(RISCV64)
|
|
+ Width w = accessWidth(loadOp);
|
|
+ if (w == Width8 || w == Width16)
|
|
+ emitAtomicOpGenericRISCV64ByteMask(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), Location(), opFunctor);
|
|
+ else
|
|
+#endif
|
|
+ emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), opFunctor);
|
|
emitSanitizeAtomicResult(loadOp, valueType.kind, resultLocation.asGPR());
|
|
return result;
|
|
}
|
|
@@ -778,9 +823,16 @@ void BBQJIT::emitAtomicStoreOp(ExtAtomic
|
|
consume(value);
|
|
|
|
if (!(isARM64_LSE() || isX86_64())) {
|
|
- emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, [&](GPRReg, GPRReg newGPR) {
|
|
+ auto opFunctor = [&](GPRReg, GPRReg newGPR) {
|
|
m_jit.move(valueLocation.asGPR(), newGPR);
|
|
- });
|
|
+ };
|
|
+#if CPU(RISCV64)
|
|
+ Width w = accessWidth(storeOp);
|
|
+ if (w == Width8 || w == Width16)
|
|
+ emitAtomicOpGenericRISCV64ByteMask(storeOp, address, scratch1GPR, scratch2GPR, valueLocation, opFunctor);
|
|
+ else
|
|
+#endif
|
|
+ emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, opFunctor);
|
|
return;
|
|
}
|
|
|
|
@@ -1135,7 +1187,7 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA
|
|
break;
|
|
}
|
|
|
|
- emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, [&](GPRReg oldGPR, GPRReg newGPR) {
|
|
+ auto rmwFunctor = [&](GPRReg oldGPR, GPRReg newGPR) {
|
|
switch (op) {
|
|
case ExtAtomicOpType::I32AtomicRmw16AddU:
|
|
case ExtAtomicOpType::I32AtomicRmw8AddU:
|
|
@@ -1205,7 +1257,13 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA
|
|
RELEASE_ASSERT_NOT_REACHED();
|
|
break;
|
|
}
|
|
- });
|
|
+ };
|
|
+#if CPU(RISCV64)
|
|
+ if (accessWidth(op) == Width8 || accessWidth(op) == Width16)
|
|
+ emitAtomicOpGenericRISCV64ByteMask(op, address, resultLocation.asGPR(), scratchGPR, valueLocation, rmwFunctor);
|
|
+ else
|
|
+#endif
|
|
+ emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, rmwFunctor);
|
|
emitSanitizeAtomicResult(op, valueType.kind, resultLocation.asGPR());
|
|
return result;
|
|
}
|
|
@@ -1285,46 +1343,55 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA
|
|
}
|
|
|
|
#if CPU(RISCV64)
|
|
- // Slow path: non-atomic CAS. Load current into resultGPR, compare
|
|
- // to expectedGPR, store valueGPR only on equality. Single-threaded
|
|
- // correct only. TODO: emit lr.{d,w}+sc.{d,w} for a truly atomic
|
|
- // version using the rv64gc A-extension.
|
|
- switch (accessWidth) {
|
|
- case Width8:
|
|
- m_jit.load8(address, resultGPR);
|
|
- break;
|
|
- case Width16:
|
|
- m_jit.load16(address, resultGPR);
|
|
- break;
|
|
- case Width32:
|
|
- m_jit.load32(address, resultGPR);
|
|
- break;
|
|
- case Width64:
|
|
- m_jit.load64(address, resultGPR);
|
|
- break;
|
|
- default:
|
|
- RELEASE_ASSERT_NOT_REACHED();
|
|
- break;
|
|
+ // rv64gc A-extension. For 32/64 atomicStrongCAS uses LR.aq/SC.rl.
|
|
+ // For 8/16 the base A-ext has no byte/half AMOs (Zabha is
|
|
+ // optional, not in rv64gc); emit a word-aligned LR.W/SC.W
|
|
+ // byte-mask CAS loop -- properly atomic.
|
|
+ if (accessWidth == Width8 || accessWidth == Width16) {
|
|
+ ScratchScope<4, 0> rvScratches(*this, valueLocation, expectedLocation, resultLocation);
|
|
+ GPRReg alignedAddr = rvScratches.gpr(0);
|
|
+ GPRReg shift = rvScratches.gpr(1);
|
|
+ GPRReg invMask = rvScratches.gpr(2);
|
|
+ GPRReg rawOld = rvScratches.gpr(3);
|
|
+ int32_t byteMask = (accessWidth == Width8) ? 0xFF : 0xFFFF;
|
|
+
|
|
+ m_jit.move(address.base, alignedAddr);
|
|
+ m_jit.and64(TrustedImm32(-4), alignedAddr);
|
|
+ m_jit.move(address.base, shift);
|
|
+ m_jit.and64(TrustedImm32(3), shift);
|
|
+ m_jit.lshift64(TrustedImm32(3), shift);
|
|
+
|
|
+ m_jit.move(TrustedImm32(byteMask), invMask);
|
|
+ m_jit.lshift64(shift, invMask);
|
|
+ m_jit.not64(invMask);
|
|
+
|
|
+ auto loop = m_jit.label();
|
|
+ m_jit.loadLinkAcq32(Address(alignedAddr), rawOld);
|
|
+ m_jit.urshift64(rawOld, shift, resultGPR);
|
|
+ m_jit.and64(TrustedImm32(byteMask), resultGPR);
|
|
+ Jump mismatch = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR);
|
|
+ m_jit.and64(TrustedImm32(byteMask), valueGPR, scratchGPR);
|
|
+ m_jit.lshift64(shift, scratchGPR);
|
|
+ m_jit.and64(invMask, rawOld);
|
|
+ m_jit.or64(scratchGPR, rawOld);
|
|
+ m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR);
|
|
+ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(loop, &m_jit);
|
|
+ mismatch.link(&m_jit);
|
|
+ return;
|
|
}
|
|
- auto notEqual = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR);
|
|
switch (accessWidth) {
|
|
- case Width8:
|
|
- m_jit.store8(valueGPR, address);
|
|
- break;
|
|
- case Width16:
|
|
- m_jit.store16(valueGPR, address);
|
|
- break;
|
|
case Width32:
|
|
- m_jit.store32(valueGPR, address);
|
|
+ m_jit.move(expectedGPR, resultGPR);
|
|
+ m_jit.atomicStrongCAS32(resultGPR, valueGPR, address);
|
|
break;
|
|
case Width64:
|
|
- m_jit.store64(valueGPR, address);
|
|
+ m_jit.move(expectedGPR, resultGPR);
|
|
+ m_jit.atomicStrongCAS64(resultGPR, valueGPR, address);
|
|
break;
|
|
default:
|
|
RELEASE_ASSERT_NOT_REACHED();
|
|
break;
|
|
}
|
|
- notEqual.link(&m_jit);
|
|
UNUSED_PARAM(scratchGPR);
|
|
return;
|
|
#endif
|