diff --git a/libs/wpewebkit/Makefile b/libs/wpewebkit/Makefile index 64137b6..14df977 100644 --- a/libs/wpewebkit/Makefile +++ b/libs/wpewebkit/Makefile @@ -1,12 +1,12 @@ include $(TOPDIR)/rules.mk PKG_NAME:=wpewebkit -PKG_VERSION:=2.50.1 +PKG_VERSION:=2.52.3 PKG_RELEASE:=1 PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz PKG_SOURCE_URL:=https://wpewebkit.org/releases -PKG_HASH:=305437a4ba869e4106c1d4a8cb2a7be3f43b75d9d026e9b97c6f6debc04a673b +PKG_HASH:=b51b1db1e6ee99d1771f4a358c128fde27a77984df20ee6cb59858e520662d0b PKG_MAINTAINER:=Daniel Golle PKG_LICENSE:=LGPL-2.1-or-later BSD-2-Clause @@ -60,7 +60,8 @@ define Package/libwpewebkit CATEGORY:=Libraries TITLE:=WPEWebKit Library URL:=https://wpewebkit.org - DEPENDS:=+bubblewrap +glib2 +gst1-mod-opengl +harfbuzz +icu-full-data \ + DEPENDS:=+bubblewrap +glib2 +gst1-mod-autodetect +gst1-mod-opengl \ + +harfbuzz +icu-full-data \ +libepoxy +libgcrypt +libgst1allocators +libgst1app +libgst1audio \ +libgst1fft +libgst1gl +libgst1pbutils +libgst1tag \ +libgst1transcoder +libgst1video +libinput +libmanette +libjpeg \ diff --git a/libs/wpewebkit/patches/121-currentStackPointer-for-Linux-PPC-Linux-PPC64.patch b/libs/wpewebkit/patches/121-currentStackPointer-for-Linux-PPC-Linux-PPC64.patch index 4e699a1..5f5df6c 100644 --- a/libs/wpewebkit/patches/121-currentStackPointer-for-Linux-PPC-Linux-PPC64.patch +++ b/libs/wpewebkit/patches/121-currentStackPointer-for-Linux-PPC-Linux-PPC64.patch @@ -11,7 +11,7 @@ pointer on Linux running on PowerPC and PowerPC64 archtecture. 
--- a/Source/WTF/wtf/StackPointer.cpp +++ b/Source/WTF/wtf/StackPointer.cpp -@@ -160,6 +160,17 @@ asm ( +@@ -157,6 +157,17 @@ __asm__( ".previous" "\n" ); diff --git a/libs/wpewebkit/patches/122-JavaScriptCore-RISCV64-fix-undefined-EnableIfInteger.patch b/libs/wpewebkit/patches/122-JavaScriptCore-RISCV64-fix-undefined-EnableIfInteger.patch new file mode 100644 index 0000000..e316c12 --- /dev/null +++ b/libs/wpewebkit/patches/122-JavaScriptCore-RISCV64-fix-undefined-EnableIfInteger.patch @@ -0,0 +1,50 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: use std::integral in RISCV64 immediate helpers + +Backport of WebKit commit 6b720e4 ("[JSC] Use std::integral in RISCV64 +immediate helpers"). + +The RISCV64 MacroAssembler's Imm helper declared the runtime immediate +constructors I(T)/S(T)/B(T) with a SFINAE default template argument +'typename = EnableIfInteger', but EnableIfInteger is not defined +anywhere in the tree - it was left dangling when JavaScriptCore's +assembler adopted C++20 concepts. The compiler rejects the declarations +with: + + error: 'EnableIfInteger' does not name a type + +which removes the runtime overloads entirely, leaving only the +compile-time 'template' variants. Every runtime use, +e.g. store8()/store16() calling Imm::S(resolution.offset), then fails +with "no matching function" and the RISCV64 JIT does not build. + +Replace the undefined constraint with the C++20 std::integral concept, +exactly as done upstream. 
+ +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h ++++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h +@@ -4365,20 +4365,20 @@ private: + using IType = RISCV64Assembler::IImmediate; + template + static IType I() { return IType::v(); } +- template> ++ template + static IType I(T value) { return IType::v(value); } + static IType I(uint32_t value) { return IType(value); } + + using SType = RISCV64Assembler::SImmediate; + template + static SType S() { return SType::v(); } +- template> ++ template + static SType S(T value) { return SType::v(value); } + + using BType = RISCV64Assembler::BImmediate; + template + static BType B() { return BType::v(); } +- template> ++ template + static BType B(T value) { return BType::v(value); } + static BType B(uint32_t value) { return BType(value); } + diff --git a/libs/wpewebkit/patches/123-JavaScriptCore-RISCV64-LLInt-in-place-interpreter.patch b/libs/wpewebkit/patches/123-JavaScriptCore-RISCV64-LLInt-in-place-interpreter.patch new file mode 100644 index 0000000..e0c5f6b --- /dev/null +++ b/libs/wpewebkit/patches/123-JavaScriptCore-RISCV64-LLInt-in-place-interpreter.patch @@ -0,0 +1,166 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: support the LLInt in-place interpreter on RISCV64 + +The WebAssembly in-place interpreter (IPInt, llint/InPlaceInterpreter.asm) +does not build for RISCV64: offlineasm's RISCV64 backend has not been +updated since the IPInt code landed. Two gaps: + + * LowLevelInterpreter.asm aliases the WebAssembly scratch registers + ws0..ws3 to t9..t12 in the catch-all 'else' branch (written for + ARM64). The RISCV64 offlineasm backend only provides t0..t7, so + lowering fails with "Bad register name t9". RISCV64 also has no + register budget for four WebAssembly scratch registers. + + Add an explicit RISCV64 branch that uses t8/t9 for ws0/ws1 and + leaves ws2/ws3 unavailable, matching the X86_64 configuration. 
+ + * offlineasm/riscv64.rb does not implement the 'transferp' opcode + (a memory-to-memory pointer move) used by InPlaceInterpreter.asm, + so lowering fails with "Unhandled opcode transferp". + + Map the previously unused physical registers x5/x6 to offlineasm + t8/t9 and f6/f7 to ft6/ft7, and rewrite the transferi/transferp/ + transferq pseudo-instructions to a load into a temporary followed + by a store, before the address-lowering passes run. + +With both changes offlineasm emits LLIntAssembly.h for RISCV64 with +WebAssembly enabled. + +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/offlineasm/riscv64.rb ++++ b/Source/JavaScriptCore/offlineasm/riscv64.rb +@@ -34,9 +34,9 @@ + # x2 => sp (through alias sp) (RISC-V stack pointer register) + # x3 => not used (RISC-V global pointer register) + # x4 => not used (RISC-V thread pointer register) +-# x5 => not used +-# x6 => ws0 +-# x7 => ws1 ++# x5 => t8 ++# x6 => t9 ++# x7 => not used + # x8 => cfr (through alias fp) (RISC-V frame pointer register) + # x9 => csr0 + # x10 => t0, a0, wa0, r0 +@@ -70,8 +70,8 @@ + # f3 => ft3 + # f4 => ft4 + # f5 => ft5 +-# f6 => not used +-# f7 => not used ++# f6 => ft6 ++# f7 => ft7 + # f8 => csfr0 + # f9 => csfr1 + # f10 => fa0, wfa0 +@@ -170,10 +170,10 @@ class RegisterID + 'x16' + when 't7', 'a7', 'wa7' + 'x17' +- when 'ws0' ++ when 't8' ++ 'x5' ++ when 't9' + 'x6' +- when 'ws1' +- 'x7' + when 'csr0' + 'x9' + when 'csr1' +@@ -223,6 +223,10 @@ class FPRegisterID + 'f4' + when 'ft5' + 'f5' ++ when 'ft6' ++ 'f6' ++ when 'ft7' ++ 'f7' + when 'csfr0' + 'f8' + when 'csfr1' +@@ -394,6 +398,22 @@ def riscv64LowerOperandIntoRegisterAndSi + destination + end + ++def riscv64LowerTransfer(list) ++ newList = [] ++ list.each { ++ | node | ++ if node.is_a?(Instruction) and ["transferi", "transferp", "transferq"].include?(node.opcode) ++ size = node.opcode[-1, 1] ++ tmp = Tmp.new(node.codeOrigin, :gpr) ++ newList << Instruction.new(node.codeOrigin, "load#{size}", [node.operands[0], tmp]) 
++ newList << Instruction.new(node.codeOrigin, "store#{size}", [tmp, node.operands[1]]) ++ else ++ newList << node ++ end ++ } ++ newList ++end ++ + def riscv64LowerMisplacedAddresses(list) + newList = [] + list.each { +@@ -1541,6 +1561,7 @@ class Sequence + result = @list + + result = riscDropTags(result) ++ result = riscv64LowerTransfer(result) + result = riscLowerMalformedAddresses(result) { + | node, address | + if address.is_a? Address +--- a/Source/JavaScriptCore/llint/LowLevelInterpreter.asm ++++ b/Source/JavaScriptCore/llint/LowLevelInterpreter.asm +@@ -365,6 +365,52 @@ elsif X86_64 + const wfa7 = ft7 + + const fr = fa0 ++elsif RISCV64 ++ const a0 = t0 ++ const a1 = t1 ++ const a2 = t2 ++ const a3 = t3 ++ const a4 = t4 ++ const a5 = t5 ++ const a6 = t6 ++ const a7 = t7 ++ ++ const wa0 = a0 ++ const wa1 = a1 ++ const wa2 = a2 ++ const wa3 = a3 ++ const wa4 = a4 ++ const wa5 = a5 ++ const wa6 = a6 ++ const wa7 = a7 ++ ++ # RISCV64 uses all eight argument GPRs (a0-a7 == t0-t7); the only ++ # non-argument temporaries left for the WebAssembly scratch ++ # registers are t8 and t9. ws2/ws3 are unavailable, matching the ++ # X86_64 configuration above. 
++ const ws0 = t8 ++ const ws1 = t9 ++ const ws2 = invalidGPR ++ const ws3 = invalidGPR ++ ++ const r0 = a0 ++ const r1 = a1 ++ ++ const fa0 = ft0 ++ const fa1 = ft1 ++ const fa2 = ft2 ++ const fa3 = ft3 ++ ++ const wfa0 = fa0 ++ const wfa1 = fa1 ++ const wfa2 = fa2 ++ const wfa3 = fa3 ++ const wfa4 = ft4 ++ const wfa5 = ft5 ++ const wfa6 = ft6 ++ const wfa7 = ft7 ++ ++ const fr = fa0 + else + const a0 = t0 + const a1 = t1 diff --git a/libs/wpewebkit/patches/124-JavaScriptCore-RISCV64-LLInt-arch-attribute-once.patch b/libs/wpewebkit/patches/124-JavaScriptCore-RISCV64-LLInt-arch-attribute-once.patch new file mode 100644 index 0000000..5caefae --- /dev/null +++ b/libs/wpewebkit/patches/124-JavaScriptCore-RISCV64-LLInt-arch-attribute-once.patch @@ -0,0 +1,38 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: emit the RISCV64 LLInt arch attribute only once + +The RISCV64 variant of OFFLINE_ASM_GLOBAL_LABEL_IMPL emits a +'.attribute arch, "rv64gc"' directive for every global label in the +generated LLInt assembly. The whole LLInt is emitted as a single +top-level __asm__ block, so from the second label onwards the +directive appears after instructions have already been emitted, and +recent binutils rejects it: + + {standard input}: Fatal error: architecture elf attributes must + set before any instructions + +The ELF architecture attribute is already emitted once for the +translation unit by the compiler from -march=rv64gc, which equally +covers the rv64gc instructions in the LLInt asm. Drop the RISCV64 +special case so the generic OFFLINE_ASM_GLOBAL_LABEL_IMPL is used, +which does not emit the redundant per-label directive. 
+ +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/llint/LowLevelInterpreter.cpp ++++ b/Source/JavaScriptCore/llint/LowLevelInterpreter.cpp +@@ -569,15 +569,6 @@ WTF_ALLOW_UNSAFE_BUFFER_USAGE_END + ".thumb\n" \ + ".thumb_func " THUMB_FUNC_PARAM(label) "\n" \ + SYMBOL_STRING(label) ":\n" +-#elif CPU(RISCV64) +-#define OFFLINE_ASM_GLOBAL_LABEL_IMPL(label, ALT_ENTRY, ALIGNMENT, VISIBILITY) \ +- OFFLINE_ASM_TEXT_SECTION \ +- ALIGNMENT \ +- ALT_ENTRY(label) \ +- ".globl " SYMBOL_STRING(label) "\n" \ +- ".attribute arch, \"rv64gc\"" "\n" \ +- VISIBILITY(label) "\n" \ +- SYMBOL_STRING(label) ":\n" + #else + #define OFFLINE_ASM_GLOBAL_LABEL_IMPL(label, ALT_ENTRY, ALIGNMENT, VISIBILITY) \ + OFFLINE_ASM_TEXT_SECTION \ diff --git a/libs/wpewebkit/patches/125-JavaScriptCore-RISCV64-MacroAssembler-add-missing-methods.patch b/libs/wpewebkit/patches/125-JavaScriptCore-RISCV64-MacroAssembler-add-missing-methods.patch new file mode 100644 index 0000000..aba79ab --- /dev/null +++ b/libs/wpewebkit/patches/125-JavaScriptCore-RISCV64-MacroAssembler-add-missing-methods.patch @@ -0,0 +1,121 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: add missing RISCV64 MacroAssembler methods + +The RISCV64 MacroAssembler is missing five primitives that JSC's +optimising tiers (DFG, FTL, the inline-cache compiler) now call. Each +is straightforward, the RISC-V base ISA has the instructions required, +and the implementations mirror the patterns already used in this file: + + * add8(TrustedImm32, Address) - lbu / addi / sb (with the same + immediate-out-of-range fallback used by add32(TrustedImm32, Address) + right below it); needed by InlineCacheCompiler to bump an 8-bit + countdown counter. + + * or32(RegisterID, Address) - lw / or / sw, the missing direct-Address + counterpart of or32(RegisterID, AbsoluteAddress); used by FTL OSR + exit. 
+ + * convertUInt32ToDouble(RegisterID, FPRegisterID) and the TrustedImm32 + overload - fcvt.d.wu via the FCVTType::WU template; called from DFG + and the inline-cache compiler. + + * add64/sub64(FPRegisterID, FPRegisterID, FPRegisterID) - 64-bit + integer arithmetic performed on values that live in FP registers, + used by the JSValue double boxing / NaN purification paths (see + DFGSpeculativeJIT::boxDoubleAsDouble and purifyNaN). RISC-V has no + integer ALU on FPRs so the values are moved through GPR scratch + registers via fmv.x.d / add or sub / fmv.d.x. + +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h ++++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h +@@ -199,6 +199,25 @@ public: + m_assembler.maskRegister<32>(dest); + } + ++ void add8(TrustedImm32 imm, Address address) ++ { ++ auto temp = temps(); ++ auto resolution = resolveAddress(address, temp.memory()); ++ if (Imm::isValid(imm.m_value)) { ++ m_assembler.lbuInsn(temp.data(), resolution.base, Imm::I(resolution.offset)); ++ m_assembler.addiInsn(temp.data(), temp.data(), Imm::I(imm.m_value)); ++ m_assembler.sbInsn(resolution.base, temp.data(), Imm::S(resolution.offset)); ++ return; ++ } ++ ++ m_assembler.lbuInsn(temp.memory(), resolution.base, Imm::I(resolution.offset)); ++ loadImmediate(imm, temp.data()); ++ m_assembler.addInsn(temp.data(), temp.memory(), temp.data()); ++ ++ resolution = resolveAddress(address, temp.memory()); ++ m_assembler.sbInsn(resolution.base, temp.data(), Imm::S(resolution.offset)); ++ } ++ + void add32(TrustedImm32 imm, AbsoluteAddress address) + { + auto temp = temps(); +@@ -1701,6 +1720,15 @@ public: + m_assembler.swInsn(temp.memory(), temp.data(), Imm::S<0>()); + } + ++ void or32(RegisterID src, Address address) ++ { ++ auto temp = temps(); ++ auto resolution = resolveAddress(address, temp.memory()); ++ m_assembler.lwInsn(temp.data(), resolution.base, Imm::I(resolution.offset)); ++ m_assembler.orInsn(temp.data(), src, 
temp.data()); ++ m_assembler.swInsn(resolution.base, temp.data(), Imm::S(resolution.offset)); ++ } ++ + void or32(TrustedImm32 imm, AbsoluteAddress address) + { + auto temp = temps(); +@@ -2007,6 +2035,28 @@ public: + m_assembler.fmvInsn(dest, src); + } + ++ // 64-bit integer arithmetic on values held in FP registers. Used by the ++ // JSValue double boxing / NaN purification paths, where the bit pattern of ++ // a double is offset by JSValue::DoubleEncodeOffset without leaving the FP ++ // register file. RISC-V has no integer ALU on FPRs, so move through GPRs. ++ void add64(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) ++ { ++ auto temp = temps(); ++ m_assembler.fmvInsn(temp.data(), op1); ++ m_assembler.fmvInsn(temp.memory(), op2); ++ m_assembler.addInsn(temp.data(), temp.data(), temp.memory()); ++ m_assembler.fmvInsn(dest, temp.data()); ++ } ++ ++ void sub64(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) ++ { ++ auto temp = temps(); ++ m_assembler.fmvInsn(temp.data(), op1); ++ m_assembler.fmvInsn(temp.memory(), op2); ++ m_assembler.subInsn(temp.data(), temp.data(), temp.memory()); ++ m_assembler.fmvInsn(dest, temp.data()); ++ } ++ + void moveDouble(FPRegisterID src, FPRegisterID dest) + { + if (src != dest) +@@ -3609,6 +3659,18 @@ public: + convertInt32ToDouble(temp.data(), dest); + } + ++ void convertUInt32ToDouble(RegisterID src, FPRegisterID dest) ++ { ++ m_assembler.fcvtInsn(dest, src); ++ } ++ ++ void convertUInt32ToDouble(TrustedImm32 imm, FPRegisterID dest) ++ { ++ auto temp = temps(); ++ loadImmediate(imm, temp.data()); ++ convertUInt32ToDouble(temp.data(), dest); ++ } ++ + void convertInt64ToFloat(RegisterID src, FPRegisterID dest) + { + m_assembler.fcvtInsn(dest, src); diff --git a/libs/wpewebkit/patches/126-JavaScriptCore-RISCV64-GdbJIT-support.patch b/libs/wpewebkit/patches/126-JavaScriptCore-RISCV64-GdbJIT-support.patch new file mode 100644 index 0000000..a4f7145 --- /dev/null +++ 
b/libs/wpewebkit/patches/126-JavaScriptCore-RISCV64-GdbJIT-support.patch @@ -0,0 +1,65 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: add RISCV64 support to GdbJIT + +GdbJIT emits a tiny in-memory ELF object for the debugger to pick up, +and three places in the file hard-coded the supported architecture +list to X86_64/ARM64 (with ARMv7/Thumb for the 32-bit case). Each of +them refuses to compile on RISCV64. + +Add the missing CPU(RISCV64) branches: + + * ELF e_ident: ELFCLASS64 / ELFDATA2LSB, the same as X86_64 / ARM64. + * ELF e_machine: EM_RISCV (243) from the RISC-V ELF psABI. + * ELFSymbol::SerializedLayout: the 64-bit layout shared with the + X86_64 / ARM64 branch, since RISCV64 has the same uintptr_t width + and packed-symbol layout. + * RegisterMapping (DWARF unwinding): RegisterFP = 8 (s0/x8) and + RegisterLR = 1 (ra/x1), matching the RISC-V psABI's DWARF register + numbering. + +This is a debug-only path; it has no effect on generated JIT code, +it just makes GdbJIT.cpp compile on RISCV64. + +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/jit/GdbJIT.cpp ++++ b/Source/JavaScriptCore/jit/GdbJIT.cpp +@@ -873,7 +873,7 @@ private: + 0x7F, 'E', 'L', 'F', 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; +-#elif CPU(X86_64) || CPU(ARM64) ++#elif CPU(X86_64) || CPU(ARM64) || CPU(RISCV64) + const uint8_t ident[16] = { + 0x7F, 'E', 'L', 'F', 2, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +@@ -895,6 +895,9 @@ private: + #elif CPU(ARM64) + // AARCH64 + header->machine = 0xB7; ++#elif CPU(RISCV64) ++ // EM_RISCV from the RISC-V ELF psABI specification. ++ header->machine = 243; + #else + #error Unsupported target architecture. 
+ #endif +@@ -996,7 +999,7 @@ public: + uint8_t m_other; + uint16_t m_section; + } __attribute__((packed,aligned(1))); +-#elif CPU(X86_64) || CPU(ARM64) ++#elif CPU(X86_64) || CPU(ARM64) || CPU(RISCV64) + struct SerializedLayout { + SerializedLayout(uint32_t name, uintptr_t value, uintptr_t size, Binding binding, Type type, uint16_t section) + : m_name(name) +@@ -1166,6 +1169,11 @@ private: + #elif CPU(ARM64) + RegisterFP = 29, + RegisterLR = 30, ++#elif CPU(RISCV64) ++ // RISC-V psABI: DWARF register numbers match x0..x31, so the frame ++ // pointer s0/x8 is 8 and the return-address register ra/x1 is 1. ++ RegisterFP = 8, ++ RegisterLR = 1, + #else + RegisterFP = 7, + RegisterLR = 14, diff --git a/libs/wpewebkit/patches/127-JavaScriptCore-WasmTypeDefinition-toB3Type-B3-JIT-guard.patch b/libs/wpewebkit/patches/127-JavaScriptCore-WasmTypeDefinition-toB3Type-B3-JIT-guard.patch new file mode 100644 index 0000000..207ff82 --- /dev/null +++ b/libs/wpewebkit/patches/127-JavaScriptCore-WasmTypeDefinition-toB3Type-B3-JIT-guard.patch @@ -0,0 +1,39 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: make WasmTypeDefinition::toB3Type available whenever B3 is enabled + +The inline helper Wasm::toB3Type() was only declared when one of the +WebAssembly B3-based JIT tiers (OMGJIT or BBQJIT) was enabled, and the +header relied on those TUs having pulled in B3Type.h +transitively for B3::Type to be in scope. + +B3Validate.cpp uses Wasm::toB3Type to validate vector opcode operand +types, and only depends on ENABLE(B3_JIT) - not on the WASM JIT tiers. +RISCV64 currently enables B3_JIT but not WEBASSEMBLY_OMGJIT/BBQJIT, so +B3Validate.cpp fails with "'toB3Type' is not a member of 'JSC::Wasm'". + +Gate toB3Type on ENABLE(B3_JIT) (the actual prerequisite) and include +B3Type.h explicitly so the header is self-sufficient +in any TU that needs it. B3Type.h itself is guarded by ENABLE(B3_JIT) +|| ENABLE(WEBASSEMBLY_BBQJIT), so the include is harmless when neither +is set. 
+ +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/wasm/WasmTypeDefinition.h ++++ b/Source/JavaScriptCore/wasm/WasmTypeDefinition.h +@@ -29,6 +29,7 @@ + + #if ENABLE(WEBASSEMBLY) + ++#include + #include + #include + #include +@@ -284,7 +285,7 @@ ALWAYS_INLINE Width Type::width() const + RELEASE_ASSERT_NOT_REACHED(); + } + +-#if ENABLE(WEBASSEMBLY_OMGJIT) || ENABLE(WEBASSEMBLY_BBQJIT) ++#if ENABLE(B3_JIT) + #define CREATE_CASE(name, id, b3type, ...) case TypeKind::name: return b3type; + inline B3::Type toB3Type(Type type) + { diff --git a/libs/wpewebkit/patches/128-JavaScriptCore-RISCV64-testFinalize-handle-Carry.patch b/libs/wpewebkit/patches/128-JavaScriptCore-RISCV64-testFinalize-handle-Carry.patch new file mode 100644 index 0000000..f4ee812 --- /dev/null +++ b/libs/wpewebkit/patches/128-JavaScriptCore-RISCV64-testFinalize-handle-Carry.patch @@ -0,0 +1,27 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: handle Carry in RISCV64 testFinalize switch + +testFinalize() switches on ResultCondition but does not list the Carry +case, which makes every translation unit that includes +MacroAssemblerRISCV64.h emit: + + warning: enumeration value 'Carry' not handled in switch [-Wswitch] + +The Carry condition cannot be produced by RISC-V test operations (the +ResultCondition enum declaration already comments it "<- not +implemented"), and the sibling switch elsewhere in this file already +groups Carry with Overflow/Signed/PositiveOrZero under +RELEASE_ASSERT_NOT_REACHED(). Add the missing case so testFinalize +matches that convention and the warning disappears. 
+ +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h ++++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h +@@ -4770,6 +4770,7 @@ private: + void testFinalize(ResultCondition cond, RegisterID src, RegisterID dest) + { + switch (cond) { ++ case Carry: + case Overflow: + case Signed: + case PositiveOrZero: diff --git a/libs/wpewebkit/patches/129-bmalloc-enable-libpas-on-Linux-RISCV64.patch b/libs/wpewebkit/patches/129-bmalloc-enable-libpas-on-Linux-RISCV64.patch new file mode 100644 index 0000000..84ac0a3 --- /dev/null +++ b/libs/wpewebkit/patches/129-bmalloc-enable-libpas-on-Linux-RISCV64.patch @@ -0,0 +1,47 @@ +From: Daniel Golle +Subject: [PATCH] bmalloc: enable libpas on Linux/RISCV64 + +bmalloc's BPlatform.h enabled the libpas-backed bmalloc only on a +small allow-list of 64-bit Linux architectures (X86_64 and ARM64). +RISCV64 was missing from that list, so LIBPAS_ENABLED ended up 0 and +the inline allocator entry points bmalloc_*_inline (declared inside +'#if LIBPAS_ENABLED' in bmalloc_heap_inlines.h) were not available. +JSC consumers - e.g. StructureAlignedMemoryAllocator.cpp calling +bmalloc_try_allocate_auxiliary_with_alignment_inline / +bmalloc_deallocate_inline - then fail to compile with "was not +declared in this scope". + +RISCV64 is just another 64-bit Linux target as far as libpas is +concerned, so: + + * Add a BCPU(RISCV64) macro alongside the existing BCPU(X86_64) / + BCPU(ARM64) definitions, gated on __riscv && __riscv_xlen == 64. + * Add BCPU(RISCV64) to the BENABLE(LIBPAS) allow-list for Linux. + +This is what gives RISCV64 the same libpas-backed bmalloc as the +other supported 64-bit Linux targets. 
+ +Signed-off-by: Daniel Golle +--- a/Source/bmalloc/bmalloc/BPlatform.h ++++ b/Source/bmalloc/bmalloc/BPlatform.h +@@ -172,6 +172,11 @@ + #endif + #endif + ++/* BCPU(RISCV64) - RISC-V 64-bit */ ++#if defined(__riscv) && __riscv_xlen == 64 ++#define BCPU_RISCV64 1 ++#endif ++ + /* BCPU(ARM) - ARM, any version*/ + #define BARM_ARCH_AT_LEAST(N) (BCPU(ARM) && BARM_ARCH_VERSION >= N) + +@@ -379,7 +384,7 @@ + + /* BENABLE(LIBPAS) is enabling libpas build. But this does not mean we use libpas for bmalloc replacement. */ + #if !defined(BENABLE_LIBPAS) +-#if BCPU(ADDRESS64) && (BOS(DARWIN) || BOS(WINDOWS) || (BOS(LINUX) && (BCPU(X86_64) || BCPU(ARM64))) || BPLATFORM(PLAYSTATION)) ++#if BCPU(ADDRESS64) && (BOS(DARWIN) || BOS(WINDOWS) || (BOS(LINUX) && (BCPU(X86_64) || BCPU(ARM64) || BCPU(RISCV64))) || BPLATFORM(PLAYSTATION)) + #define BENABLE_LIBPAS 1 + #ifndef PAS_BMALLOC + #define PAS_BMALLOC 1 diff --git a/libs/wpewebkit/patches/130-JavaScriptCore-IPInt-stub-call-trampoline-labels.patch b/libs/wpewebkit/patches/130-JavaScriptCore-IPInt-stub-call-trampoline-labels.patch new file mode 100644 index 0000000..8979e35 --- /dev/null +++ b/libs/wpewebkit/patches/130-JavaScriptCore-IPInt-stub-call-trampoline-labels.patch @@ -0,0 +1,52 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: stub IPInt call trampoline labels on unsupported archs + +InPlaceInterpreter is the WebAssembly in-place interpreter (IPInt). +Its real implementation in InPlaceInterpreter64.asm and +InPlaceInterpreter32_64.asm is gated on a small allow-list of +architectures (ARM64/ARM64E/X86_64/ARMv7); every other CPU falls into +the catch-all 'else' branch of InPlaceInterpreter.asm, which only +emits unimplementedInstruction() stubs for the IPInt opcodes. 
+ +LowLevelInterpreter.asm, however, unconditionally takes the address +of nine IPInt call-sequence labels: + + _wasm_trampoline_wasm_ipint_{call,tail_call}[_wide16][_wide32] + _wasm_ipint_call_return_location[_wide16][_wide32] + +so on a CPU where WEBASSEMBLY is enabled but the real IPInt is not +(e.g. RISCV64) the link fails: + + mold: error: undefined symbol: .Lwasm_trampoline_wasm_ipint_tail_call + ... + +LowLevelInterpreter.asm itself already emits crash() stubs for the +same nine labels in its !WEBASSEMBLY branch. Mirror that here so the +labels also exist when WEBASSEMBLY is on but the architecture has no +IPInt - they trap if ever reached. + +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm +@@ -2177,5 +2177,21 @@ unimplementedInstruction(_i32_atomic_rmw + unimplementedInstruction(_i64_atomic_rmw8_cmpxchg_u) + unimplementedInstruction(_i64_atomic_rmw16_cmpxchg_u) + unimplementedInstruction(_i64_atomic_rmw32_cmpxchg_u) ++ ++# LowLevelInterpreter.asm captures the addresses of these labels via ++# 'lla' / equivalent for the IPInt call sequence, so they have to exist ++# at link time on architectures that enable WEBASSEMBLY but have no ++# IPInt implementation. They should never be reached at run time. 
++_wasm_trampoline_wasm_ipint_call: ++_wasm_trampoline_wasm_ipint_call_wide16: ++_wasm_trampoline_wasm_ipint_call_wide32: ++_wasm_trampoline_wasm_ipint_tail_call: ++_wasm_trampoline_wasm_ipint_tail_call_wide16: ++_wasm_trampoline_wasm_ipint_tail_call_wide32: ++ ++_wasm_ipint_call_return_location: ++_wasm_ipint_call_return_location_wide16: ++_wasm_ipint_call_return_location_wide32: ++ crash() + end + diff --git a/libs/wpewebkit/patches/130-libpas-detect-backtrace-like-StackTrace.patch b/libs/wpewebkit/patches/130-libpas-detect-backtrace-like-StackTrace.patch deleted file mode 100644 index 118d5dd..0000000 --- a/libs/wpewebkit/patches/130-libpas-detect-backtrace-like-StackTrace.patch +++ /dev/null @@ -1,21 +0,0 @@ ---- a/Source/bmalloc/libpas/src/libpas/pas_probabilistic_guard_malloc_allocator.c -+++ b/Source/bmalloc/libpas/src/libpas/pas_probabilistic_guard_malloc_allocator.c -@@ -48,7 +48,7 @@ - #endif - - /* PlayStation does not currently support the backtrace API. Android API versions < 33 don't, either. Windows does not either. 
*/ --#if !PAS_PLATFORM(PLAYSTATION) && (!PAS_OS(ANDROID) || __ANDROID_API__ >= 33) && !PAS_OS(WINDOWS) -+#if !PAS_PLATFORM(PLAYSTATION) && (!PAS_OS(ANDROID) || __ANDROID_API__ >= 33) && !PAS_OS(WINDOWS) && defined(HAVE_EXECINFO_H) && HAVE_EXECINFO_H - #include <execinfo.h> - #else - size_t backtrace(void** buffer, size_t size) ---- a/Source/cmake/OptionsCommon.cmake -+++ b/Source/cmake/OptionsCommon.cmake -@@ -309,6 +309,7 @@ WEBKIT_CHECK_HAVE_INCLUDE(HAVE_SYS_PARAM - WEBKIT_CHECK_HAVE_INCLUDE(HAVE_SYS_TIME_H sys/time.h) - WEBKIT_CHECK_HAVE_INCLUDE(HAVE_SYS_TIMEB_H sys/timeb.h) - WEBKIT_CHECK_HAVE_INCLUDE(HAVE_LINUX_MEMFD_H linux/memfd.h) -+WEBKIT_CHECK_HAVE_INCLUDE(HAVE_EXECINFO_H execinfo.h) - - # Check for functions - # _GNU_SOURCE=1 is required to expose statx diff --git a/libs/wpewebkit/patches/131-JavaScriptCore-forward-MacroAssemblerRISCV64-header.patch b/libs/wpewebkit/patches/131-JavaScriptCore-forward-MacroAssemblerRISCV64-header.patch new file mode 100644 index 0000000..aab5b45 --- /dev/null +++ b/libs/wpewebkit/patches/131-JavaScriptCore-forward-MacroAssemblerRISCV64-header.patch @@ -0,0 +1,42 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: forward MacroAssemblerRISCV64.h and friends to PrivateHeaders + +JSC's CMakeLists copies a curated list of assembler headers into +PrivateHeaders/JavaScriptCore/ so that external consumers (WebCore, +WebKit, JSC's own GLib bindings, ...) can resolve them via +'#include <JavaScriptCore/MacroAssembler.h>'. That header in turn +does '#include "MacroAssemblerRISCV64.h"' on RISCV64, which in turn +does '#include "RISCV64Assembler.h"', which in turn pulls in +RISCV64Registers.h. None of the three is in the forwarding list. + +MacroAssemblerARM64.h / MacroAssemblerARMv7.h / MacroAssemblerX86_64.h +are listed alongside their respective ARM64Assembler.h / ARM64Registers.h +/ X86Assembler.h / X86_64Registers.h; the RISCV64 trio simply hadn't +been added. 
As a result, on RISCV64 every WebCore translation unit +that pulls in JSC's MacroAssembler chain fails with: + + MacroAssembler.h:54:10: fatal error: MacroAssemblerRISCV64.h: + No such file or directory + +or + + MacroAssemblerRISCV64.h:33:10: fatal error: RISCV64Assembler.h: + No such file or directory + +Add the three RISCV64 headers to the forwarding-headers list, matching +what is already done for the other architectures. + +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/CMakeLists.txt ++++ b/Source/JavaScriptCore/CMakeLists.txt +@@ -609,7 +609,10 @@ set(JavaScriptCore_PRIVATE_FRAMEWORK_HEA + assembler/MacroAssemblerARMv7.h + assembler/MacroAssemblerCodeRef.h + assembler/MacroAssemblerHelpers.h ++ assembler/MacroAssemblerRISCV64.h + assembler/MacroAssemblerX86_64.h ++ assembler/RISCV64Assembler.h ++ assembler/RISCV64Registers.h + assembler/MaxFrameExtentForSlowPathCall.h + assembler/OSCheck.h + assembler/Printer.h diff --git a/libs/wpewebkit/patches/132-WTF-RISCV64-WebAssembly-BBQJIT-tier.patch b/libs/wpewebkit/patches/132-WTF-RISCV64-WebAssembly-BBQJIT-tier.patch new file mode 100644 index 0000000..7594515 --- /dev/null +++ b/libs/wpewebkit/patches/132-WTF-RISCV64-WebAssembly-BBQJIT-tier.patch @@ -0,0 +1,56 @@ +From: Daniel Golle +Subject: [PATCH] WTF: enable WebAssembly and BBQJIT on RISCV64 + +RISCV64 previously had ENABLE_WEBASSEMBLY=1 but ENABLE_WEBASSEMBLY_BBQJIT=0 +and ENABLE_WEBASSEMBLY_OMGJIT=0. The LLInt-based wasm interpreter that used +to back this combination was replaced by the IPInt in-place interpreter, +which is not ported to RISCV64, so this configuration left no wasm execution +tier available and JSC aborted at startup with: + + INCOHERENT OPTIONS: at least one of useWasmIPInt, or useBBQJIT must be true + +Turn ENABLE_WEBASSEMBLY_BBQJIT on for RISCV64 so BBQJIT becomes the wasm +execution tier on this architecture. IPInt and OMGJIT remain off. 
+ +The companion changes that make this safe at build- and run-time: + * MacroAssemblerRISCV64.h gains noop SIMD stubs and hard-fault stubs for + the wasm atomic MacroAssembler primitives that BBQJIT uses. + * Options.cpp no longer forces useBBQJIT() = false on RISCV64. + * Options.cpp already forces useWasmSIMD() = false on non-X86_64/ARM64 + architectures (and useSharedArrayBuffer defaults to false), so wasm + SIMD codegen and wasm atomic codegen are never reached on RISCV64 + and their stubs are unreachable. + +Signed-off-by: Daniel Golle +--- a/Source/WTF/wtf/PlatformEnable.h ++++ b/Source/WTF/wtf/PlatformEnable.h +@@ -738,12 +738,28 @@ + #endif + + #if CPU(RISCV64) ++// RISCV64 wasm execution tiers: ++// - IPInt (LLInt in-place interpreter): not ported; left disabled. ++// The IPInt call-trampoline labels that LowLevelInterpreter.asm refers ++// to are stubbed to crash() so the link still succeeds. ++// - BBQJIT: enabled. The arch-conditional sites in WasmBBQJIT64.cpp ++// that have no #else fall through to portable MacroAssembler ++// primitives; the remaining gaps (wasm SIMD codegen, wasm atomics ++// codegen) are addressed by gating both off at runtime: ++// Options::useWasmSIMD = false (already set in Options.cpp for ++// !X86_64 && !ARM64), ++// Options::useSharedArrayBuffer = false (the default), which in ++// turn keeps wasm atomic opcodes off the JIT codepath. ++// MacroAssemblerRISCV64.h carries hard-fault stubs for the wasm ++// atomic MacroAssembler entry points, so they trap loudly if the ++// runtime gating is ever bypassed; the SIMD entry points are noops. ++// - OMGJIT: not ported. 
+ #undef ENABLE_WEBASSEMBLY + #define ENABLE_WEBASSEMBLY 1 + #undef ENABLE_WEBASSEMBLY_OMGJIT + #define ENABLE_WEBASSEMBLY_OMGJIT 0 + #undef ENABLE_WEBASSEMBLY_BBQJIT +-#define ENABLE_WEBASSEMBLY_BBQJIT 0 ++#define ENABLE_WEBASSEMBLY_BBQJIT 1 + #endif + + #if !defined(ENABLE_C_LOOP) diff --git a/libs/wpewebkit/patches/133-JavaScriptCore-RISCV64-MacroAssembler-BBQJIT-primitives.patch b/libs/wpewebkit/patches/133-JavaScriptCore-RISCV64-MacroAssembler-BBQJIT-primitives.patch new file mode 100644 index 0000000..9613036 --- /dev/null +++ b/libs/wpewebkit/patches/133-JavaScriptCore-RISCV64-MacroAssembler-BBQJIT-primitives.patch @@ -0,0 +1,424 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: add RISCV64 MacroAssembler primitives required by BBQJIT + +Enabling BBQJIT on RISCV64 exposes a set of MacroAssembler primitives that +WasmBBQJIT.cpp / WasmBBQJIT64.cpp / WasmBBQJIT32_64.cpp call but that +MacroAssemblerRISCV64.h does not yet provide. Add them in four groups: + + * Scalar BBQJIT primitives, fully implemented for RISCV64: + - floatMin / floatMax / doubleMin / doubleMax via fmin.s/fmax.s and + fmin.d/fmax.d (the F/D extensions in rv64gc). + - addLeftShift64 via slli + add. + - multiplyAddZeroExtend32 via zero-extend + 64-bit mul + add. + - rotateLeft32 / rotateLeft64 (immediate and variable shift) via + shift + shift + or; rv64gc has no rotate, the Zbb rol* family + is not in the baseline. + - div32 / uDiv32 / div64 / uDiv64 via the RISC-V M extension + (divw / divuw / div / divu); 32-bit results are masked back to + 32 bits. + - multiplySub32 / multiplySub64 via mul + sub. + - convertUInt32ToFloat via fcvt.s.wu. + + * 8/16-bit and Address->BaseIndex transfer overloads. transfer{8,16} + were not defined for any addressing mode, and transfer{32,64,Vector} + only had Address->Address and BaseIndex->BaseIndex variants. BBQJIT + calls all of these for wasm struct copy / memory init code. 
Each is the same pattern as the existing transfer32: load to a scratch + register, store from it. + + * SIMD vector noop stubs (~44 methods). The RISC-V V vector extension + is not part of the OpenWrt rv64gc baseline, and there is no SIMD + codegen for RISCV64 in MacroAssemblerRISCV64.h. Options::useWasmSIMD + is forced off on RISCV64, so the BBQJIT SIMD codepaths are + unreachable; matching the existing pattern in this file, these are + templated empty noops via MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD. + + vectorExtractLane / vectorReplaceLane cannot use the existing + forwarding-reference noop macro: their first argument is a SIMDInfo + bit-field (SIMDLane / SIMDSignMode) and bit-fields cannot bind to + Args&&. They are defined as separate by-value templated overloads. + The SIMDLane / SIMDSignMode type names are not visible in this + header without pulling in their defining header, which we + avoid for pure stubs, so the by-value template avoids naming the + types altogether. + + * Wasm atomic hard-fault stubs (36 methods: loadLinkAcq{8,16,32,64}, + storeCondRel{8,16,32,64}, branchAtomicStrongCAS{8,16,32,64}, + atomicStrongCAS{8,16,32,64}, atomicXchg{,Add,Clear,Or,Xor}{8,16,32,64}). + The RISC-V A extension is present in rv64gc, but the corresponding + AMO / LR / SC instruction emitters have not been added to + RISCV64Assembler.h. Wasm threads / shared memory is gated off at + runtime via useSharedArrayBuffer = false (the default), which keeps + wasm atomic opcodes off the JIT codepath, so these stubs are + unreachable. A new + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD macro variant + is added so an accidentally-reached atomic codepath traps loudly + rather than silently miscompiling. + +This is the minimum surface needed to make BBQJIT on RISCV64 link and +run non-SIMD, non-atomic wasm modules. The two stub groups (atomic and +SIMD) are intentionally flagged in their comments so they are easy to +find when implementing the real codegen in follow-up work. 
+ +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h ++++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h +@@ -36,6 +36,16 @@ + template<typename... Args> void methodName(Args&&...) { } + #define MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD_WITH_RETURN(methodName, returnType) \ + template<typename... Args> returnType methodName(Args&&...) { return { }; } ++// Wasm atomic methods needed by BBQJIT for which no native RISC-V code ++// generation exists yet. Unlike the SIMD stubs, which are silent noops, ++// these are deliberately fatal: wasm shared memory is gated off on ++// RISCV64 (useSharedArrayBuffer), so the BBQJIT codegen for atomic wasm ++// opcodes must never run. If something does try to emit one of these, ++// we want to know with a loud crash, not a silent miscompilation. ++#define MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(methodName) \ ++ template<typename... Args> void methodName(Args&&...) { RELEASE_ASSERT_NOT_REACHED(); } ++#define MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(methodName, returnType) \ ++ template<typename... Args> returnType methodName(Args&&...) { RELEASE_ASSERT_NOT_REACHED(); return { }; } + + namespace JSC { + +@@ -837,6 +847,123 @@ public: + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight32); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight64); + ++ // Scalar BBQJIT primitives: fused shift/add and rotate-left, used by the ++ // wasm bytecode-to-machine-code path. RISC-V's baseline rv64gc has no ++ // rotate instruction (Zbb's rolw/rol would do it in one), so synthesize ++ // via shift + shift + or. 
++ void addLeftShift64(RegisterID n, RegisterID m, TrustedImm32 amount, RegisterID d) ++ { ++ auto temp = temps<Data>(); ++ m_assembler.slliInsn(temp.data(), m, uint32_t(amount.m_value & 63)); ++ m_assembler.addInsn(d, n, temp.data()); ++ } ++ ++ // dest = summand + zext32(mulLeft) * zext32(mulRight), keeping the full 64-bit product (as ARM64 umaddl does). ++ void multiplyAddZeroExtend32(RegisterID mulLeft, RegisterID mulRight, RegisterID summand, RegisterID dest) ++ { ++ auto temp = temps<Data, Memory>(); ++ zeroExtend32ToWord(mulLeft, temp.data()); ++ zeroExtend32ToWord(mulRight, temp.memory()); ++ m_assembler.mulInsn(temp.data(), temp.data(), temp.memory()); ++ m_assembler.addInsn(dest, summand, temp.data()); ++ } ++ ++ void rotateLeft32(RegisterID src, TrustedImm32 imm, RegisterID dest) ++ { ++ int32_t shift = imm.m_value & 31; ++ if (!shift) { ++ if (src != dest) ++ move(src, dest); ++ m_assembler.maskRegister<32>(dest); ++ return; ++ } ++ auto temp = temps<Data, Memory>(); ++ m_assembler.slliwInsn(temp.data(), src, uint32_t(shift)); ++ m_assembler.srliwInsn(temp.memory(), src, uint32_t(32 - shift)); ++ m_assembler.orInsn(dest, temp.data(), temp.memory()); ++ m_assembler.maskRegister<32>(dest); ++ } ++ ++ void rotateLeft32(RegisterID src, RegisterID shift, RegisterID dest) ++ { ++ auto temp = temps<Data, Memory>(); ++ m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<32>()); ++ m_assembler.subInsn(temp.data(), temp.data(), shift); ++ m_assembler.sllwInsn(temp.memory(), src, shift); ++ m_assembler.srlwInsn(temp.data(), src, temp.data()); ++ m_assembler.orInsn(dest, temp.memory(), temp.data()); ++ m_assembler.maskRegister<32>(dest); ++ } ++ ++ void rotateLeft64(RegisterID src, TrustedImm32 imm, RegisterID dest) ++ { ++ int32_t shift = imm.m_value & 63; ++ if (!shift) { ++ if (src != dest) ++ move(src, dest); ++ return; ++ } ++ auto temp = temps<Data, Memory>(); ++ m_assembler.slliInsn(temp.data(), src, uint32_t(shift)); ++ 
m_assembler.srliInsn(temp.memory(), src, uint32_t(64 - shift)); ++ m_assembler.orInsn(dest, temp.data(), temp.memory()); ++ } ++ ++ void rotateLeft64(RegisterID src, RegisterID shift, RegisterID dest) ++ { ++ auto temp = temps<Data, Memory>(); ++ 
m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<64>()); ++ m_assembler.subInsn(temp.data(), temp.data(), shift); ++ m_assembler.sllInsn(temp.memory(), src, shift); ++ m_assembler.srlInsn(temp.data(), src, temp.data()); ++ m_assembler.orInsn(dest, temp.memory(), temp.data()); ++ } ++ ++ // Integer divide via the RISC-V M extension (in rv64gc); divw/divuw sign-extend, so 32-bit results are masked to 32 bits. ++ void div32(RegisterID dividend, RegisterID divisor, RegisterID dest) ++ { ++ m_assembler.divwInsn(dest, dividend, divisor); ++ m_assembler.maskRegister<32>(dest); ++ } ++ ++ void uDiv32(RegisterID dividend, RegisterID divisor, RegisterID dest) ++ { ++ m_assembler.divuwInsn(dest, dividend, divisor); ++ m_assembler.maskRegister<32>(dest); ++ } ++ ++ void div64(RegisterID dividend, RegisterID divisor, RegisterID dest) ++ { ++ m_assembler.divInsn(dest, dividend, divisor); ++ } ++ ++ void uDiv64(RegisterID dividend, RegisterID divisor, RegisterID dest) ++ { ++ m_assembler.divuInsn(dest, dividend, divisor); ++ } ++ ++ // dest = minuend - (mulLeft * mulRight), used by wasm i32/i64 rem (rem == lhs - (lhs/rhs) * rhs); RISC-V has no fused mul-sub. ++ void multiplySub32(RegisterID mulLeft, RegisterID mulRight, RegisterID minuend, RegisterID dest) ++ { ++ auto temp = temps<Data>(); ++ m_assembler.mulwInsn(temp.data(), mulLeft, mulRight); ++ m_assembler.subwInsn(dest, minuend, temp.data()); ++ m_assembler.maskRegister<32>(dest); ++ } ++ ++ void multiplySub64(RegisterID mulLeft, RegisterID mulRight, RegisterID minuend, RegisterID dest) ++ { ++ auto temp = temps<Data>(); ++ m_assembler.mulInsn(temp.data(), mulLeft, mulRight); ++ m_assembler.subInsn(dest, minuend, temp.data()); ++ } ++ ++ // uint32 -> single-precision float, fcvt.s.wu (one instruction). 
++ void convertUInt32ToFloat(RegisterID src, FPRegisterID dest) ++ { ++ m_assembler.fcvtInsn<RISCV64Assembler::FCVTType::S, RISCV64Assembler::FCVTType::WU>(dest, src); ++ } ++ + void load8(Address address, RegisterID dest) + { + auto resolution = resolveAddress(address, lazyTemp<Memory>()); +@@ -1425,6 +1552,73 @@ public: + transfer64(src, dest); + } + ++ // 8- and 16-bit mem-to-mem transfers, and Address->BaseIndex ++ // variants of the existing widths, needed by BBQJIT (wasm ++ // struct copy / memory init etc.). Implemented in the same shape ++ // as the existing transfer32 / transfer64: load to a scratch ++ // register, store from it. ++ void transfer8(Address src, Address dest) ++ { ++ auto temp = temps<Data>(); ++ load8(src, temp.data()); ++ store8(temp.data(), dest); ++ } ++ ++ void transfer8(BaseIndex src, BaseIndex dest) ++ { ++ auto temp = temps<Data>(); ++ load8(src, temp.data()); ++ store8(temp.data(), dest); ++ } ++ ++ void transfer8(Address src, BaseIndex dest) ++ { ++ auto temp = temps<Data>(); ++ load8(src, temp.data()); ++ store8(temp.data(), dest); ++ } ++ ++ void transfer16(Address src, Address dest) ++ { ++ auto temp = temps<Data>(); ++ load16(src, temp.data()); ++ store16(temp.data(), dest); ++ } ++ ++ void transfer16(BaseIndex src, BaseIndex dest) ++ { ++ auto temp = temps<Data>(); ++ load16(src, temp.data()); ++ store16(temp.data(), dest); ++ } ++ ++ void transfer16(Address src, BaseIndex dest) ++ { ++ auto temp = temps<Data>(); ++ load16(src, temp.data()); ++ store16(temp.data(), dest); ++ } ++ ++ void transfer32(Address src, BaseIndex dest) ++ { ++ auto temp = temps<Data>(); ++ load32(src, temp.data()); ++ store32(temp.data(), dest); ++ } ++ ++ void transfer64(Address src, BaseIndex dest) ++ { ++ auto temp = temps<Data>(); ++ load64(src, temp.data()); ++ store64(temp.data(), dest); ++ } ++ ++ void transferVector(Address src, BaseIndex dest) ++ { ++ loadVector(src, fpTempRegister); ++ storeVector(fpTempRegister, dest); ++ } ++ + void storePair32(RegisterID 
src1, RegisterID src2, RegisterID dest) + { + storePair32(src1, src2, dest, TrustedImm32(0)); +@@ 
-2150,6 +2344,116 @@ public: + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorMulSat); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorDotProduct); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSwizzle); ++ // Additional vector noop stubs needed by BBQJIT (kept unreachable via ++ // useWasmSIMD = false on RISCV64; see Options.cpp). ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(compareFloatingPointVectorUnordered); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(moveZeroToVector); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorAbsInt64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorConvertLowSignedInt32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorConvertLowUnsignedInt32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorConvertUnsigned); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorExtaddPairwiseUnsignedInt16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorExtractPair); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorHorizontalAdd); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad8Splat); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSshl); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSshr8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshl); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshr8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSwizzle2); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorTruncSatSignedFloat64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorTruncSatUnsignedFloat32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorTruncSatUnsignedFloat64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUnsignedMax); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUnsignedMin); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUnzipEven); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorZipUpper); ++ ++ // Wasm atomics: the RISC-V A extension is available (the OpenWrt 
-march ++ // baseline is rv64gc, i.e. includes A), but the AMO/LR/SC instruction ++ // emitters in RISCV64Assembler.h have not been added yet. Stub the ++ // BBQJIT atomic API with hard-fault unimplemented methods: at runtime ++ // wasm shared memory is gated off via useSharedArrayBuffer = false, so ++ // wasm atomic opcodes are unreachable, and these stubs only ever exist ++ // for compile-time completeness. Filling these in (and adding the ++ // matching RISCV64Assembler.h emitters) is a follow-up that unlocks the ++ // wasm threads proposal on RISCV64. ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS8, Jump); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS16, Jump); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS32, Jump); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS64, Jump); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd8); ++ 
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr64); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor64); ++ // atomicStrongCAS{N}: the non-branching CAS overloads used by BBQJIT ++ // when the caller only needs success/failure in resultGPR (rather ++ // than a JIT-emitted branch). Same runtime-unreachable rationale as ++ // branchAtomicStrongCAS{N} above. ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS16); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS32); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS64); ++ // Additional SIMD vector noop stubs uncovered by enabling BBQJIT. 
++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSplat); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshl8); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSshr); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshr); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorMulLow); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorMulHigh); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorFusedMulAdd); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorFusedNegMulAdd); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad16Splat); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad32Splat); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad64Splat); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad8Lane); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad16Lane); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad32Lane); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorLoad64Lane); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorStore8Lane); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorStore16Lane); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorStore32Lane); ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorStore64Lane); ++ // vectorExtractLane / vectorReplaceLane: by-value templated stubs. ++ // The MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD form uses ++ // Args&& forwarding references; binding a SIMDInfo::lane / ++ // SIMDInfo::signMode bit-field to a non-const reference is ++ // ill-formed, so use by-value template parameters instead. ++ // (The SIMD type names are not visible in this header without ++ // pulling in their defining header, which we avoid for ++ // pure stubs.) Same unreachability rationale as the other SIMD ++ // stubs: useWasmSIMD = false on RISCV64. 
++ template<typename T1, typename T2, typename T3, typename T4> ++ void vectorExtractLane(T1, T2, T3, T4) { } ++ template<typename T1, typename T2, typename T3, typename T4, typename T5> ++ void vectorExtractLane(T1, T2, T3, T4, T5) { } ++ template<typename T1, typename T2, typename T3, typename T4> ++ void vectorReplaceLane(T1, T2, T3, T4) { } ++ MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(move128ToVector); + + template<PtrTag resultTag, PtrTag locationTag> + static CodePtr<resultTag> readCallTarget(CodeLocationCall<locationTag> call) +@@ -3520,6 +3824,26 @@ public: + m_assembler.fsgnjxInsn<64>(dest, src, src); + } + ++ void floatMin(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) ++ { ++ m_assembler.fminInsn<32>(dest, op1, op2); ++ } ++ ++ void floatMax(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) ++ { ++ m_assembler.fmaxInsn<32>(dest, op1, op2); ++ } ++ ++ void doubleMin(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) ++ { ++ m_assembler.fminInsn<64>(dest, op1, op2); ++ } ++ ++ void doubleMax(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest) ++ { ++ m_assembler.fmaxInsn<64>(dest, op1, op2); ++ } ++ + void ceilFloat(FPRegisterID src, FPRegisterID dest) + { + roundFP<32, RISCV64Assembler::FPRoundingMode::RUP>(src, dest); diff --git a/libs/wpewebkit/patches/134-JavaScriptCore-Options-keep-BBQJIT-on-RISCV64.patch b/libs/wpewebkit/patches/134-JavaScriptCore-Options-keep-BBQJIT-on-RISCV64.patch new file mode 100644 index 0000000..87643e6 --- /dev/null +++ b/libs/wpewebkit/patches/134-JavaScriptCore-Options-keep-BBQJIT-on-RISCV64.patch @@ -0,0 +1,35 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: do not force useBBQJIT() off on RISCV64 + +Options.cpp force-disables a number of JIT-tier and wasm options on any +CPU other than X86_64 / ARM64. 
For RISCV64 these defaults remain correct +- there is no concurrent GC, no wasm SIMD codegen, and no IPInt tier +yet - except for useBBQJIT(): RISCV64 enables WEBASSEMBLY_BBQJIT in +PlatformEnable.h and BBQJIT is the only wasm execution tier available +on this architecture. + +Add RISCV64 to the carve-out so useBBQJIT() retains its (true) default, +matching what is already done for ARM_THUMB2. 
+ +With ARM_THUMB2 and RISCV64 carved out, the resulting wasm tier matrix +on RISCV64 is: + useWasmIPInt = false (no IPInt port) + useWasmSIMD = false (no SIMD codegen) + useBBQJIT = true (this change) + useOMGJIT = false (no OMGJIT port) + +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/runtime/Options.cpp ++++ b/Source/JavaScriptCore/runtime/Options.cpp +@@ -796,7 +796,10 @@ void Options::notifyOptionsChanged() + Options::forceUnlinkedDFG() = false; + Options::useWasmSIMD() = false; + Options::useWasmIPInt() = false; +-#if !CPU(ARM_THUMB2) ++#if !CPU(ARM_THUMB2) && !CPU(RISCV64) ++ // RISCV64 has BBQJIT (WEBASSEMBLY_BBQJIT enabled in PlatformEnable.h); ++ // wasm SIMD and IPInt are still off above, so BBQJIT is the only wasm ++ // tier on this architecture. + Options::useBBQJIT() = false; + #endif + #endif diff --git a/libs/wpewebkit/patches/135-JavaScriptCore-BBQJIT-ScratchScope-other-archs.patch b/libs/wpewebkit/patches/135-JavaScriptCore-BBQJIT-ScratchScope-other-archs.patch new file mode 100644 index 0000000..4d1070d --- /dev/null +++ b/libs/wpewebkit/patches/135-JavaScriptCore-BBQJIT-ScratchScope-other-archs.patch @@ -0,0 +1,45 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: declare BBQJIT ScratchScope for non-X86/non-ARM64 archs + +WasmBBQJIT64.cpp's addSIMDShuffle and addSIMDShift declare a ScratchScope +named "scratches" only inside #if CPU(X86_64) / #elif CPU(ARM64), then +reference that name from "if constexpr (isX86())" blocks and other code +in the same function. In a non-template function, "if constexpr" does not +discard the false branch from name lookup (C++17 [stmt.if]p2), so on any +architecture not in that list the build fails with: + + error: 'scratches' was not declared in this scope + +Add an #else branch in both functions declaring a ScratchScope with the +same template arity as the X86 path. 
wasm SIMD is gated off at runtime +on RISCV64 via Options::useWasmSIMD = false, so neither function is +reached and the new scratches declaration is unused at run time; it +only exists so the function bodies parse on RISCV64. + +Signed-off-by: Daniel Golle +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp +@@ -3442,6 +3442,11 @@ void BBQJIT::notifyFunctionUsesSIMD() + clobber(ARM64Registers::q28); + clobber(ARM64Registers::q29); + ScratchScope<0, 0> scratches(*this, Location::fromFPR(ARM64Registers::q28), Location::fromFPR(ARM64Registers::q29)); ++#else ++ // Other architectures (e.g. RISCV64) have no wasm SIMD codegen and ++ // never reach this function at runtime (useWasmSIMD is forced off). ++ // Declare scratches so the if-constexpr(isX86()) block below parses. ++ ScratchScope<0, 1> scratches(*this); + #endif + Location aLocation = loadIfNecessary(a); + Location bLocation = loadIfNecessary(b); +@@ -3497,6 +3502,11 @@ void BBQJIT::notifyFunctionUsesSIMD() + // Clobber and preserve RCX on x86, since we need it to do shifts. + clobber(shiftRCX); + ScratchScope<2, 2> scratches(*this, Location::fromGPR(shiftRCX)); ++#elif !CPU(ARM64) ++ // RISCV64 / other archs: no wasm SIMD codegen exists; this function ++ // is unreachable at runtime via useWasmSIMD = false. Declare a ++ // ScratchScope so the X86-only sub-block below still parses. 
++ ScratchScope<2, 2> scratches(*this); + #endif + Location srcLocation = loadIfNecessary(src); + Location shiftLocation; diff --git a/libs/wpewebkit/patches/136-JavaScriptCore-IPInt-enable-validation-on-RISCV64.patch b/libs/wpewebkit/patches/136-JavaScriptCore-IPInt-enable-validation-on-RISCV64.patch new file mode 100644 index 0000000..455d8c5 --- /dev/null +++ b/libs/wpewebkit/patches/136-JavaScriptCore-IPInt-enable-validation-on-RISCV64.patch @@ -0,0 +1,44 @@ +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.h ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.h +@@ -784,7 +784,7 @@ extern "C" void SYSV_ABI ipint_entry(); + m(0x11, uint_stack_vector) \ + m(0x12, uint_ret) \ + +-#if !ENABLE(C_LOOP) && (CPU(ADDRESS64) && (CPU(ARM64) || CPU(X86_64)) || (CPU(ADDRESS32) && CPU(ARM_THUMB2))) ++#if !ENABLE(C_LOOP) && (CPU(ADDRESS64) && (CPU(ARM64) || CPU(X86_64) || CPU(RISCV64)) || (CPU(ADDRESS32) && CPU(ARM_THUMB2))) + FOR_EACH_IPINT_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); + FOR_EACH_IPINT_GC_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); + FOR_EACH_IPINT_CONVERSION_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp +@@ -69,7 +69,7 @@ do { \ + + void initialize() + { +-#if !ENABLE(C_LOOP) && ((CPU(ADDRESS64) && (CPU(ARM64) || CPU(X86_64))) || (CPU(ADDRESS32) && CPU(ARM_THUMB2))) ++#if !ENABLE(C_LOOP) && ((CPU(ADDRESS64) && (CPU(ARM64) || CPU(X86_64) || CPU(RISCV64))) || (CPU(ADDRESS32) && CPU(ARM_THUMB2))) + + #define INIT_IPINT_BASE_POINTER(basePointerName, targetAddress) \ + g_opcodeConfig.basePointerName = removeCodePtrTag(reinterpret_cast(targetAddress)); +@@ -88,13 +88,19 @@ void initialize() + FOR_EACH_IPINT_MINT_RETURN_OPCODE(VALIDATE_IPINT_MINT_RETURN_OPCODE); + FOR_EACH_IPINT_UINT_OPCODE(VALIDATE_IPINT_UINT_OPCODE); + #else +- RELEASE_ASSERT_NOT_REACHED("IPInt only supports ARM64 and X86_64 (for now)."); ++ // No IPInt opcode dispatch table on this 
architecture: offlineasm ++ // does not emit the ipint_*_validate entry points outside the guard ++ // above, so there is nothing to validate. LLInt::initialize() calls ++ // us unconditionally; just return. Options::useWasmIPInt is forced ++ // false on such architectures (see Options.cpp), so the ++ // (uninitialised) g_opcodeConfig dispatch base pointers are never ++ // read at run time. + #endif + } + + void verifyInitialization() + { +-#if !ENABLE(C_LOOP) && ((CPU(ADDRESS64) && (CPU(ARM64) || CPU(X86_64))) || (CPU(ADDRESS32) && CPU(ARM_THUMB2))) ++#if !ENABLE(C_LOOP) && ((CPU(ADDRESS64) && (CPU(ARM64) || CPU(X86_64) || CPU(RISCV64))) || (CPU(ADDRESS32) && CPU(ARM_THUMB2))) + + #define VERIFY_IPINT_BASE_POINTER(basePointerName, targetAddress) \ + RELEASE_ASSERT(g_opcodeConfig.basePointerName == removeCodePtrTag(reinterpret_cast(targetAddress))); diff --git a/libs/wpewebkit/patches/137-JavaScriptCore-IPInt-asm-WEBASSEMBLY-guards-RISCV64.patch b/libs/wpewebkit/patches/137-JavaScriptCore-IPInt-asm-WEBASSEMBLY-guards-RISCV64.patch new file mode 100644 index 0000000..78dedf4 --- /dev/null +++ b/libs/wpewebkit/patches/137-JavaScriptCore-IPInt-asm-WEBASSEMBLY-guards-RISCV64.patch @@ -0,0 +1,83 @@ +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm +@@ -1215,7 +1215,7 @@ op(wasm_throw_from_fault_handler_trampol + end) + + op(ipint_entry, macro() +-if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7 or RISCV64) + preserveCallerPCAndCFR() + saveIPIntRegisters() + storep wasmInstance, CodeBlock[cfr] +@@ -1233,7 +1233,7 @@ else + end + end) + +-if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7 or RISCV64) + .ipint_entry_end_local: + argumINTInitializeDefaultLocals() + jmp .ipint_entry_end_local +@@ -1331,7 +1331,7 @@ end + end + + op(ipint_catch_entry, macro() +-if WEBASSEMBLY and (ARM64 
or ARM64E or X86_64) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or RISCV64) + ipintCatchCommon() + + move cfr, a1 +@@ -1348,7 +1348,7 @@ end + end) + + op(ipint_catch_all_entry, macro() +-if WEBASSEMBLY and (ARM64 or ARM64E or X86_64) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or RISCV64) + ipintCatchCommon() + + move cfr, a1 +@@ -1365,7 +1365,7 @@ end + end) + + op(ipint_table_catch_entry, macro() +-if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7 or RISCV64) + ipintCatchCommon() + + # push arguments but no ref: sp in a2, call normal operation +@@ -1384,7 +1384,7 @@ end + end) + + op(ipint_table_catch_ref_entry, macro() +-if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7 or RISCV64) + ipintCatchCommon() + + # push both arguments and ref +@@ -1403,7 +1403,7 @@ end + end) + + op(ipint_table_catch_all_entry, macro() +-if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7 or RISCV64) + ipintCatchCommon() + + # do nothing: 0 in sp for no arguments, call normal operation +@@ -1422,7 +1422,7 @@ end + end) + + op(ipint_table_catch_allref_entry, macro() +-if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) ++if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7 or RISCV64) + ipintCatchCommon() + + # push only the ref +@@ -1543,7 +1543,7 @@ defineWasmBuiltinTrampoline(jsstring, co + # 5. 
Instruction implementation # + ################################# + +-if JSVALUE64 and (ARM64 or ARM64E or X86_64) ++if JSVALUE64 and (ARM64 or ARM64E or X86_64 or RISCV64) + include InPlaceInterpreter64 + elsif ARMv7 + include InPlaceInterpreter32_64 diff --git a/libs/wpewebkit/patches/138-JavaScriptCore-IPInt64-asm-RISCV64-port.patch b/libs/wpewebkit/patches/138-JavaScriptCore-IPInt64-asm-RISCV64-port.patch new file mode 100644 index 0000000..601afcb --- /dev/null +++ b/libs/wpewebkit/patches/138-JavaScriptCore-IPInt64-asm-RISCV64-port.patch @@ -0,0 +1,928 @@ +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm +@@ -51,7 +51,7 @@ end + + # Dispatch target bases + +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + const ipint_dispatch_base = _ipint_unreachable + const ipint_gc_dispatch_base = _ipint_struct_new + const ipint_conversion_dispatch_base = _ipint_i32_trunc_sat_f32_s +@@ -74,8 +74,15 @@ elsif X86_64 + lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 + addq t1, t0 + jmp t0 ++elsif RISCV64 ++ # RISC-V: synthesize the ARM64 addlshift via lshiftp + addp, since the ++ # baseline rv64gc ISA does not have an add-with-shifted-operand form. 
++ pcrtoaddr ipint_dispatch_base, t7 ++ lshiftp (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 ++ addp t7, t0 ++ jmp t0 + else +- error ++ break + end + end + +@@ -85,7 +92,7 @@ end + macro pushQuad(reg) + if ARM64 or ARM64E + push reg, reg +- elsif X86_64 ++ elsif X86_64 or RISCV64 + push reg, reg + else + break +@@ -100,7 +107,7 @@ macro popQuad(reg) + # FIXME: emit post-increment in offlineasm + if ARM64 or ARM64E + loadqinc [sp], reg, V128ISize +- elsif X86_64 ++ elsif X86_64 or RISCV64 + loadq [sp], reg + addq V128ISize, sp + else +@@ -198,7 +205,7 @@ macro argumINTDispatch() + addp 1, MC + bbgteq argumINTTmp, (constexpr IPInt::ArgumINTBytecode::NumOpcodes), _ipint_argument_dispatch_err + lshiftp (constexpr (WTF::fastLog2(JSC::IPInt::alignArgumInt))), argumINTTmp +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + pcrtoaddr _argumINT_begin, argumINTDsp + addp argumINTTmp, argumINTDsp + jmp argumINTDsp +@@ -221,7 +228,7 @@ macro argumINTInitializeDefaultLocals() + if ARM64 or ARM64E + # offlineasm doesn't have xzr so emit it + emit "stp x19, xzr, [x9]" +-elsif X86_64 ++elsif X86_64 or RISCV64 + storep argumINTTmp, [argumINTDst] + storep 0, 8[argumINTDst] + end +@@ -531,7 +538,7 @@ end + jmp .ipint_end_ret + end) + +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + const IPIntCallCallee = sc1 + const IPIntCallFunctionSlot = sc0 + elsif X86_64 +@@ -1834,7 +1841,7 @@ ipintOp(_i32_div_s, macro() + elsif ARM64 or ARM64E or RISCV64 + divis t1, t0 + else +- error ++ break + end + pushInt32(t0) + advancePC(1) +@@ -1859,7 +1866,7 @@ ipintOp(_i32_div_u, macro() + elsif ARM64 or ARM64E or RISCV64 + divi t1, t0 + else +- error ++ break + end + pushInt32(t0) + advancePC(1) +@@ -1895,7 +1902,7 @@ ipintOp(_i32_rem_s, macro() + elsif RISCV64 + remis t0, t1, t2 + else +- error ++ break + end + + .ipint_i32_rem_s_return: +@@ -1923,7 +1930,7 @@ ipintOp(_i32_rem_u, macro() + elsif RISCV64 + remi t0, t1, t2 + else +- error ++ break + end + pushInt32(t2) + 
advancePC(1) +@@ -2106,7 +2113,7 @@ ipintOp(_i64_div_s, macro() + elsif ARM64 or ARM64E or RISCV64 + divqs t1, t0 + else +- error ++ break + end + pushInt64(t0) + advancePC(1) +@@ -2131,7 +2138,7 @@ ipintOp(_i64_div_u, macro() + elsif ARM64 or ARM64E or RISCV64 + divq t1, t0 + else +- error ++ break + end + pushInt64(t0) + advancePC(1) +@@ -2167,7 +2174,7 @@ ipintOp(_i64_rem_s, macro() + elsif RISCV64 + remqs t0, t1, t2 + else +- error ++ break + end + + .ipint_i64_rem_s_return: +@@ -2195,7 +2202,7 @@ ipintOp(_i64_rem_u, macro() + elsif RISCV64 + remq t0, t1, t2 + else +- error ++ break + end + pushInt64(t2) + advancePC(1) +@@ -9204,7 +9211,7 @@ ipintOp(_i32_atomic_load, macro() + if ARM64 or ARM64E or X86_64 + atomicloadi [mem], scratch + else +- error ++ break + end + pushInt32(scratch) + end) +@@ -9215,7 +9222,7 @@ ipintOp(_i64_atomic_load, macro() + if ARM64 or ARM64E or X86_64 + atomicloadq [mem], scratch + else +- error ++ break + end + pushInt64(scratch) + end) +@@ -9226,7 +9233,7 @@ ipintOp(_i32_atomic_load8_u, macro() + if ARM64 or ARM64E or X86_64 + atomicloadb [mem], scratch + else +- error ++ break + end + pushInt32(scratch) + end) +@@ -9237,7 +9244,7 @@ ipintOp(_i32_atomic_load16_u, macro() + if ARM64 or ARM64E or X86_64 + atomicloadh [mem], scratch + else +- error ++ break + end + pushInt32(scratch) + end) +@@ -9248,7 +9255,7 @@ ipintOp(_i64_atomic_load8_u, macro() + if ARM64 or ARM64E or X86_64 + atomicloadb [mem], scratch + else +- error ++ break + end + pushInt64(scratch) + end) +@@ -9259,7 +9266,7 @@ ipintOp(_i64_atomic_load16_u, macro() + if ARM64 or ARM64E or X86_64 + atomicloadh [mem], scratch + else +- error ++ break + end + pushInt64(scratch) + end) +@@ -9270,7 +9277,7 @@ ipintOp(_i64_atomic_load32_u, macro() + if ARM64 or ARM64E or X86_64 + atomicloadi [mem], scratch + else +- error ++ break + end + pushInt64(scratch) + end) +@@ -9374,7 +9381,7 @@ ipintOp(_i32_atomic_store, macro() + move value, newValue + end) + else +- error ++ break + end 
+ end) + end) +@@ -9390,7 +9397,7 @@ ipintOp(_i64_atomic_store, macro() + move value, newValue + end) + else +- error ++ break + end + end) + end) +@@ -9406,7 +9413,7 @@ ipintOp(_i32_atomic_store8_u, macro() + move value, newValue + end) + else +- error ++ break + end + end) + end) +@@ -9422,7 +9429,7 @@ ipintOp(_i32_atomic_store16_u, macro() + move value, newValue + end) + else +- error ++ break + end + end) + end) +@@ -9438,7 +9445,7 @@ ipintOp(_i64_atomic_store8_u, macro() + move value, newValue + end) + else +- error ++ break + end + end) + end) +@@ -9454,7 +9461,7 @@ ipintOp(_i64_atomic_store16_u, macro() + move value, newValue + end) + else +- error ++ break + end + end) + end) +@@ -9470,7 +9477,7 @@ ipintOp(_i64_atomic_store32_u, macro() + move value, newValue + end) + else +- error ++ break + end + end) + end) +@@ -9506,7 +9513,7 @@ ipintOp(_i32_atomic_rmw_add, macro() + addi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9524,7 +9531,7 @@ ipintOp(_i64_atomic_rmw_add, macro() + addq value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9543,7 +9550,7 @@ ipintOp(_i32_atomic_rmw8_add_u, macro() + addi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9562,7 +9569,7 @@ ipintOp(_i32_atomic_rmw16_add_u, macro() + addi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9581,7 +9588,7 @@ ipintOp(_i64_atomic_rmw8_add_u, macro() + addi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9600,7 +9607,7 @@ ipintOp(_i64_atomic_rmw16_add_u, macro() + addi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9619,7 +9626,7 @@ ipintOp(_i64_atomic_rmw32_add_u, macro() + addi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9639,7 +9646,7 @@ 
ipintOp(_i32_atomic_rmw_sub, macro() + subi oldValue, value, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9659,7 +9666,7 @@ ipintOp(_i64_atomic_rmw_sub, macro() + subq oldValue, value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9680,7 +9687,7 @@ ipintOp(_i32_atomic_rmw8_sub_u, macro() + subi oldValue, value, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9701,7 +9708,7 @@ ipintOp(_i32_atomic_rmw16_sub_u, macro() + subi oldValue, value, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9722,7 +9729,7 @@ ipintOp(_i64_atomic_rmw8_sub_u, macro() + subi oldValue, value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9743,7 +9750,7 @@ ipintOp(_i64_atomic_rmw16_sub_u, macro() + subi oldValue, value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9764,7 +9771,7 @@ ipintOp(_i64_atomic_rmw32_sub_u, macro() + subi oldValue, value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9784,7 +9791,7 @@ ipintOp(_i32_atomic_rmw_and, macro() + andi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9804,7 +9811,7 @@ ipintOp(_i64_atomic_rmw_and, macro() + andq value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9824,7 +9831,7 @@ ipintOp(_i32_atomic_rmw8_and_u, macro() + andi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9844,7 +9851,7 @@ ipintOp(_i32_atomic_rmw16_and_u, macro() + andi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9864,7 +9871,7 @@ ipintOp(_i64_atomic_rmw8_and_u, macro() + andi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9884,7 +9891,7 @@ ipintOp(_i64_atomic_rmw16_and_u, macro() + andi 
value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9904,7 +9911,7 @@ ipintOp(_i64_atomic_rmw32_and_u, macro() + andi value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9923,7 +9930,7 @@ ipintOp(_i32_atomic_rmw_or, macro() + ori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9942,7 +9949,7 @@ ipintOp(_i64_atomic_rmw_or, macro() + orq value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -9961,7 +9968,7 @@ ipintOp(_i32_atomic_rmw8_or_u, macro() + ori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9980,7 +9987,7 @@ ipintOp(_i32_atomic_rmw16_or_u, macro() + ori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -9999,7 +10006,7 @@ ipintOp(_i64_atomic_rmw8_or_u, macro() + ori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10018,7 +10025,7 @@ ipintOp(_i64_atomic_rmw16_or_u, macro() + ori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10037,7 +10044,7 @@ ipintOp(_i64_atomic_rmw32_or_u, macro() + ori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10056,7 +10063,7 @@ ipintOp(_i32_atomic_rmw_xor, macro() + xori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -10075,7 +10082,7 @@ ipintOp(_i64_atomic_rmw_xor, macro() + xorq value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10094,7 +10101,7 @@ ipintOp(_i32_atomic_rmw8_xor_u, macro() + xori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -10113,7 +10120,7 @@ ipintOp(_i32_atomic_rmw16_xor_u, macro() + xori value, oldValue, newValue + end) + else +- error ++ 
break + end + pushInt32(scratch1) + end) +@@ -10132,7 +10139,7 @@ ipintOp(_i64_atomic_rmw8_xor_u, macro() + xori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10151,7 +10158,7 @@ ipintOp(_i64_atomic_rmw16_xor_u, macro() + xori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10170,7 +10177,7 @@ ipintOp(_i64_atomic_rmw32_xor_u, macro() + xori value, oldValue, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10189,7 +10196,7 @@ ipintOp(_i32_atomic_rmw_xchg, macro() + move value, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -10208,7 +10215,7 @@ ipintOp(_i64_atomic_rmw_xchg, macro() + move value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10227,7 +10234,7 @@ ipintOp(_i32_atomic_rmw8_xchg_u, macro() + move value, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -10246,7 +10253,7 @@ ipintOp(_i32_atomic_rmw16_xchg_u, macro( + move value, newValue + end) + else +- error ++ break + end + pushInt32(scratch1) + end) +@@ -10265,7 +10272,7 @@ ipintOp(_i64_atomic_rmw8_xchg_u, macro() + move value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10284,7 +10291,7 @@ ipintOp(_i64_atomic_rmw16_xchg_u, macro( + move value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10303,7 +10310,7 @@ ipintOp(_i64_atomic_rmw32_xchg_u, macro( + move value, newValue + end) + else +- error ++ break + end + pushInt64(scratch1) + end) +@@ -10346,7 +10353,7 @@ macro weakCASExchangeByte(mem, value, ex + .done: + move scratch2, expected + else +- error ++ break + end + end + +@@ -10366,7 +10373,7 @@ macro weakCASExchangeHalf(mem, value, ex + .done: + move scratch2, expected + else +- error ++ break + end + end + +@@ -10386,7 +10393,7 @@ macro weakCASExchangeInt(mem, value, exp + .done: + move scratch2, 
expected + else +- error ++ break + end + end + +@@ -10406,7 +10413,7 @@ macro weakCASExchangeQuad(mem, value, ex + .done: + move scratch2, expected + else +- error ++ break + end + end + +@@ -10418,7 +10425,7 @@ ipintOp(_i32_atomic_rmw_cmpxchg, macro() + elsif ARM64 + weakCASExchangeInt(mem, value, expected, scratch, scratch2) + else +- error ++ break + end + pushInt32(expected) + end) +@@ -10431,7 +10438,7 @@ ipintOp(_i64_atomic_rmw_cmpxchg, macro() + elsif ARM64 + weakCASExchangeQuad(mem, value, expected, scratch, scratch2) + else +- error ++ break + end + pushInt64(expected) + end) +@@ -10445,7 +10452,7 @@ ipintOp(_i32_atomic_rmw8_cmpxchg_u, macr + elsif ARM64 + weakCASExchangeByte(mem, value, expected, scratch, scratch2) + else +- error ++ break + end + pushInt32(expected) + end) +@@ -10459,7 +10466,7 @@ ipintOp(_i32_atomic_rmw16_cmpxchg_u, mac + elsif ARM64 + weakCASExchangeHalf(mem, value, expected, scratch, scratch2) + else +- error ++ break + end + pushInt32(expected) + end) +@@ -10473,7 +10480,7 @@ ipintOp(_i64_atomic_rmw8_cmpxchg_u, macr + elsif ARM64 + weakCASExchangeByte(mem, value, expected, scratch, scratch2) + else +- error ++ break + end + pushInt64(expected) + end) +@@ -10487,7 +10494,7 @@ ipintOp(_i64_atomic_rmw16_cmpxchg_u, mac + elsif ARM64 + weakCASExchangeHalf(mem, value, expected, scratch, scratch2) + else +- error ++ break + end + pushInt64(expected) + end) +@@ -10501,7 +10508,7 @@ ipintOp(_i64_atomic_rmw32_cmpxchg_u, mac + elsif ARM64 + weakCASExchangeInt(mem, value, expected, scratch, scratch2) + else +- error ++ break + end + pushInt64(expected) + end) +@@ -10812,7 +10819,7 @@ mintAlign(_a1) + mintArgDispatch() + + mintAlign(_a2) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + mintPop(a2) + mintArgDispatch() + else +@@ -10820,7 +10827,7 @@ else + end + + mintAlign(_a3) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + mintPop(a3) + mintArgDispatch() + else +@@ -10828,7 +10835,7 @@ else 
+ end + + mintAlign(_a4) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + mintPop(a4) + mintArgDispatch() + else +@@ -10836,7 +10843,7 @@ else + end + + mintAlign(_a5) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + mintPop(a5) + mintArgDispatch() + else +@@ -10844,7 +10851,7 @@ else + end + + mintAlign(_a6) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + mintPop(a6) + mintArgDispatch() + else +@@ -10852,7 +10859,7 @@ else + end + + mintAlign(_a7) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + mintPop(a7) + mintArgDispatch() + else +@@ -11072,7 +11079,7 @@ mintAlign(_r1) + mintRetDispatch() + + mintAlign(_r2) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + subp StackValueSize, mintRetDst + storeq wa2, [mintRetDst] + mintRetDispatch() +@@ -11081,7 +11088,7 @@ else + end + + mintAlign(_r3) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + subp StackValueSize, mintRetDst + storeq wa3, [mintRetDst] + mintRetDispatch() +@@ -11090,7 +11097,7 @@ else + end + + mintAlign(_r4) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + subp StackValueSize, mintRetDst + storeq wa4, [mintRetDst] + mintRetDispatch() +@@ -11099,7 +11106,7 @@ else + end + + mintAlign(_r5) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + subp StackValueSize, mintRetDst + storeq wa5, [mintRetDst] + mintRetDispatch() +@@ -11108,7 +11115,7 @@ else + end + + mintAlign(_r6) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + subp StackValueSize, mintRetDst + storeq wa6, [mintRetDst] + mintRetDispatch() +@@ -11117,7 +11124,7 @@ else + end + + mintAlign(_r7) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + subp StackValueSize, mintRetDst + storeq wa7, [mintRetDst] + mintRetDispatch() +@@ -11392,7 +11399,7 @@ uintAlign(_r5) + uintDispatch() + + uintAlign(_r6) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + popQuad(wa6) + 
uintDispatch() + else +@@ -11400,7 +11407,7 @@ else + end + + uintAlign(_r7) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + popQuad(wa7) + uintDispatch() + else +@@ -11481,7 +11488,7 @@ argumINTAlign(_a1) + argumINTDispatch() + + argumINTAlign(_a2) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + storeq wa2, [argumINTDst] + addp LocalSize, argumINTDst + argumINTDispatch() +@@ -11491,7 +11498,7 @@ end + + + argumINTAlign(_a3) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + storeq wa3, [argumINTDst] + addp LocalSize, argumINTDst + argumINTDispatch() +@@ -11500,7 +11507,7 @@ else + end + + argumINTAlign(_a4) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + storeq wa4, [argumINTDst] + addp LocalSize, argumINTDst + argumINTDispatch() +@@ -11509,7 +11516,7 @@ else + end + + argumINTAlign(_a5) +-if ARM64 or ARM64E or X86_64 ++if ARM64 or ARM64E or X86_64 or RISCV64 + storeq wa5, [argumINTDst] + addp LocalSize, argumINTDst + argumINTDispatch() +@@ -11518,7 +11525,7 @@ else + end + + argumINTAlign(_a6) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + storeq wa6, [argumINTDst] + addp LocalSize, argumINTDst + argumINTDispatch() +@@ -11527,7 +11534,7 @@ else + end + + argumINTAlign(_a7) +-if ARM64 or ARM64E ++if ARM64 or ARM64E or RISCV64 + storeq wa7, [argumINTDst] + addp LocalSize, argumINTDst + argumINTDispatch() diff --git a/libs/wpewebkit/patches/139-JavaScriptCore-offlineasm-RISCV64-vregs-bitext.patch b/libs/wpewebkit/patches/139-JavaScriptCore-offlineasm-RISCV64-vregs-bitext.patch new file mode 100644 index 0000000..0756aaf --- /dev/null +++ b/libs/wpewebkit/patches/139-JavaScriptCore-offlineasm-RISCV64-vregs-bitext.patch @@ -0,0 +1,203 @@ +--- a/Source/JavaScriptCore/offlineasm/riscv64.rb ++++ b/Source/JavaScriptCore/offlineasm/riscv64.rb +@@ -122,6 +122,12 @@ def riscv64OperandTypes(operands) + else + raise "Invalid Tmp operand #{op.kind}" + end ++ elsif op.is_a? 
VecRegisterID ++ # Vector registers are aliased to single FPRs on RISC-V (no V ++ # extension); treat them as FPRegisterID for validation so the ++ # existing [Address, FPRegisterID] / [FPRegisterID, Address] ++ # patterns match loadv/storev/pushv/popv operands. ++ FPRegisterID + else + op.class + end +@@ -273,6 +279,40 @@ class FPRegisterID + end + end + ++class VecRegisterID ++ # Pseudo-vector registers v0..v7 (and lane variants) for the IPInt ++ # interpreter, which stores every wasm operand-stack value in a ++ # 16-byte slot regardless of type. RISC-V baseline rv64gc has no V ++ # extension and no 128-bit register, so each v-reg is aliased to a ++ # single 64-bit FPR (the low half of the slot); pushv/popv/loadv/ ++ # storev below operate on 16-byte slots but only move 8 bytes of ++ # payload. Safe because wasm SIMD is gated off at runtime ++ # (Options::useWasmSIMD = false on RISCV64) so the upper 8 bytes ++ # are pure padding. ++ def riscv64Operand ++ case @name ++ when 'v0', 'v0_b', 'v0_h', 'v0_i', 'v0_q' ++ 'f0' ++ when 'v1', 'v1_b', 'v1_h', 'v1_i', 'v1_q' ++ 'f1' ++ when 'v2', 'v2_b', 'v2_h', 'v2_i', 'v2_q' ++ 'f2' ++ when 'v3', 'v3_b', 'v3_h', 'v3_i', 'v3_q' ++ 'f3' ++ when 'v4', 'v4_b', 'v4_h', 'v4_i', 'v4_q' ++ 'f4' ++ when 'v5', 'v5_b', 'v5_h', 'v5_i', 'v5_q' ++ 'f5' ++ when 'v6', 'v6_b', 'v6_h', 'v6_i', 'v6_q' ++ 'f6' ++ when 'v7', 'v7_b', 'v7_h', 'v7_i', 'v7_q' ++ 'f7' ++ else ++ raise "Bad vector register name #{@name} at #{codeOriginString}" ++ end ++ end ++end ++ + class SpecialRegister + def riscv64Operand + @name +@@ -651,6 +691,43 @@ def riscv64LowerOperation(list) + newList << Instruction.new(node.codeOrigin, "rv_addi", [sp, Immediate.new(node.codeOrigin, size), sp]) + end + ++ # pushv / popv reserve 16 bytes per operand because the IPInt wasm ++ # operand stack uses 16-byte slots uniformly. 
RISC-V baseline rv64gc ++ # has no V extension, so each v-reg is aliased to a single 64-bit FPR; ++ # only the low 8 bytes are written/read, the upper 8 bytes are pure ++ # padding (wasm SIMD is disabled at runtime on this arch). ++ def emitPushv(newList, node) ++ sp = RegisterID.forName(node.codeOrigin, 'sp') ++ size = 16 * node.operands.size ++ newList << Instruction.new(node.codeOrigin, "rv_addi", [sp, Immediate.new(node.codeOrigin, -size), sp]) ++ node.operands.reverse.each_with_index { ++ | op, index | ++ offset = size - 16 * (index + 1) ++ newList << Instruction.new(node.codeOrigin, "rv_fsd", [op, Address.new(node.codeOrigin, sp, Immediate.new(node.codeOrigin, offset))]) ++ } ++ end ++ ++ def emitPopv(newList, node) ++ sp = RegisterID.forName(node.codeOrigin, 'sp') ++ size = 16 * node.operands.size ++ node.operands.each_with_index { ++ | op, index | ++ offset = size - 16 * (index + 1) ++ newList << Instruction.new(node.codeOrigin, "rv_fld", [Address.new(node.codeOrigin, sp, Immediate.new(node.codeOrigin, offset)), op]) ++ } ++ newList << Instruction.new(node.codeOrigin, "rv_addi", [sp, Immediate.new(node.codeOrigin, size), sp]) ++ end ++ ++ def emitLoadv(newList, node) ++ riscv64ValidateOperands(node.operands, [Address, FPRegisterID]) ++ newList << Instruction.new(node.codeOrigin, "rv_fld", node.operands) ++ end ++ ++ def emitStorev(newList, node) ++ riscv64ValidateOperands(node.operands, [FPRegisterID, Address]) ++ newList << Instruction.new(node.codeOrigin, "rv_fsd", node.operands) ++ end ++ + def emitAdditionOperation(newList, node, operation, size) + operands = node.operands + if operands.size == 2 +@@ -790,38 +867,56 @@ def riscv64LowerOperation(list) + def emitBitExtensionOperation(newList, node, extension, fromSize, toSize) + raise "Invalid operand types" unless riscv64OperandTypes(node.operands) == [RegisterID, RegisterID] + +- if [[:s, :i, :p], [:s, :i, :q]].include? 
[extension, fromSize, toSize] +- newList << Instruction.new(node.codeOrigin, "rv_sext.w", node.operands) +- return +- end +- + source = node.operands[0] + dest = node.operands[1] + +- if [[:z, :i, :p], [:z, :i, :q]].include? [extension, fromSize, toSize] ++ # On RISC-V :p (pointer) is the same width as :q (64-bit). ++ targetSize = (toSize == :p) ? :q : toSize ++ ++ case [extension, fromSize, targetSize] ++ when [:s, :i, :q] ++ newList << Instruction.new(node.codeOrigin, "rv_sext.w", [source, dest]) ++ return ++ when [:s, :i, :i] ++ # Already an i32 in canonical RISC-V form (low 32 bits = value, ++ # upper 32 bits = sign-extension of bit 31). sext.w re-normalises ++ # in case the source was produced by a non-canonical sequence. ++ newList << Instruction.new(node.codeOrigin, "rv_sext.w", [source, dest]) ++ return ++ when [:z, :i, :q] ++ newList << Instruction.new(node.codeOrigin, "rv_slli", [source, Immediate.new(node.codeOrigin, 32), dest]) ++ newList << Instruction.new(node.codeOrigin, "rv_srli", [dest, Immediate.new(node.codeOrigin, 32), dest]) ++ return ++ when [:z, :i, :i] ++ # Lower 32 bits of source already hold the value; clearing the ++ # upper 32 bits gives the :i canonical representation. 
+ newList << Instruction.new(node.codeOrigin, "rv_slli", [source, Immediate.new(node.codeOrigin, 32), dest]) + newList << Instruction.new(node.codeOrigin, "rv_srli", [dest, Immediate.new(node.codeOrigin, 32), dest]) + return + end + +- raise "Invalid zero extension" unless extension == :s +- case [fromSize, toSize] +- when [:b, :i] ++ case [extension, fromSize, targetSize] ++ when [:s, :b, :i] + newList << Instruction.new(node.codeOrigin, "rv_slli", [source, Immediate.new(node.codeOrigin, 56), dest]) + newList << Instruction.new(node.codeOrigin, "rv_srai", [dest, Immediate.new(node.codeOrigin, 24), dest]) + newList << Instruction.new(node.codeOrigin, "rv_srli", [dest, Immediate.new(node.codeOrigin, 32), dest]) +- when [:b, :q] ++ when [:s, :b, :q] + newList << Instruction.new(node.codeOrigin, "rv_slli", [source, Immediate.new(node.codeOrigin, 56), dest]) + newList << Instruction.new(node.codeOrigin, "rv_srai", [dest, Immediate.new(node.codeOrigin, 56), dest]) +- when [:h, :i] ++ when [:s, :h, :i] + newList << Instruction.new(node.codeOrigin, "rv_slli", [source, Immediate.new(node.codeOrigin, 48), dest]) + newList << Instruction.new(node.codeOrigin, "rv_srai", [dest, Immediate.new(node.codeOrigin, 16), dest]) + newList << Instruction.new(node.codeOrigin, "rv_srli", [dest, Immediate.new(node.codeOrigin, 32), dest]) +- when [:h, :q] ++ when [:s, :h, :q] + newList << Instruction.new(node.codeOrigin, "rv_slli", [source, Immediate.new(node.codeOrigin, 48), dest]) + newList << Instruction.new(node.codeOrigin, "rv_srai", [dest, Immediate.new(node.codeOrigin, 48), dest]) ++ when [:z, :b, :i], [:z, :b, :q] ++ newList << Instruction.new(node.codeOrigin, "rv_andi", [source, Immediate.new(node.codeOrigin, 0xff), dest]) ++ when [:z, :h, :i], [:z, :h, :q] ++ newList << Instruction.new(node.codeOrigin, "rv_slli", [source, Immediate.new(node.codeOrigin, 48), dest]) ++ newList << Instruction.new(node.codeOrigin, "rv_srli", [dest, Immediate.new(node.codeOrigin, 48), dest]) + else +- 
raise "Invalid bit-extension combination" ++ raise "Invalid bit-extension combination #{[extension, fromSize, toSize]}" + end + end + +@@ -882,6 +977,14 @@ def riscv64LowerOperation(list) + emitPush(newList, node) + when "pop" + emitPop(newList, node) ++ when "pushv" ++ emitPushv(newList, node) ++ when "popv" ++ emitPopv(newList, node) ++ when "loadv" ++ emitLoadv(newList, node) ++ when "storev" ++ emitStorev(newList, node) + when /^(add|sub)(i|p|q)$/ + emitAdditionOperation(newList, node, $1.to_sym, $2.to_sym) + when /^(mul|div|rem)(i|p|q)(s?)$/ +@@ -1543,8 +1646,7 @@ def riscv64GenerateWASMPlaceholders(list + if node.is_a? Instruction + case node.opcode + when "loadlinkacqb", "loadlinkacqh", "loadlinkacqi", "loadlinkacqq", +- "storecondrelb", "storecondrelh", "storecondreli", "storecondrelq", +- "loadv", "storev" ++ "storecondrelb", "storecondrelh", "storecondreli", "storecondrelq" + newList << Instruction.new(node.codeOrigin, "rv_ebreak", [], "WebAssembly placeholder for opcode #{node.opcode}") + else + newList << node +@@ -1570,6 +1672,7 @@ class Sequence + false + end + } ++ result = riscLowerMalformedAddressesDouble(result) + result = riscv64LowerMisplacedAddresses(result) + result = riscLowerMisplacedAddresses(result) + result = riscv64LowerAddressLoads(result) diff --git a/libs/wpewebkit/patches/140-JavaScriptCore-LowLevelInterpreter-RISCV64-norelax.patch b/libs/wpewebkit/patches/140-JavaScriptCore-LowLevelInterpreter-RISCV64-norelax.patch new file mode 100644 index 0000000..d6a5749 --- /dev/null +++ b/libs/wpewebkit/patches/140-JavaScriptCore-LowLevelInterpreter-RISCV64-norelax.patch @@ -0,0 +1,33 @@ +--- a/Source/JavaScriptCore/llint/LowLevelInterpreter.cpp ++++ b/Source/JavaScriptCore/llint/LowLevelInterpreter.cpp +@@ -520,13 +520,30 @@ WTF_ALLOW_UNSAFE_BUFFER_USAGE_END + // the jsc_llint_begin and jsc_llint_end labels help lldb_webkit.py find the + // start and end of the llint instruction range quickly. 
+ ++// On RISC-V, the linker (mold) relaxes `auipc + jalr/addi` pairs into single ++// `j`/`addi`-via-gp instructions, shrinking IPInt opcode handlers by 4 bytes ++// each. The `.balignw 256` padding that follows each handler is not recomputed ++// after relaxation, so consecutive `ipint_*_validate` labels end up 252 bytes ++// apart instead of 256, and `IPInt::initialize()`'s `VALIDATE_IPINT_OPCODE` ++// asserts fire. Suppress relaxation across the entire LLInt asm to keep all ++// 256-byte-aligned dispatch slots intact. ++#if CPU(RISCV64) ++#define OFFLINE_ASM_BEGIN_OPTIONS ".option push\n.option norelax\n" ++#define OFFLINE_ASM_END_OPTIONS ".option pop\n" ++#else ++#define OFFLINE_ASM_BEGIN_OPTIONS "" ++#define OFFLINE_ASM_END_OPTIONS "" ++#endif ++ + #define OFFLINE_ASM_BEGIN __asm__( \ ++ OFFLINE_ASM_BEGIN_OPTIONS \ + OFFLINE_ASM_GLOBAL_LABEL_IMPL(jsc_llint_begin, OFFLINE_ASM_NO_ALT_ENTRY_DIRECTIVE, OFFLINE_ASM_ALIGN4B, HIDE_SYMBOL) \ + OFFLINE_ASM_BEGIN_SPACER + + #define OFFLINE_ASM_END \ + OFFLINE_ASM_BEGIN_SPACER \ + OFFLINE_ASM_GLOBAL_LABEL_IMPL(jsc_llint_end, OFFLINE_ASM_NO_ALT_ENTRY_DIRECTIVE, OFFLINE_ASM_ALIGN4B, HIDE_SYMBOL) \ ++ OFFLINE_ASM_END_OPTIONS \ + ); + + #if ENABLE(LLINT_EMBEDDED_OPCODE_ID) diff --git a/libs/wpewebkit/patches/141-JavaScriptCore-BBQJIT-gate-canTierUpToOMG.patch b/libs/wpewebkit/patches/141-JavaScriptCore-BBQJIT-gate-canTierUpToOMG.patch new file mode 100644 index 0000000..20a3469 --- /dev/null +++ b/libs/wpewebkit/patches/141-JavaScriptCore-BBQJIT-gate-canTierUpToOMG.patch @@ -0,0 +1,32 @@ +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp +@@ -735,6 +735,9 @@ BBQJIT::BBQJIT(CompilationContext& compi + + bool BBQJIT::canTierUpToOMG() const + { ++#if !ENABLE(WEBASSEMBLY_OMGJIT) ++ return false; ++#else + if (!Options::useOMGJIT()) + return false; + +@@ -746,6 +749,7 @@ bool BBQJIT::canTierUpToOMG() const + return false; + } + return true; ++#endif + } + + void 
BBQJIT::emitIncrementCallProfileCount(unsigned callProfileIndex) +@@ -3126,7 +3130,10 @@ void BBQJIT::emitEntryTierUpCheck() + jit.jump(tierUpResume); + }); + #else +- RELEASE_ASSERT_NOT_REACHED(); ++ // OMG/FTL tiering is unavailable on this architecture. canTierUpToOMG() ++ // returns false when OMG is not compiled in, so callers should not rely ++ // on this path; if it is reached anyway, skip emitting the tier-up ++ // counter check rather than aborting. + #endif + } + diff --git a/libs/wpewebkit/patches/142-JavaScriptCore-RISCV64-BBQJIT-rotate-ctz-impl.patch b/libs/wpewebkit/patches/142-JavaScriptCore-RISCV64-BBQJIT-rotate-ctz-impl.patch new file mode 100644 index 0000000..0505704 --- /dev/null +++ b/libs/wpewebkit/patches/142-JavaScriptCore-RISCV64-BBQJIT-rotate-ctz-impl.patch @@ -0,0 +1,143 @@ +--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h ++++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h +@@ -582,36 +582,51 @@ public: + + void countTrailingZeros32(RegisterID src, RegisterID dest) + { +- auto temp = temps(); +- m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<32>()); ++ // Previously used slli (64-bit left shift) on a zero-extended 32-bit ++ // value, looking for temp == 0. That never zeros the value until the ++ // set bits fall off bit 63 — so dest decremented past zero and ++ // returned a negative result for any nonzero src. Use the more ++ // direct "right-shift and test bit 0" loop, which terminates in at ++ // most 32 iterations. 
++ auto temp = temps(); + m_assembler.zeroExtend<32>(temp.data(), src); ++ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<32>()); + +- JumpList zero(makeBranch(Equal, temp.data(), RISCV64Registers::zero)); ++ JumpList done(makeBranch(Equal, temp.data(), RISCV64Registers::zero)); ++ ++ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<0>()); + + Label loop = label(); +- m_assembler.slliInsn<1>(temp.data(), temp.data()); +- m_assembler.addiInsn(dest, dest, Imm::I<-1>()); +- zero.append(makeBranch(Equal, temp.data(), RISCV64Registers::zero)); ++ m_assembler.andiInsn(temp.memory(), temp.data(), Imm::I<1>()); ++ done.append(makeBranch(NotEqual, temp.memory(), RISCV64Registers::zero)); ++ m_assembler.srliInsn<1>(temp.data(), temp.data()); ++ m_assembler.addiInsn(dest, dest, Imm::I<1>()); + jump().linkTo(loop, this); + +- zero.link(this); ++ done.link(this); + } + + void countTrailingZeros64(RegisterID src, RegisterID dest) + { +- auto temp = temps(); +- m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<64>()); ++ // Same fix as countTrailingZeros32 for the 64-bit case: scan from ++ // bit 0 upward using right-shift + andi 1, instead of left-shifting ++ // until the value falls off the register. 
++ auto temp = temps(); + m_assembler.addiInsn(temp.data(), src, Imm::I<0>()); ++ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<64>()); + +- JumpList zero(makeBranch(Equal, temp.data(), RISCV64Registers::zero)); ++ JumpList done(makeBranch(Equal, temp.data(), RISCV64Registers::zero)); ++ ++ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<0>()); + + Label loop = label(); +- m_assembler.slliInsn<1>(temp.data(), temp.data()); +- m_assembler.addiInsn(dest, dest, Imm::I<-1>()); +- zero.append(makeBranch(Equal, temp.data(), RISCV64Registers::zero)); ++ m_assembler.andiInsn(temp.memory(), temp.data(), Imm::I<1>()); ++ done.append(makeBranch(NotEqual, temp.memory(), RISCV64Registers::zero)); ++ m_assembler.srliInsn<1>(temp.data(), temp.data()); ++ m_assembler.addiInsn(dest, dest, Imm::I<1>()); + jump().linkTo(loop, this); + +- zero.link(this); ++ done.link(this); + } + + void byteSwap16(RegisterID reg) +@@ -844,8 +859,72 @@ public: + m_assembler.srliInsn(dest, src, uint32_t(imm.m_value & ((1 << 6) - 1))); + } + +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight64); ++ // rotateRight32/64: native rv64gc has no rotate instruction (Zbb's ++ // rorw/ror would do it in one), so synthesise via shift + shift + or. ++ // These are called by BBQJIT's I32Rotl/I32Rotr/I64Rotl/I64Rotr handlers ++ // on every non-x86 path (the rotl variants are routed through ++ // rotateRight32 with a negated shift, so a broken or missing ++ // rotateRight* silently miscompiles both rotr and rotl). 
++ void rotateRight32(RegisterID src, TrustedImm32 imm, RegisterID dest) ++ { ++ int32_t shift = imm.m_value & 31; ++ if (!shift) { ++ if (src != dest) ++ move(src, dest); ++ m_assembler.maskRegister<32>(dest); ++ return; ++ } ++ auto temp = temps(); ++ m_assembler.srliwInsn(temp.data(), src, uint32_t(shift)); ++ m_assembler.slliwInsn(temp.memory(), src, uint32_t(32 - shift)); ++ m_assembler.orInsn(dest, temp.data(), temp.memory()); ++ m_assembler.maskRegister<32>(dest); ++ } ++ ++ void rotateRight32(RegisterID src, RegisterID shift, RegisterID dest) ++ { ++ auto temp = temps(); ++ m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<32>()); ++ m_assembler.subInsn(temp.data(), temp.data(), shift); ++ m_assembler.srlwInsn(temp.memory(), src, shift); ++ m_assembler.sllwInsn(temp.data(), src, temp.data()); ++ m_assembler.orInsn(dest, temp.memory(), temp.data()); ++ m_assembler.maskRegister<32>(dest); ++ } ++ ++ void rotateRight64(RegisterID src, TrustedImm32 imm, RegisterID dest) ++ { ++ int32_t shift = imm.m_value & 63; ++ if (!shift) { ++ if (src != dest) ++ move(src, dest); ++ return; ++ } ++ auto temp = temps(); ++ m_assembler.srliInsn(temp.data(), src, uint32_t(shift)); ++ m_assembler.slliInsn(temp.memory(), src, uint32_t(64 - shift)); ++ m_assembler.orInsn(dest, temp.data(), temp.memory()); ++ } ++ ++ void rotateRight64(RegisterID src, RegisterID shift, RegisterID dest) ++ { ++ auto temp = temps(); ++ m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<64>()); ++ m_assembler.subInsn(temp.data(), temp.data(), shift); ++ m_assembler.srlInsn(temp.memory(), src, shift); ++ m_assembler.sllInsn(temp.data(), src, temp.data()); ++ m_assembler.orInsn(dest, temp.memory(), temp.data()); ++ } ++ ++ // Two-operand in-place variants used by MacroAssembler.h convenience ++ // wrappers (e.g. urshiftPtr / rolPtr / FastRotation::apply). 
These were ++ // matched by the previous templated NOOP overload and silently did ++ // nothing; forward to the three-operand form so callers see the real ++ // rotate. ++ void rotateRight32(TrustedImm32 imm, RegisterID srcDst) { rotateRight32(srcDst, imm, srcDst); } ++ void rotateRight64(TrustedImm32 imm, RegisterID srcDst) { rotateRight64(srcDst, imm, srcDst); } ++ void rotateRight32(RegisterID shift, RegisterID srcDst) { rotateRight32(srcDst, shift, srcDst); } ++ void rotateRight64(RegisterID shift, RegisterID srcDst) { rotateRight64(srcDst, shift, srcDst); } + + // Scalar BBQJIT primitives: fused shift/add and rotate-left, used by the + // wasm bytecode-to-machine-code path. RISC-V's baseline rv64gc has no diff --git a/libs/wpewebkit/patches/143-JavaScriptCore-Wasm-NaN-box-f32-args-buildFrame.patch b/libs/wpewebkit/patches/143-JavaScriptCore-Wasm-NaN-box-f32-args-buildFrame.patch new file mode 100644 index 0000000..486cd2c --- /dev/null +++ b/libs/wpewebkit/patches/143-JavaScriptCore-Wasm-NaN-box-f32-args-buildFrame.patch @@ -0,0 +1,21 @@ +--- a/Source/JavaScriptCore/wasm/WasmOperations.cpp ++++ b/Source/JavaScriptCore/wasm/WasmOperations.cpp +@@ -135,8 +135,17 @@ JSC_DEFINE_JIT_OPERATION(operationJSToWa + + dataLogLnIf(WasmOperationsInternal::verbose, "* Register Arg ", i, " ", dst); + +- if (type.isI32() || type.isF32()) ++ if (type.isI32()) + value = static_cast(static_cast(value)); ++ else if (type.isF32()) { ++ // Pack as NaN-boxed single (high 32 = 0xFFFFFFFF) so that ++ // the shared trampoline's loadDouble into the FPR yields a ++ // properly NaN-boxed single. Otherwise on architectures ++ // that enforce NaN-boxing for single-precision ops ++ // (RV64GC), the wasm body's subsequent flw/fsw on the f-arg ++ // sees the canonical NaN instead of the actual f32 value. 
++ value = static_cast(static_cast(value)) | 0xFFFFFFFF00000000ULL; ++ } + *access.operator()(registerSpace, dst) = value; + } + } diff --git a/libs/wpewebkit/patches/144-JavaScriptCore-IPInt-asm-RISCV64-reloadMemory.patch b/libs/wpewebkit/patches/144-JavaScriptCore-IPInt-asm-RISCV64-reloadMemory.patch new file mode 100644 index 0000000..3a48351 --- /dev/null +++ b/libs/wpewebkit/patches/144-JavaScriptCore-IPInt-asm-RISCV64-reloadMemory.patch @@ -0,0 +1,11 @@ +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm +@@ -328,7 +328,7 @@ end + macro ipintReloadMemory() + if ARM64 or ARM64E + loadpairq JSWebAssemblyInstance::m_cachedMemory[wasmInstance], memoryBase, boundsCheckingSize +- elsif X86_64 ++ elsif X86_64 or RISCV64 + loadp JSWebAssemblyInstance::m_cachedMemory[wasmInstance], memoryBase + loadp JSWebAssemblyInstance::m_cachedBoundsCheckingSize[wasmInstance], boundsCheckingSize + end diff --git a/libs/wpewebkit/patches/145-JavaScriptCore-offlineasm-RISCV64-ft-to-fa-FPRs.patch b/libs/wpewebkit/patches/145-JavaScriptCore-offlineasm-RISCV64-ft-to-fa-FPRs.patch new file mode 100644 index 0000000..e2085e3 --- /dev/null +++ b/libs/wpewebkit/patches/145-JavaScriptCore-offlineasm-RISCV64-ft-to-fa-FPRs.patch @@ -0,0 +1,43 @@ +--- a/Source/JavaScriptCore/offlineasm/riscv64.rb ++++ b/Source/JavaScriptCore/offlineasm/riscv64.rb +@@ -217,22 +217,32 @@ end + class FPRegisterID + def riscv64Operand + case @name ++ # The LLInt convention assumes that ft0..ft7 are the platform's ++ # FP argument registers (used as scratches between calls). On X86 ++ # and ARM64 the FP temp and FP arg registers happen to coincide ++ # (xmm0-7 / q0-7). RISC-V's ABI splits them: ft0..ft7 = f0..f7 are ++ # temps, fa0..fa7 = f10..f17 are args. To keep LLInt's wfa* aliases ++ # (which chain wfa0 -> fa0 -> ft0) resolving to the wasm arg FPRs, ++ # we map offlineasm's ft0..ft7 directly to physical f10..f17 here. 
++ # Physical f0..f7 become unreachable from offlineasm, but JSC's ++ # C++-side FPRInfo still uses them as fpRegT8..fpRegT15 (the JIT ++ # and offlineasm don't share register state across call boundaries). + when 'ft0' +- 'f0' ++ 'f10' + when 'ft1' +- 'f1' ++ 'f11' + when 'ft2' +- 'f2' ++ 'f12' + when 'ft3' +- 'f3' ++ 'f13' + when 'ft4' +- 'f4' ++ 'f14' + when 'ft5' +- 'f5' ++ 'f15' + when 'ft6' +- 'f6' ++ 'f16' + when 'ft7' +- 'f7' ++ 'f17' + when 'csfr0' + 'f8' + when 'csfr1' diff --git a/libs/wpewebkit/patches/146-JavaScriptCore-Wasm-BBQ-sext-i32-ccall-args-RISCV64.patch b/libs/wpewebkit/patches/146-JavaScriptCore-Wasm-BBQ-sext-i32-ccall-args-RISCV64.patch new file mode 100644 index 0000000..8f7e657 --- /dev/null +++ b/libs/wpewebkit/patches/146-JavaScriptCore-Wasm-BBQ-sext-i32-ccall-args-RISCV64.patch @@ -0,0 +1,62 @@ +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.h ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.h +@@ -2032,6 +2032,12 @@ public: + template + void saveValuesAcrossCallAndPassArguments(const Args& arguments, const CallInformation& callInfo, const TypeDefinition& signature); + ++ // On RISC-V the psABI requires 32-bit integer arguments to be sign-extended ++ // in their 64-bit argument registers; BBQ otherwise zero-extends them when ++ // loading from canonical i32 slots (lwu). Emit sext.w on any I32 arg that ++ // ends up in a register. No-op on other architectures. ++ void emitSignExtendI32ArgsForCCall(const CallInformation& callInfo, const TypeDefinition& signature); ++ + void slowPathSpillBindings(const RegisterBindings& bindings); + void slowPathRestoreBindings(const RegisterBindings&); + void restoreValuesAfterCall(const CallInformation& callInfo); +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp +@@ -4286,6 +4286,26 @@ void BBQJIT::restoreValuesAfterCall(cons + // whenever they are next used. 
+ } + ++void BBQJIT::emitSignExtendI32ArgsForCCall(const CallInformation& callInfo, const TypeDefinition& signature) ++{ ++#if CPU(RISCV64) ++ auto* fn = signature.as(); ++ for (size_t i = 0; i < callInfo.params.size(); ++i) { ++ auto type = fn->argumentType(i); ++ if (type.kind != TypeKind::I32) ++ continue; ++ Location loc = Location::fromArgumentLocation(callInfo.params[i], type.kind); ++ if (!loc.isGPR()) ++ continue; ++ // sext.w rd, rs lowers via signExtend32To64 -> rv_addiw rd, rs, 0 ++ m_jit.signExtend32To64(loc.asGPR(), loc.asGPR()); ++ } ++#else ++ UNUSED_PARAM(callInfo); ++ UNUSED_PARAM(signature); ++#endif ++} ++ + template + void BBQJIT::returnValuesFromCall(Vector& results, const FunctionSignature& functionType, const CallInformation& callInfo) + { +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.h ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.h +@@ -505,6 +505,7 @@ void BBQJIT::emitCCall(Func function, co + // Preserve caller-saved registers and other info + prepareForExceptions(); + saveValuesAcrossCallAndPassArguments(arguments, callInfo, *functionType); ++ emitSignExtendI32ArgsForCCall(callInfo, *functionType); + + // Materialize address of native function and call register + void* taggedFunctionPtr = tagCFunctionPtr(function); +@@ -534,6 +535,7 @@ void BBQJIT::emitCCall(Func function, co + // Preserve caller-saved registers and other info + prepareForExceptions(); + saveValuesAcrossCallAndPassArguments(arguments, callInfo, *functionType); ++ emitSignExtendI32ArgsForCCall(callInfo, *functionType); + + // Materialize address of native function and call register + void* taggedFunctionPtr = tagCFunctionPtr(function); diff --git a/libs/wpewebkit/patches/147-JavaScriptCore-Wasm-RISCV64-atomic-slow-path.patch b/libs/wpewebkit/patches/147-JavaScriptCore-Wasm-RISCV64-atomic-slow-path.patch new file mode 100644 index 0000000..676d139 --- /dev/null +++ b/libs/wpewebkit/patches/147-JavaScriptCore-Wasm-RISCV64-atomic-slow-path.patch @@ -0,0 +1,1157 @@ +--- 
a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h ++++ b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h +@@ -139,6 +139,46 @@ WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(memory + WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(memory_atomic_wait64, uint64_t, uint64_t, uint64_t); + WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(memory_atomic_notify, unsigned, unsigned, int32_t); + ++// Slow-path atomic ops. The IPInt asm on architectures lacking direct ++// offlineasm atomic primitives (currently RISC-V64) routes here. The asm ++// caller passes the already-bounds-checked, alignment-checked host pointer. ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_load8, uint64_t address); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_load16, uint64_t address); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_load32, uint64_t address); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_load64, uint64_t address); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_store8, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_store16, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_store32, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_store64, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_add8, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_add16, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_add32, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_add64, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_sub8, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_sub16, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_sub32, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_sub64, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_and8, uint64_t address, uint64_t value);
++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_and16, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_and32, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_and64, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_or8, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_or16, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_or32, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_or64, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xor8, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xor16, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xor32, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xor64, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xchg8, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xchg16, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xchg32, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_xchg64, uint64_t address, uint64_t value); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_cmpxchg8, uint64_t address, uint64_t expected, uint64_t replacement); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_cmpxchg16, uint64_t address, uint64_t expected, uint64_t replacement); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_cmpxchg32, uint64_t address, uint64_t expected, uint64_t replacement); ++WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(atomic_rmw_cmpxchg64, uint64_t address, uint64_t expected, uint64_t replacement); ++ + WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(check_stack_and_vm_traps, void* candidateNewStackPointer, Wasm::IPIntCallee*); + WASM_IPINT_EXTERN_CPP_DECL(unreachable_breakpoint_handler, CallFrame*, 
Register*); + +--- a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp ++++ b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp +@@ -1189,6 +1189,123 @@ WASM_IPINT_EXTERN_CPP_DECL(ref_func, uns + IPINT_RETURN(Wasm::refFunc(instance, index)); + } + ++// ------------------------------------------------------------------ ++// Slow-path atomic ops (currently used by IPInt on RISC-V64; rv64gc ++// does include the A-extension but offlineasm's atomicloadq / ++// atomicxchg* family has no RISC-V backend yet, so we route through C ++// helpers using __atomic_* builtins). The asm caller passes the ++// already-bounds-checked, alignment-checked host pointer. ++// ++// All ops are __ATOMIC_SEQ_CST, matching the wasm spec. ++// ------------------------------------------------------------------ ++ ++WASM_IPINT_EXTERN_CPP_DECL(atomic_load8, uint64_t address) ++{ ++ UNUSED_PARAM(instance); ++ uint8_t v = __atomic_load_n(reinterpret_cast(address), __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(std::bit_cast(static_cast(v)), nullptr); ++} ++ ++WASM_IPINT_EXTERN_CPP_DECL(atomic_load16, uint64_t address) ++{ ++ UNUSED_PARAM(instance); ++ uint16_t v = __atomic_load_n(reinterpret_cast(address), __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(std::bit_cast(static_cast(v)), nullptr); ++} ++ ++WASM_IPINT_EXTERN_CPP_DECL(atomic_load32, uint64_t address) ++{ ++ UNUSED_PARAM(instance); ++ uint32_t v = __atomic_load_n(reinterpret_cast(address), __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(std::bit_cast(static_cast(v)), nullptr); ++} ++ ++WASM_IPINT_EXTERN_CPP_DECL(atomic_load64, uint64_t address) ++{ ++ UNUSED_PARAM(instance); ++ uint64_t v = __atomic_load_n(reinterpret_cast(address), __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(std::bit_cast(static_cast(v)), nullptr); ++} ++ ++WASM_IPINT_EXTERN_CPP_DECL(atomic_store8, uint64_t address, uint64_t value) ++{ ++ UNUSED_PARAM(instance); ++ __atomic_store_n(reinterpret_cast(address), static_cast(value), __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(nullptr, nullptr); ++} ++
++WASM_IPINT_EXTERN_CPP_DECL(atomic_store16, uint64_t address, uint64_t value) ++{ ++ UNUSED_PARAM(instance); ++ __atomic_store_n(reinterpret_cast(address), static_cast(value), __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(nullptr, nullptr); ++} ++ ++WASM_IPINT_EXTERN_CPP_DECL(atomic_store32, uint64_t address, uint64_t value) ++{ ++ UNUSED_PARAM(instance); ++ __atomic_store_n(reinterpret_cast(address), static_cast(value), __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(nullptr, nullptr); ++} ++ ++WASM_IPINT_EXTERN_CPP_DECL(atomic_store64, uint64_t address, uint64_t value) ++{ ++ UNUSED_PARAM(instance); ++ __atomic_store_n(reinterpret_cast(address), value, __ATOMIC_SEQ_CST); ++ WASM_RETURN_TWO(nullptr, nullptr); ++} ++ ++#define IPINT_RMW_OP(name, builtin, type) \ ++WASM_IPINT_EXTERN_CPP_DECL(name, uint64_t address, uint64_t value) \ ++{ \ ++ UNUSED_PARAM(instance); \ ++ type prev = builtin(reinterpret_cast(address), static_cast(value), __ATOMIC_SEQ_CST); \ ++ WASM_RETURN_TWO(std::bit_cast(static_cast(prev)), nullptr); \ ++} ++ ++IPINT_RMW_OP(atomic_rmw_add8, __atomic_fetch_add, uint8_t) ++IPINT_RMW_OP(atomic_rmw_add16, __atomic_fetch_add, uint16_t) ++IPINT_RMW_OP(atomic_rmw_add32, __atomic_fetch_add, uint32_t) ++IPINT_RMW_OP(atomic_rmw_add64, __atomic_fetch_add, uint64_t) ++IPINT_RMW_OP(atomic_rmw_sub8, __atomic_fetch_sub, uint8_t) ++IPINT_RMW_OP(atomic_rmw_sub16, __atomic_fetch_sub, uint16_t) ++IPINT_RMW_OP(atomic_rmw_sub32, __atomic_fetch_sub, uint32_t) ++IPINT_RMW_OP(atomic_rmw_sub64, __atomic_fetch_sub, uint64_t) ++IPINT_RMW_OP(atomic_rmw_and8, __atomic_fetch_and, uint8_t) ++IPINT_RMW_OP(atomic_rmw_and16, __atomic_fetch_and, uint16_t) ++IPINT_RMW_OP(atomic_rmw_and32, __atomic_fetch_and, uint32_t) ++IPINT_RMW_OP(atomic_rmw_and64, __atomic_fetch_and, uint64_t) ++IPINT_RMW_OP(atomic_rmw_or8, __atomic_fetch_or, uint8_t) ++IPINT_RMW_OP(atomic_rmw_or16, __atomic_fetch_or, uint16_t) ++IPINT_RMW_OP(atomic_rmw_or32, __atomic_fetch_or, uint32_t) ++IPINT_RMW_OP(atomic_rmw_or64, 
__atomic_fetch_or, uint64_t) ++IPINT_RMW_OP(atomic_rmw_xor8, __atomic_fetch_xor, uint8_t) ++IPINT_RMW_OP(atomic_rmw_xor16, __atomic_fetch_xor, uint16_t) ++IPINT_RMW_OP(atomic_rmw_xor32, __atomic_fetch_xor, uint32_t) ++IPINT_RMW_OP(atomic_rmw_xor64, __atomic_fetch_xor, uint64_t) ++IPINT_RMW_OP(atomic_rmw_xchg8, __atomic_exchange_n, uint8_t) ++IPINT_RMW_OP(atomic_rmw_xchg16, __atomic_exchange_n, uint16_t) ++IPINT_RMW_OP(atomic_rmw_xchg32, __atomic_exchange_n, uint32_t) ++IPINT_RMW_OP(atomic_rmw_xchg64, __atomic_exchange_n, uint64_t) ++#undef IPINT_RMW_OP ++ ++#define IPINT_CMPXCHG_OP(name, type) \ ++WASM_IPINT_EXTERN_CPP_DECL(name, uint64_t address, uint64_t expected, uint64_t replacement) \ ++{ \ ++ UNUSED_PARAM(instance); \ ++ type exp = static_cast(expected); \ ++ type rep = static_cast(replacement); \ ++ __atomic_compare_exchange_n(reinterpret_cast(address), &exp, rep, /*weak=*/false, \ ++ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ ++ WASM_RETURN_TWO(std::bit_cast(static_cast(exp)), nullptr); \ ++} ++ ++IPINT_CMPXCHG_OP(atomic_rmw_cmpxchg8, uint8_t) ++IPINT_CMPXCHG_OP(atomic_rmw_cmpxchg16, uint16_t) ++IPINT_CMPXCHG_OP(atomic_rmw_cmpxchg32, uint32_t) ++IPINT_CMPXCHG_OP(atomic_rmw_cmpxchg64, uint64_t) ++#undef IPINT_CMPXCHG_OP ++ + extern "C" void SYSV_ABI wasm_log_crash(CallFrame*, JSWebAssemblyInstance* instance) + { + dataLogLn("Reached IPInt code that should never have been executed."); +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm +@@ -9210,6 +9210,10 @@ ipintOp(_i32_atomic_load, macro() + atomicLoadOp(ipintCheckMemoryBoundWithAlignmentCheck4, macro(mem, scratch) + if ARM64 or ARM64E or X86_64 + atomicloadi [mem], scratch ++ elsif RISCV64 ++ move mem, a1 ++ operationCall(macro() cCall2(_ipint_extern_atomic_load32) end) ++ move r0, scratch + else + break + end +@@ -9221,6 +9225,10 @@ ipintOp(_i64_atomic_load, macro() + atomicLoadOp(ipintCheckMemoryBoundWithAlignmentCheck8, macro(mem, scratch) 
+ if ARM64 or ARM64E or X86_64 + atomicloadq [mem], scratch ++ elsif RISCV64 ++ move mem, a1 ++ operationCall(macro() cCall2(_ipint_extern_atomic_load64) end) ++ move r0, scratch + else + break + end +@@ -9232,6 +9240,10 @@ ipintOp(_i32_atomic_load8_u, macro() + atomicLoadOp(ipintCheckMemoryBoundWithAlignmentCheck1, macro(mem, scratch) + if ARM64 or ARM64E or X86_64 + atomicloadb [mem], scratch ++ elsif RISCV64 ++ move mem, a1 ++ operationCall(macro() cCall2(_ipint_extern_atomic_load8) end) ++ move r0, scratch + else + break + end +@@ -9243,6 +9255,10 @@ ipintOp(_i32_atomic_load16_u, macro() + atomicLoadOp(ipintCheckMemoryBoundWithAlignmentCheck2, macro(mem, scratch) + if ARM64 or ARM64E or X86_64 + atomicloadh [mem], scratch ++ elsif RISCV64 ++ move mem, a1 ++ operationCall(macro() cCall2(_ipint_extern_atomic_load16) end) ++ move r0, scratch + else + break + end +@@ -9254,6 +9270,10 @@ ipintOp(_i64_atomic_load8_u, macro() + atomicLoadOp(ipintCheckMemoryBoundWithAlignmentCheck1, macro(mem, scratch) + if ARM64 or ARM64E or X86_64 + atomicloadb [mem], scratch ++ elsif RISCV64 ++ move mem, a1 ++ operationCall(macro() cCall2(_ipint_extern_atomic_load8) end) ++ move r0, scratch + else + break + end +@@ -9265,6 +9285,10 @@ ipintOp(_i64_atomic_load16_u, macro() + atomicLoadOp(ipintCheckMemoryBoundWithAlignmentCheck2, macro(mem, scratch) + if ARM64 or ARM64E or X86_64 + atomicloadh [mem], scratch ++ elsif RISCV64 ++ move mem, a1 ++ operationCall(macro() cCall2(_ipint_extern_atomic_load16) end) ++ move r0, scratch + else + break + end +@@ -9276,6 +9300,10 @@ ipintOp(_i64_atomic_load32_u, macro() + atomicLoadOp(ipintCheckMemoryBoundWithAlignmentCheck4, macro(mem, scratch) + if ARM64 or ARM64E or X86_64 + atomicloadi [mem], scratch ++ elsif RISCV64 ++ move mem, a1 ++ operationCall(macro() cCall2(_ipint_extern_atomic_load32) end) ++ move r0, scratch + else + break + end +@@ -9380,6 +9408,11 @@ ipintOp(_i32_atomic_store, macro() + weakCASLoopInt(mem, value, scratch1, scratch2, 
macro(value, oldValue, newValue) + move value, newValue + end) ++ elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_store32) end) + else + break + end +@@ -9396,6 +9429,11 @@ ipintOp(_i64_atomic_store, macro() + weakCASLoopQuad(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue + end) ++ elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_store64) end) + else + break + end +@@ -9412,6 +9450,11 @@ ipintOp(_i32_atomic_store8_u, macro() + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue + end) ++ elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_store8) end) + else + break + end +@@ -9428,6 +9471,11 @@ ipintOp(_i32_atomic_store16_u, macro() + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue + end) ++ elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_store16) end) + else + break + end +@@ -9444,6 +9492,11 @@ ipintOp(_i64_atomic_store8_u, macro() + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue + end) ++ elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_store8) end) + else + break + end +@@ -9460,6 +9513,11 @@ ipintOp(_i64_atomic_store16_u, macro() + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue + end) ++ elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_store16) end) + else + break + end +@@ -9476,6 +9534,11 @@ ipintOp(_i64_atomic_store32_u, macro() + 
weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue + end) ++ elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_store32) end) + else + break + end +@@ -9511,7 +9574,13 @@ ipintOp(_i32_atomic_rmw_add, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + addi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_add32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9529,7 +9598,13 @@ ipintOp(_i64_atomic_rmw_add, macro() + elsif ARM64 + weakCASLoopQuad(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + addq value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_add64) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9548,7 +9623,13 @@ ipintOp(_i32_atomic_rmw8_add_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + addi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_add8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9567,7 +9648,13 @@ ipintOp(_i32_atomic_rmw16_add_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + addi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_add16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9586,7 +9673,13 @@ ipintOp(_i64_atomic_rmw8_add_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, 
macro(value, oldValue, newValue) + addi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_add8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9605,7 +9698,13 @@ ipintOp(_i64_atomic_rmw16_add_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + addi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_add16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9624,7 +9723,13 @@ ipintOp(_i64_atomic_rmw32_add_u, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + addi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_add32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9644,7 +9749,13 @@ ipintOp(_i32_atomic_rmw_sub, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + subi oldValue, value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_sub32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9664,7 +9775,13 @@ ipintOp(_i64_atomic_rmw_sub, macro() + elsif ARM64 + weakCASLoopQuad(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + subq oldValue, value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_sub64) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9685,7 +9802,13 @@ ipintOp(_i32_atomic_rmw8_sub_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, 
macro(value, oldValue, newValue) + subi oldValue, value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_sub8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9706,7 +9829,13 @@ ipintOp(_i32_atomic_rmw16_sub_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + subi oldValue, value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_sub16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9727,7 +9856,13 @@ ipintOp(_i64_atomic_rmw8_sub_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + subi oldValue, value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_sub8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9748,7 +9883,13 @@ ipintOp(_i64_atomic_rmw16_sub_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + subi oldValue, value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_sub16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9769,7 +9910,13 @@ ipintOp(_i64_atomic_rmw32_sub_u, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + subi oldValue, value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_sub32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9789,7 +9936,13 @@ ipintOp(_i32_atomic_rmw_and, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, 
macro(value, oldValue, newValue) + andi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_and32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9809,7 +9962,13 @@ ipintOp(_i64_atomic_rmw_and, macro() + elsif ARM64 + weakCASLoopQuad(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + andq value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_and64) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9829,7 +9988,13 @@ ipintOp(_i32_atomic_rmw8_and_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + andi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_and8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9849,7 +10014,13 @@ ipintOp(_i32_atomic_rmw16_and_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + andi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_and16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9869,7 +10040,13 @@ ipintOp(_i64_atomic_rmw8_and_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + andi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_and8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9889,7 +10066,13 @@ ipintOp(_i64_atomic_rmw16_and_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, 
macro(value, oldValue, newValue) + andi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_and16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9909,7 +10092,13 @@ ipintOp(_i64_atomic_rmw32_and_u, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + andi value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_and32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9928,7 +10117,13 @@ ipintOp(_i32_atomic_rmw_or, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + ori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_or32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9947,7 +10142,13 @@ ipintOp(_i64_atomic_rmw_or, macro() + elsif ARM64 + weakCASLoopQuad(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + orq value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_or64) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9966,7 +10167,13 @@ ipintOp(_i32_atomic_rmw8_or_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + ori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_or8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -9985,7 +10192,13 @@ ipintOp(_i32_atomic_rmw16_or_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, 
oldValue, newValue) + ori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_or16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10004,7 +10217,13 @@ ipintOp(_i64_atomic_rmw8_or_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + ori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_or8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10023,7 +10242,13 @@ ipintOp(_i64_atomic_rmw16_or_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + ori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_or16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10042,7 +10267,13 @@ ipintOp(_i64_atomic_rmw32_or_u, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + ori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_or32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10061,7 +10292,13 @@ ipintOp(_i32_atomic_rmw_xor, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + xori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xor32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10080,7 +10317,13 @@ ipintOp(_i64_atomic_rmw_xor, macro() + elsif ARM64 + weakCASLoopQuad(mem, value, scratch1, scratch2, macro(value, oldValue, 
newValue) + xorq value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xor64) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10099,7 +10342,13 @@ ipintOp(_i32_atomic_rmw8_xor_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + xori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xor8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10118,7 +10367,13 @@ ipintOp(_i32_atomic_rmw16_xor_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + xori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xor16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10137,7 +10392,13 @@ ipintOp(_i64_atomic_rmw8_xor_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + xori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xor8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10156,7 +10417,13 @@ ipintOp(_i64_atomic_rmw16_xor_u, macro() + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + xori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xor16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10175,7 +10442,13 @@ ipintOp(_i64_atomic_rmw32_xor_u, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, 
oldValue, newValue) + xori value, oldValue, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xor32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10194,7 +10467,13 @@ ipintOp(_i32_atomic_rmw_xchg, macro() + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xchg32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10213,7 +10492,13 @@ ipintOp(_i64_atomic_rmw_xchg, macro() + elsif ARM64 + weakCASLoopQuad(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xchg64) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10232,7 +10517,13 @@ ipintOp(_i32_atomic_rmw8_xchg_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xchg8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10251,7 +10542,13 @@ ipintOp(_i32_atomic_rmw16_xchg_u, macro( + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xchg16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10270,7 +10567,13 @@ ipintOp(_i64_atomic_rmw8_xchg_u, macro() + elsif ARM64 + weakCASLoopByte(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, 
newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xchg8) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10289,7 +10592,13 @@ ipintOp(_i64_atomic_rmw16_xchg_u, macro( + elsif ARM64 + weakCASLoopHalf(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xchg16) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10308,7 +10617,13 @@ ipintOp(_i64_atomic_rmw32_xchg_u, macro( + elsif ARM64 + weakCASLoopInt(mem, value, scratch1, scratch2, macro(value, oldValue, newValue) + move value, newValue +- end) ++ end) elsif RISCV64 ++ move value, scratch1 ++ move mem, a1 ++ move scratch1, a2 ++ operationCall(macro() cCall3(_ipint_extern_atomic_rmw_xchg32) end) ++ move r0, scratch1 ++ + else + break + end +@@ -10423,7 +10738,13 @@ ipintOp(_i32_atomic_rmw_cmpxchg, macro() + if ARM64E or X86_64 + atomicweakcasi expected, value, [mem] + elsif ARM64 +- weakCASExchangeInt(mem, value, expected, scratch, scratch2) ++ weakCASExchangeInt(mem, value, expected, scratch, scratch2) elsif RISCV64 ++ move mem, scratch2 ++ move expected, scratch ++ move value, a3 ++ move scratch2, a1 ++ operationCall(macro() cCall4(_ipint_extern_atomic_rmw_cmpxchg32) end) ++ + else + break + end +@@ -10436,7 +10757,13 @@ ipintOp(_i64_atomic_rmw_cmpxchg, macro() + if ARM64E or X86_64 + atomicweakcasq expected, value, [mem] + elsif ARM64 +- weakCASExchangeQuad(mem, value, expected, scratch, scratch2) ++ weakCASExchangeQuad(mem, value, expected, scratch, scratch2) elsif RISCV64 ++ move mem, scratch2 ++ move expected, scratch ++ move value, a3 ++ move scratch2, a1 ++ operationCall(macro() cCall4(_ipint_extern_atomic_rmw_cmpxchg64) end) ++ + else + break + end +@@ -10450,7 +10777,13 @@ ipintOp(_i32_atomic_rmw8_cmpxchg_u, 
macr + if ARM64E or X86_64 + atomicweakcasb expected, value, [mem] + elsif ARM64 +- weakCASExchangeByte(mem, value, expected, scratch, scratch2) ++ weakCASExchangeByte(mem, value, expected, scratch, scratch2) elsif RISCV64 ++ move mem, scratch2 ++ move expected, scratch ++ move value, a3 ++ move scratch2, a1 ++ operationCall(macro() cCall4(_ipint_extern_atomic_rmw_cmpxchg8) end) ++ + else + break + end +@@ -10464,7 +10797,13 @@ ipintOp(_i32_atomic_rmw16_cmpxchg_u, mac + if ARM64E or X86_64 + atomicweakcash expected, value, [mem] + elsif ARM64 +- weakCASExchangeHalf(mem, value, expected, scratch, scratch2) ++ weakCASExchangeHalf(mem, value, expected, scratch, scratch2) elsif RISCV64 ++ move mem, scratch2 ++ move expected, scratch ++ move value, a3 ++ move scratch2, a1 ++ operationCall(macro() cCall4(_ipint_extern_atomic_rmw_cmpxchg16) end) ++ + else + break + end +@@ -10478,7 +10817,13 @@ ipintOp(_i64_atomic_rmw8_cmpxchg_u, macr + if ARM64E or X86_64 + atomicweakcasb expected, value, [mem] + elsif ARM64 +- weakCASExchangeByte(mem, value, expected, scratch, scratch2) ++ weakCASExchangeByte(mem, value, expected, scratch, scratch2) elsif RISCV64 ++ move mem, scratch2 ++ move expected, scratch ++ move value, a3 ++ move scratch2, a1 ++ operationCall(macro() cCall4(_ipint_extern_atomic_rmw_cmpxchg8) end) ++ + else + break + end +@@ -10492,7 +10837,13 @@ ipintOp(_i64_atomic_rmw16_cmpxchg_u, mac + if ARM64E or X86_64 + atomicweakcash expected, value, [mem] + elsif ARM64 +- weakCASExchangeHalf(mem, value, expected, scratch, scratch2) ++ weakCASExchangeHalf(mem, value, expected, scratch, scratch2) elsif RISCV64 ++ move mem, scratch2 ++ move expected, scratch ++ move value, a3 ++ move scratch2, a1 ++ operationCall(macro() cCall4(_ipint_extern_atomic_rmw_cmpxchg16) end) ++ + else + break + end +@@ -10506,7 +10857,13 @@ ipintOp(_i64_atomic_rmw32_cmpxchg_u, mac + if ARM64E or X86_64 + atomicweakcasi expected, value, [mem] + elsif ARM64 +- weakCASExchangeInt(mem, value, expected, 
scratch, scratch2) ++ weakCASExchangeInt(mem, value, expected, scratch, scratch2) elsif RISCV64 ++ move mem, scratch2 ++ move expected, scratch ++ move value, a3 ++ move scratch2, a1 ++ operationCall(macro() cCall4(_ipint_extern_atomic_rmw_cmpxchg32) end) ++ + else + break + end +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp +@@ -628,6 +628,29 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom + RELEASE_ASSERT_NOT_REACHED(); + } + m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit); ++#elif CPU(RISCV64) ++ // Slow path: plain load+store (no LR/SC). rv64gc does include the ++ // A-extension, but MacroAssemblerRISCV64.h's loadLinkAcq/storeCondRel ++ // primitives are still stubs. This is single-threaded-correct only; ++ // multi-threaded code would race. TODO: emit amo.* / lr.d+sc.d for ++ // a truly atomic version. ++ switch (accessWidth) { ++ case Width8: ++ m_jit.store8(scratchGPR, address); ++ break; ++ case Width16: ++ m_jit.store16(scratchGPR, address); ++ break; ++ case Width32: ++ m_jit.store32(scratchGPR, address); ++ break; ++ case Width64: ++ m_jit.store64(scratchGPR, address); ++ break; ++ case Width128: ++ RELEASE_ASSERT_NOT_REACHED(); ++ } ++ UNUSED_PARAM(reloopLabel); + #endif + } + +@@ -1261,6 +1284,51 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA + return; + } + ++#if CPU(RISCV64) ++ // Slow path: non-atomic CAS. Load current into resultGPR, compare ++ // to expectedGPR, store valueGPR only on equality. Single-threaded ++ // correct only. TODO: emit lr.{d,w}+sc.{d,w} for a truly atomic ++ // version using the rv64gc A-extension. 
++ switch (accessWidth) { ++ case Width8: ++ m_jit.load8(address, resultGPR); ++ break; ++ case Width16: ++ m_jit.load16(address, resultGPR); ++ break; ++ case Width32: ++ m_jit.load32(address, resultGPR); ++ break; ++ case Width64: ++ m_jit.load64(address, resultGPR); ++ break; ++ default: ++ RELEASE_ASSERT_NOT_REACHED(); ++ break; ++ } ++ auto notEqual = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR); ++ switch (accessWidth) { ++ case Width8: ++ m_jit.store8(valueGPR, address); ++ break; ++ case Width16: ++ m_jit.store16(valueGPR, address); ++ break; ++ case Width32: ++ m_jit.store32(valueGPR, address); ++ break; ++ case Width64: ++ m_jit.store64(valueGPR, address); ++ break; ++ default: ++ RELEASE_ASSERT_NOT_REACHED(); ++ break; ++ } ++ notEqual.link(&m_jit); ++ UNUSED_PARAM(scratchGPR); ++ return; ++#endif ++ + m_jit.move(expectedGPR, resultGPR); + switch (accessWidth) { + case Width8: diff --git a/libs/wpewebkit/patches/148-JavaScriptCore-RISCV64-A-extension-atomics.patch b/libs/wpewebkit/patches/148-JavaScriptCore-RISCV64-A-extension-atomics.patch new file mode 100644 index 0000000..8c39a4c --- /dev/null +++ b/libs/wpewebkit/patches/148-JavaScriptCore-RISCV64-A-extension-atomics.patch @@ -0,0 +1,539 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: RISCV64: wire up A-extension atomics in BBQJIT + +OpenWrt's RISC-V baseline is rv64gc/lp64d, which always includes the +standard A-extension. Replace the UNIMPLEMENTED_METHOD stubs in +MacroAssemblerRISCV64.h with real implementations and switch BBQJIT to +drive them, so wasm atomic ops compiled in BBQJIT are properly +multi-thread-safe. + + * 32/64-bit primitives map directly to LR.{W,D}.aq / SC.{W,D}.rl, + AMOSWAP.{W,D}, AMOADD.{W,D}, AMOAND/OR/XOR.{W,D}. atomicXchgClear + is "atomic AND NOT"; base A has no AMOANDN, so synthesise as + xori-1 + AMOAND. atomicStrongCAS{32,64} is a tight LR/SC.aqrl loop. 
+ + * 8/16-bit primitives are not provided by base A (Zabha is optional + and not in rv64gc). BBQJIT for Width8/Width16 now emits an inline + word-aligned LR.W/SC.W byte-mask loop covering all three caller + paths (emitAtomicLoadOp, emitAtomicStoreOp, emitAtomicBinaryRMWOp) + via a new emitAtomicOpGenericRISCV64ByteMask helper, plus the + cmpxchg path in emitAtomicCompareExchange. The helper takes the + caller's valueLocation as a ScratchScope preserve arg so its 4 + extra scratches never alias an input register that has been + consume()-d but is still read inside the loop. + + * X86-style 5-arg atomicStrongCAS{32,64} overloads are added as + stubs: BBQJIT's emitStrongCAS returns early on RISCV64 so those + call sites are dead at runtime, but the source still needs them + to typecheck. + +The wasm threads spec tests (atomic.wast.js, atomic-signed.wast.js, +memory.wast.js, wait-large.wast.js) pass in both IPInt+BBQ and BBQ-only +modes on a StarFive VisionFive 2. + +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/assembler/RISCV64Assembler.h ++++ b/Source/JavaScriptCore/assembler/RISCV64Assembler.h +@@ -1832,6 +1832,37 @@ public: + void remwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMW::construct(rd, rs1, rs2)); } + void remuwInsn(RegisterID rd, RegisterID rs1, RegisterID rs2) { insn(RISCV64Instructions::REMUW::construct(rd, rs1, rs2)); } + ++ // RV{32,64}A standard A-extension (always present in rv64gc). ++ // For sequential consistency pass { Acquire, Release } (.aqrl). 
++ void lr_wInsn(RegisterID rd, RegisterID rs1, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::LR_W::construct(rd, rs1, RegisterID::zero, aqrl)); } ++ void lr_dInsn(RegisterID rd, RegisterID rs1, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::LR_D::construct(rd, rs1, RegisterID::zero, aqrl)); } ++ void sc_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::SC_W::construct(rd, rs1, rs2, aqrl)); } ++ void sc_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::SC_D::construct(rd, rs1, rs2, aqrl)); } ++ void amoswap_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOSWAP_W::construct(rd, rs1, rs2, aqrl)); } ++ void amoswap_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOSWAP_D::construct(rd, rs1, rs2, aqrl)); } ++ void amoadd_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOADD_W::construct(rd, rs1, rs2, aqrl)); } ++ void amoadd_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOADD_D::construct(rd, rs1, rs2, aqrl)); } ++ void amoxor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOXOR_W::construct(rd, rs1, rs2, aqrl)); } ++ void amoxor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOXOR_D::construct(rd, rs1, rs2, aqrl)); } ++ void amoand_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOAND_W::construct(rd, rs1, rs2, aqrl)); } ++ void amoand_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOAND_D::construct(rd, 
rs1, rs2, aqrl)); } ++ void amoor_wInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOOR_W::construct(rd, rs1, rs2, aqrl)); } ++ void amoor_dInsn(RegisterID rd, RegisterID rs1, RegisterID rs2, std::initializer_list aqrl) ++ { insn(RISCV64Instructions::AMOOR_D::construct(rd, rs1, rs2, aqrl)); } ++ + using FCVTType = RISCV64Instructions::FCVTType; + using FMVType = RISCV64Instructions::FMVType; + +--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h ++++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h +@@ -2448,55 +2448,169 @@ public: + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUnzipEven); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorZipUpper); + +- // Wasm atomics: the RISC-V A extension is available (the OpenWrt -march +- // baseline is rv64gc, i.e. includes A), but the AMO/LR/SC instruction +- // emitters in RISCV64Assembler.h have not been added yet. Stub the +- // BBQJIT atomic API with hard-fault unimplemented methods: at runtime +- // wasm shared memory is gated off via useSharedArrayBuffer = false, so +- // wasm atomic opcodes are unreachable, and these stubs only ever exist +- // for compile-time completeness. Filling these in (and adding the +- // matching RISCV64Assembler.h emitters) is a follow-up that unlocks the +- // wasm threads proposal on RISCV64. ++ // RV64A standard A-extension (always present in rv64gc): real impls ++ // for 32/64-bit primitives. 8/16-bit primitives stay UNIMPLEMENTED ++ // because base RV64A has no byte/half AMOs (Zabha is optional, not ++ // in rv64gc); BBQJIT handles Width8/Width16 itself by emitting an ++ // inline word-aligned LR.W/SC.W byte-mask loop (see ++ // emitAtomicOpGenericRISCV64ByteMask and the cmpxchg path).
+ MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(loadLinkAcq64); ++ void loadLinkAcq32(Address address, RegisterID dest) ++ { ++ ASSERT(!address.offset); ++ m_assembler.lr_wInsn(dest, address.base, { Assembler::MemoryAccess::Acquire }); ++ } ++ void loadLinkAcq64(Address address, RegisterID dest) ++ { ++ ASSERT(!address.offset); ++ m_assembler.lr_dInsn(dest, address.base, { Assembler::MemoryAccess::Acquire }); ++ } + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(storeCondRel64); ++ void storeCondRel32(RegisterID value, Address address, RegisterID status) ++ { ++ ASSERT(!address.offset); ++ m_assembler.sc_wInsn(status, address.base, value, { Assembler::MemoryAccess::Release }); ++ } ++ void storeCondRel64(RegisterID value, Address address, RegisterID status) ++ { ++ ASSERT(!address.offset); ++ m_assembler.sc_dInsn(status, address.base, value, { Assembler::MemoryAccess::Release }); ++ } + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS8, Jump); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS16, Jump); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS32, Jump); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD_WITH_RETURN(branchAtomicStrongCAS64, Jump); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg32); +- 
MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchg64); ++ void atomicXchg32(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoswap_wInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchg64(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoswap_dInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ // 2-arg X86-style overloads (input-and-result in the same register). ++ // Live only in BBQJIT's isX86_64() branch, which is never taken at ++ // runtime on RISC-V; provided so the source still compiles. ++ void atomicXchg32(RegisterID valueAndResult, Address address) { atomicXchg32(valueAndResult, address, valueAndResult); } ++ void atomicXchg64(RegisterID valueAndResult, Address address) { atomicXchg64(valueAndResult, address, valueAndResult); } + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgAdd64); ++ void atomicXchgAdd32(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoadd_wInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgAdd64(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoadd_dInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgAdd32(RegisterID valueAndResult, Address address) { atomicXchgAdd32(valueAndResult, address, valueAndResult); } ++ void atomicXchgAdd64(RegisterID valueAndResult, 
Address address) { atomicXchgAdd64(valueAndResult, address, valueAndResult); } + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgClear64); ++ // atomicXchgClear is "atomic AND NOT": no AMOANDN in base A; xori-1 + AMOAND. ++ void atomicXchgClear32(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ auto t = temps(); ++ m_assembler.xoriInsn(t.data(), value, Imm::I<-1>()); ++ m_assembler.amoand_wInsn(result, address.base, t.data(), ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgClear64(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ auto t = temps(); ++ m_assembler.xoriInsn(t.data(), value, Imm::I<-1>()); ++ m_assembler.amoand_dInsn(result, address.base, t.data(), ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgClear32(RegisterID valueAndResult, Address address) { atomicXchgClear32(valueAndResult, address, valueAndResult); } ++ void atomicXchgClear64(RegisterID valueAndResult, Address address) { atomicXchgClear64(valueAndResult, address, valueAndResult); } + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgOr64); ++ void atomicXchgOr32(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoor_wInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgOr64(RegisterID value, Address 
address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoor_dInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgOr32(RegisterID valueAndResult, Address address) { atomicXchgOr32(valueAndResult, address, valueAndResult); } ++ void atomicXchgOr64(RegisterID valueAndResult, Address address) { atomicXchgOr64(valueAndResult, address, valueAndResult); } + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicXchgXor64); +- // atomicStrongCAS{N}: the non-branching CAS overloads used by BBQJIT +- // when the caller only needs success/failure in resultGPR (rather +- // than a JIT-emitted branch). Same runtime-unreachable rationale as +- // branchAtomicStrongCAS{N} above. ++ void atomicXchgXor32(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoxor_wInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgXor64(RegisterID value, Address address, RegisterID result) ++ { ++ ASSERT(!address.offset); ++ m_assembler.amoxor_dInsn(result, address.base, value, ++ { Assembler::MemoryAccess::Acquire, Assembler::MemoryAccess::Release }); ++ } ++ void atomicXchgXor32(RegisterID valueAndResult, Address address) { atomicXchgXor32(valueAndResult, address, valueAndResult); } ++ void atomicXchgXor64(RegisterID valueAndResult, Address address) { atomicXchgXor64(valueAndResult, address, valueAndResult); } ++ // atomicStrongCAS{32,64}(expectedAndResult, newValue, address): ++ // Loads *address into expectedAndResult; if old == caller's expected, ++ // stores newValue. Same external contract as ARM64-LSE casa. 
Caller ++ // checks expectedAndResult == old-expected to detect success. + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS8); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS16); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS32); +- MACRO_ASSEMBLER_RISCV64_TEMPLATED_UNIMPLEMENTED_METHOD(atomicStrongCAS64); ++ void atomicStrongCAS32(RegisterID expectedAndResult, RegisterID newValue, Address address) ++ { ++ ASSERT(!address.offset); ++ auto t = temps(); ++ Label loop = label(); ++ m_assembler.lr_wInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire }); ++ m_assembler.addiwInsn(t.memory(), expectedAndResult, Imm::I<0>()); ++ Jump mismatch = makeBranch(NotEqual, t.data(), t.memory()); ++ m_assembler.sc_wInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release }); ++ Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero); ++ scFail.linkTo(loop, this); ++ mismatch.link(this); ++ m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>()); ++ } ++ void atomicStrongCAS64(RegisterID expectedAndResult, RegisterID newValue, Address address) ++ { ++ ASSERT(!address.offset); ++ auto t = temps(); ++ Label loop = label(); ++ m_assembler.lr_dInsn(t.data(), address.base, { Assembler::MemoryAccess::Acquire }); ++ Jump mismatch = makeBranch(NotEqual, t.data(), expectedAndResult); ++ m_assembler.sc_dInsn(t.memory(), address.base, newValue, { Assembler::MemoryAccess::Release }); ++ Jump scFail = makeBranch(NotEqual, t.memory(), RISCV64Registers::zero); ++ scFail.linkTo(loop, this); ++ mismatch.link(this); ++ m_assembler.addiInsn(expectedAndResult, t.data(), Imm::I<0>()); ++ } ++ // 5-arg StatusCondition form (X86-style). Live only in BBQJIT's ++ // isX86_64() branch -- on RISC-V the surrounding code exits via ++ // an earlier `return;` so this never runs at runtime. Provide a ++ // viable overload so the source still compiles. 
++ void atomicStrongCAS32(StatusCondition, RegisterID, RegisterID, Address, RegisterID) ++ { RELEASE_ASSERT_NOT_REACHED(); } ++ void atomicStrongCAS64(StatusCondition, RegisterID, RegisterID, Address, RegisterID) ++ { RELEASE_ASSERT_NOT_REACHED(); } + // Additional SIMD vector noop stubs uncovered by enabling BBQJIT. + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorSplat); + MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(vectorUshl8); +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.h ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.h +@@ -1335,6 +1335,11 @@ public: + template + void emitAtomicOpGeneric(ExtAtomicOpType op, Address address, Location old, Location cur, const Functor& functor); + ++#if CPU(RISCV64) && USE(JSVALUE64) ++ template ++ void emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor); ++#endif ++ + [[nodiscard]] Value emitAtomicLoadOp(ExtAtomicOpType loadOp, Type valueType, Location pointer, uint32_t uoffset); + + [[nodiscard]] PartialResult atomicLoad(ExtAtomicOpType loadOp, Type valueType, ExpressionType pointer, ExpressionType& result, uint32_t uoffset); +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp +@@ -540,6 +540,47 @@ void BBQJIT::emitSanitizeAtomicResult(Ex + emitSanitizeAtomicResult(op, resultType, result, result); + } + ++#if CPU(RISCV64) ++template ++void BBQJIT::emitAtomicOpGenericRISCV64ByteMask(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, Location valueLocation, const Functor& functor) ++{ ++ Width accessWidth = this->accessWidth(op); ++ ASSERT(accessWidth == Width8 || accessWidth == Width16); ++ ++ ScratchScope<4, 0> rvScratches(*this, Location::fromGPR(oldGPR), Location::fromGPR(scratchGPR), valueLocation); ++ GPRReg alignedAddr = rvScratches.gpr(0); ++ GPRReg shift = rvScratches.gpr(1); ++ GPRReg invMask = rvScratches.gpr(2); ++ GPRReg rawOld = 
rvScratches.gpr(3); ++ int32_t byteMask = (accessWidth == Width8) ? 0xFF : 0xFFFF; ++ ++ m_jit.move(address.base, alignedAddr); ++ m_jit.and64(TrustedImm32(-4), alignedAddr); ++ m_jit.move(address.base, shift); ++ m_jit.and64(TrustedImm32(3), shift); ++ m_jit.lshift64(TrustedImm32(3), shift); ++ ++ m_jit.move(TrustedImm32(byteMask), invMask); ++ m_jit.lshift64(shift, invMask); ++ m_jit.not64(invMask); ++ ++ auto reloopLabel = m_jit.label(); ++ m_jit.loadLinkAcq32(Address(alignedAddr), rawOld); ++ m_jit.urshift64(rawOld, shift, oldGPR); ++ m_jit.and64(TrustedImm32(byteMask), oldGPR); ++ ++ functor(oldGPR, scratchGPR); ++ ++ m_jit.and64(TrustedImm32(byteMask), scratchGPR); ++ m_jit.lshift64(shift, scratchGPR); ++ m_jit.and64(invMask, rawOld); ++ m_jit.or64(scratchGPR, rawOld); ++ ++ m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR); ++ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit); ++} ++#endif ++ + template + void BBQJIT::emitAtomicOpGeneric(ExtAtomicOpType op, Address address, GPRReg oldGPR, GPRReg scratchGPR, const Functor& functor) + { +@@ -573,14 +614,14 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom + #endif + break; + case Width32: +-#if CPU(ARM64) ++#if CPU(ARM64) || CPU(RISCV64) + m_jit.loadLinkAcq32(address, oldGPR); + #else + m_jit.load32(address, oldGPR); + #endif + break; + case Width64: +-#if CPU(ARM64) ++#if CPU(ARM64) || CPU(RISCV64) + m_jit.loadLinkAcq64(address, oldGPR); + #else + m_jit.load64(address, oldGPR); +@@ -629,28 +670,25 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom + } + m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit); + #elif CPU(RISCV64) +- // Slow path: plain load+store (no LR/SC). rv64gc does include the +- // A-extension, but MacroAssemblerRISCV64.h's loadLinkAcq/storeCondRel +- // primitives are still stubs. This is single-threaded-correct only; +- // multi-threaded code would race. TODO: emit amo.* / lr.d+sc.d for +- // a truly atomic version. 
+ switch (accessWidth) { + case Width8: + m_jit.store8(scratchGPR, address); ++ m_jit.move(TrustedImm32(0), scratchGPR); + break; + case Width16: + m_jit.store16(scratchGPR, address); ++ m_jit.move(TrustedImm32(0), scratchGPR); + break; + case Width32: +- m_jit.store32(scratchGPR, address); ++ m_jit.storeCondRel32(scratchGPR, address, scratchGPR); + break; + case Width64: +- m_jit.store64(scratchGPR, address); ++ m_jit.storeCondRel64(scratchGPR, address, scratchGPR); + break; + case Width128: + RELEASE_ASSERT_NOT_REACHED(); + } +- UNUSED_PARAM(reloopLabel); ++ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(reloopLabel, &m_jit); + #endif + } + +@@ -671,9 +709,16 @@ void BBQJIT::emitAtomicOpGeneric(ExtAtom + + if (!(isARM64_LSE() || isX86_64())) { + ScratchScope<1, 0> scratches(*this); +- emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), [&](GPRReg oldGPR, GPRReg newGPR) { ++ auto opFunctor = [&](GPRReg oldGPR, GPRReg newGPR) { + emitSanitizeAtomicResult(loadOp, canonicalWidth(accessWidth(loadOp)) == Width64 ? 
TypeKind::I64 : TypeKind::I32, oldGPR, newGPR); +- }); ++ }; ++#if CPU(RISCV64) ++ Width w = accessWidth(loadOp); ++ if (w == Width8 || w == Width16) ++ emitAtomicOpGenericRISCV64ByteMask(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), Location(), opFunctor); ++ else ++#endif ++ emitAtomicOpGeneric(loadOp, address, resultLocation.asGPR(), scratches.gpr(0), opFunctor); + emitSanitizeAtomicResult(loadOp, valueType.kind, resultLocation.asGPR()); + return result; + } +@@ -778,9 +823,16 @@ void BBQJIT::emitAtomicStoreOp(ExtAtomic + consume(value); + + if (!(isARM64_LSE() || isX86_64())) { +- emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, [&](GPRReg, GPRReg newGPR) { ++ auto opFunctor = [&](GPRReg, GPRReg newGPR) { + m_jit.move(valueLocation.asGPR(), newGPR); +- }); ++ }; ++#if CPU(RISCV64) ++ Width w = accessWidth(storeOp); ++ if (w == Width8 || w == Width16) ++ emitAtomicOpGenericRISCV64ByteMask(storeOp, address, scratch1GPR, scratch2GPR, valueLocation, opFunctor); ++ else ++#endif ++ emitAtomicOpGeneric(storeOp, address, scratch1GPR, scratch2GPR, opFunctor); + return; + } + +@@ -1135,7 +1187,7 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA + break; + } + +- emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, [&](GPRReg oldGPR, GPRReg newGPR) { ++ auto rmwFunctor = [&](GPRReg oldGPR, GPRReg newGPR) { + switch (op) { + case ExtAtomicOpType::I32AtomicRmw16AddU: + case ExtAtomicOpType::I32AtomicRmw8AddU: +@@ -1205,7 +1257,13 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA + RELEASE_ASSERT_NOT_REACHED(); + break; + } +- }); ++ }; ++#if CPU(RISCV64) ++ if (accessWidth(op) == Width8 || accessWidth(op) == Width16) ++ emitAtomicOpGenericRISCV64ByteMask(op, address, resultLocation.asGPR(), scratchGPR, valueLocation, rmwFunctor); ++ else ++#endif ++ emitAtomicOpGeneric(op, address, resultLocation.asGPR(), scratchGPR, rmwFunctor); + emitSanitizeAtomicResult(op, valueType.kind, resultLocation.asGPR()); + return result; + } +@@ -1285,46 
+1343,55 @@ Value BBQJIT::emitAtomicBinaryRMWOp(ExtA + } + + #if CPU(RISCV64) +- // Slow path: non-atomic CAS. Load current into resultGPR, compare +- // to expectedGPR, store valueGPR only on equality. Single-threaded +- // correct only. TODO: emit lr.{d,w}+sc.{d,w} for a truly atomic +- // version using the rv64gc A-extension. +- switch (accessWidth) { +- case Width8: +- m_jit.load8(address, resultGPR); +- break; +- case Width16: +- m_jit.load16(address, resultGPR); +- break; +- case Width32: +- m_jit.load32(address, resultGPR); +- break; +- case Width64: +- m_jit.load64(address, resultGPR); +- break; +- default: +- RELEASE_ASSERT_NOT_REACHED(); +- break; ++ // rv64gc A-extension. For 32/64 atomicStrongCAS uses LR/SC.aqrl. ++ // For 8/16 the base A-ext has no byte/half AMOs (Zabha is ++ // optional, not in rv64gc); emit a word-aligned LR.W/SC.W ++ // byte-mask CAS loop -- properly atomic. ++ if (accessWidth == Width8 || accessWidth == Width16) { ++ ScratchScope<4, 0> rvScratches(*this, valueLocation, expectedLocation, resultLocation); ++ GPRReg alignedAddr = rvScratches.gpr(0); ++ GPRReg shift = rvScratches.gpr(1); ++ GPRReg invMask = rvScratches.gpr(2); ++ GPRReg rawOld = rvScratches.gpr(3); ++ int32_t byteMask = (accessWidth == Width8) ? 
0xFF : 0xFFFF; ++ ++ m_jit.move(address.base, alignedAddr); ++ m_jit.and64(TrustedImm32(-4), alignedAddr); ++ m_jit.move(address.base, shift); ++ m_jit.and64(TrustedImm32(3), shift); ++ m_jit.lshift64(TrustedImm32(3), shift); ++ ++ m_jit.move(TrustedImm32(byteMask), invMask); ++ m_jit.lshift64(shift, invMask); ++ m_jit.not64(invMask); ++ ++ auto loop = m_jit.label(); ++ m_jit.loadLinkAcq32(Address(alignedAddr), rawOld); ++ m_jit.urshift64(rawOld, shift, resultGPR); ++ m_jit.and64(TrustedImm32(byteMask), resultGPR); ++ Jump mismatch = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR); ++ m_jit.and64(TrustedImm32(byteMask), valueGPR, scratchGPR); ++ m_jit.lshift64(shift, scratchGPR); ++ m_jit.and64(invMask, rawOld); ++ m_jit.or64(scratchGPR, rawOld); ++ m_jit.storeCondRel32(rawOld, Address(alignedAddr), scratchGPR); ++ m_jit.branchTest32(ResultCondition::NonZero, scratchGPR).linkTo(loop, &m_jit); ++ mismatch.link(&m_jit); ++ return; + } +- auto notEqual = m_jit.branch64(MacroAssembler::NotEqual, resultGPR, expectedGPR); + switch (accessWidth) { +- case Width8: +- m_jit.store8(valueGPR, address); +- break; +- case Width16: +- m_jit.store16(valueGPR, address); +- break; + case Width32: +- m_jit.store32(valueGPR, address); ++ m_jit.move(expectedGPR, resultGPR); ++ m_jit.atomicStrongCAS32(resultGPR, valueGPR, address); + break; + case Width64: +- m_jit.store64(valueGPR, address); ++ m_jit.move(expectedGPR, resultGPR); ++ m_jit.atomicStrongCAS64(resultGPR, valueGPR, address); + break; + default: + RELEASE_ASSERT_NOT_REACHED(); + break; + } +- notEqual.link(&m_jit); + UNUSED_PARAM(scratchGPR); + return; + #endif diff --git a/libs/wpewebkit/patches/149-JavaScriptCore-JSToWasm-RISCV64-all-8-arg-GPRs.patch b/libs/wpewebkit/patches/149-JavaScriptCore-JSToWasm-RISCV64-all-8-arg-GPRs.patch new file mode 100644 index 0000000..3e5aa2f --- /dev/null +++ b/libs/wpewebkit/patches/149-JavaScriptCore-JSToWasm-RISCV64-all-8-arg-GPRs.patch @@ -0,0 +1,56 @@ +From: Daniel 
Golle +Subject: [PATCH] JavaScriptCore: JSToWasm: load/store all 8 wasm GPR args on RISCV64 + +The JS-to-wasm trampoline in JSToWasm.cpp handles register-arg shuffling +across three architectures. ARM64 (8 GPR args) and X86_64 (6 GPR args) +have dedicated branches. RISCV64 also has 8 GPR args (regWA0..regWA7, +a0..a7) but was falling through to the generic USE(JSVALUE64) branch +which only handles 6, leaving regWA6 and regWA7 with stale stack +contents on call entry and dropping them on call return. + +Symptom: any wasm function whose calling convention places i32/i64 +params in a6 or a7 sees garbage for those args. Reproducer is the +JSC wasm-spec "large-sig" function (17 mixed-type params -- locals +14 and 15 land in a6/a7); also propagates to wasm-to-wasm via JS +import (e.g. spec-tests/func.wast.js #124, memory_trap.wast.js #295). + +Add explicit CPU(RISCV64) branches mirroring the X86_64 form but +covering all 8 GPRs. FPRs already had a combined X86_64||RISCV64 +branch covering all 8 FPRs, so no change needed there. 
+ +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/wasm/js/JSToWasm.cpp ++++ b/Source/JavaScriptCore/wasm/js/JSToWasm.cpp +@@ -320,6 +320,15 @@ MacroAssemblerCodeRef cr + jit.loadPair64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 0 * 8), GPRInfo::regWA0, GPRInfo::regWA1); + jit.loadPair64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 2 * 8), GPRInfo::regWA2, GPRInfo::regWA3); + jit.loadPair64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 4 * 8), GPRInfo::regWA4, GPRInfo::regWA5); ++#elif CPU(RISCV64) ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 0 * 8), GPRInfo::regWA0); ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 1 * 8), GPRInfo::regWA1); ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 2 * 8), GPRInfo::regWA2); ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 3 * 8), GPRInfo::regWA3); ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 4 * 8), GPRInfo::regWA4); ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 5 * 8), GPRInfo::regWA5); ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 6 * 8), GPRInfo::regWA6); ++ jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 7 * 8), GPRInfo::regWA7); + #elif USE(JSVALUE64) + jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 0 * 8), GPRInfo::regWA0); + jit.load64(CCallHelpers::Address(CCallHelpers::stackPointerRegister, 1 * 8), GPRInfo::regWA1); +@@ -415,6 +424,15 @@ MacroAssemblerCodeRef cr + jit.storePair64(GPRInfo::regWA0, GPRInfo::regWA1, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 0 * 8)); + jit.storePair64(GPRInfo::regWA2, GPRInfo::regWA3, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 2 * 8)); + jit.storePair64(GPRInfo::regWA4, GPRInfo::regWA5, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 4 * 8)); ++#elif CPU(RISCV64) ++ 
jit.store64(GPRInfo::regWA0, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 0 * 8)); ++ jit.store64(GPRInfo::regWA1, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 1 * 8)); ++ jit.store64(GPRInfo::regWA2, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 2 * 8)); ++ jit.store64(GPRInfo::regWA3, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 3 * 8)); ++ jit.store64(GPRInfo::regWA4, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 4 * 8)); ++ jit.store64(GPRInfo::regWA5, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 5 * 8)); ++ jit.store64(GPRInfo::regWA6, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 6 * 8)); ++ jit.store64(GPRInfo::regWA7, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 7 * 8)); + #elif USE(JSVALUE64) + jit.store64(GPRInfo::regWA0, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 0 * 8)); + jit.store64(GPRInfo::regWA1, CCallHelpers::Address(CCallHelpers::stackPointerRegister, 1 * 8)); diff --git a/libs/wpewebkit/patches/150-JavaScriptCore-LLInt-asm-RISCV64-all-8-arg-GPRs.patch b/libs/wpewebkit/patches/150-JavaScriptCore-LLInt-asm-RISCV64-all-8-arg-GPRs.patch new file mode 100644 index 0000000..edb936a --- /dev/null +++ b/libs/wpewebkit/patches/150-JavaScriptCore-LLInt-asm-RISCV64-all-8-arg-GPRs.patch @@ -0,0 +1,33 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: LLInt asm: iterate all 8 wasm GPR args on RISCV64 + +The forEachWasmArgumentGPR macro in InPlaceInterpreter.asm (used by +js_to_wasm_wrapper_entry, the LLInt counterpart of the C++ JIT shared +JS-to-wasm trampoline, and other wasm-arg shuffle macros) iterates 8 +GPRs only on ARM64; on JSVALUE64 it stops at wa5 to match X86_64's 6 +GPR args. RISC-V also has 8 GPR args (a0..a7), so the JSVALUE64 branch +leaves wa6/wa7 unhandled. + +Symptom: any wasm function whose calling convention places i32/i64 +params in a6 or a7 sees garbage for those args on paths that route +through this macro (e.g. 
js_to_wasm_wrapper_entry). Mirror the C++ +fix from patch 149 by adding an explicit RISCV64 branch with +fn(6, wa6, wa7). The inner preserve/restore impl macros' JSVALUE64 +branch (storeq/loadq) already handles 64-bit GPRs correctly. + +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm ++++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm +@@ -534,6 +534,11 @@ macro forEachWasmArgumentGPR(fn) + fn(2, wa2, wa3) + fn(4, wa4, wa5) + fn(6, wa6, wa7) ++ elsif RISCV64 ++ fn(0, wa0, wa1) ++ fn(2, wa2, wa3) ++ fn(4, wa4, wa5) ++ fn(6, wa6, wa7) + elsif JSVALUE64 + fn(0, wa0, wa1) + fn(2, wa2, wa3) diff --git a/libs/wpewebkit/patches/151-JavaScriptCore-RISCV64-disable-useWasmFastMemory.patch b/libs/wpewebkit/patches/151-JavaScriptCore-RISCV64-disable-useWasmFastMemory.patch new file mode 100644 index 0000000..294693b --- /dev/null +++ b/libs/wpewebkit/patches/151-JavaScriptCore-RISCV64-disable-useWasmFastMemory.patch @@ -0,0 +1,54 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: disable useWasmFastMemory on RISCV64 + +Base RISC-V doesn't require stores that fault to be atomic w.r.t. the +fault: hardware may commit some in-bound bytes of a 2/4/8-byte store +before raising the page-fault exception when the access straddles a +page boundary into PROT_NONE memory. SiFive U74 (StarFive JH7110) +does this. + +JSC's "fast memory" mode (useWasmFastMemory=true) relies on the +SIGBUS/SIGSEGV handler to convert OOB accesses into wasm traps without +an explicit bounds check. The handler observes the fault and traps the +wasm correctly, but the in-bounds bytes already corrupted by the partial +commit remain in memory. Subsequent reads see wrong data. + +Reproducer: spec-tests/memory_trap.wast.js #295. Test 51 of the same +file does i32.store at offset 65535 (1 in-bound byte, 3 OOB bytes); +expected to trap. The trap fires but byte 65535 gets clobbered to 0 +before the fault. 
Later test #295 reads i64 at offset 65528 (covering +bytes 65528..65535) and finds 0x0067666564636261 instead of +0x6867666564636261 -- the assertion against the i64 const trips the +wasm "unreachable" guard. + +Force-disable useWasmFastMemory on RISCV64; bounds checking falls back +to the explicit BoundsChecking mode in emitCheckAndPreparePointer, +which checks the access upper bound against the memory size before +issuing any load/store. + +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/runtime/Options.cpp ++++ b/Source/JavaScriptCore/runtime/Options.cpp +@@ -804,6 +804,21 @@ void Options::notifyOptionsChanged() + #endif + #endif + ++#if CPU(RISCV64) ++ // The base RISC-V ISA permits a faulting store to commit some of its ++ // bytes before raising the exception (single-copy atomicity is only ++ // guaranteed for naturally-aligned accesses up to XLEN, not for a ++ // store that straddles a page boundary into PROT_NONE). On hardware ++ // that does this (e.g. SiFive U74 in JH7110), JSC's signal-based ++ // bounds check sees the page fault but the in-bounds bytes have ++ // already been corrupted -- subsequent reads return wrong values. ++ // Force explicit bounds checking instead. Reproducer: ++ // spec-tests/memory_trap.wast.js #295 (i32.store at 65535 partially ++ // overwrites byte 65535 before trapping, then i64.load at 65528 ++ // sees the corrupted high byte). 
++ Options::useWasmFastMemory() = false; ++#endif ++ + #if !CPU(ARM64) + Options::useRandomizingExecutableIslandAllocation() = false; + #endif diff --git a/libs/wpewebkit/patches/152-JavaScriptCore-BBQJIT-RISCV64-tail-call.patch b/libs/wpewebkit/patches/152-JavaScriptCore-BBQJIT-RISCV64-tail-call.patch new file mode 100644 index 0000000..9e2d04a --- /dev/null +++ b/libs/wpewebkit/patches/152-JavaScriptCore-BBQJIT-RISCV64-tail-call.patch @@ -0,0 +1,52 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: BBQJIT: implement wasm tail call on RISCV64 + +emitTailCall (direct) and emitIndirectTailCall in WasmBBQJIT.cpp branch +on the host CPU to set up the caller's frame pointer + return address +for the tail-jumped callee. ARM64/ARMv7 use loadPairPtr(fp, callerFP, lr) +to read both at once; X86_64 has its own dedicated branch; everything +else trips UNREACHABLE_FOR_PLATFORM() -> SIGABRT during BBQ compilation +of any wasm function that uses return_call (tail call). + +RISC-V's standard prologue saves fp and ra adjacent at [sp, sp+8] +(same layout as ARM64), so the ARM64 loadPair64-backed loadPairPtr +also works on RISCV64. The "fix SP and FP" sequence (addPtr to +stackPointerRegister + move callerFramePointer -> framePointerRegister) +is also generic. Add CPU(RISCV64) to all three guarded branches. + +Reproducer: spec-tests/try_table.wast.js line 117 +(assert_exception(() => call($2, "return-call-in-try-catch", []))). +BBQ aborts during compilation of the wasm function that does +return_call inside try-catch. 
+ +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp +@@ -4381,7 +4381,7 @@ void BBQJIT::emitTailCall(FunctionSpaceI + m_jit.loadPtr(Address(MacroAssembler::framePointerRegister), callerFramePointer); + resolvedArguments.append(Value::pinned(pointerType(), Location::fromStack(sizeof(Register)))); + parameterLocations.append(Location::fromStack(tailCallStackOffsetFromFP + Checked(sizeof(Register)))); +-#elif CPU(ARM64) || CPU(ARM_THUMB2) ++#elif CPU(ARM64) || CPU(ARM_THUMB2) || CPU(RISCV64) + m_jit.loadPairPtr(MacroAssembler::framePointerRegister, callerFramePointer, MacroAssembler::linkRegister); + #else + UNUSED_PARAM(callerFramePointer); +@@ -4652,7 +4652,7 @@ void BBQJIT::emitIndirectTailCall(const + + resolvedArguments.append(Value::pinned(pointerType(), Location::fromStack(sizeof(Register)))); + parameterLocations.append(Location::fromStack(tailCallStackOffsetFromFP + Checked(sizeof(Register)))); +-#elif CPU(ARM64) || CPU(ARM_THUMB2) ++#elif CPU(ARM64) || CPU(ARM_THUMB2) || CPU(RISCV64) + auto preserved = callingConvention.argumentGPRs(); + preserved.add(importableFunction, IgnoreVectors); + if constexpr (isARM64E()) +@@ -4709,7 +4709,7 @@ void BBQJIT::emitIndirectTailCall(const + m_jit.loadPtr(Address(MacroAssembler::framePointerRegister, tailCallStackOffsetFromFP), wasmScratchGPR); + m_jit.addPtr(TrustedImm32(tailCallStackOffsetFromFP + Checked(sizeof(Register))), MacroAssembler::framePointerRegister, MacroAssembler::stackPointerRegister); + m_jit.move(wasmScratchGPR, MacroAssembler::framePointerRegister); +-#elif CPU(ARM64) || CPU(ARM_THUMB2) ++#elif CPU(ARM64) || CPU(ARM_THUMB2) || CPU(RISCV64) + m_jit.addPtr(TrustedImm32(tailCallStackOffsetFromFP + Checked(sizeof(CallerFrameAndPC))), MacroAssembler::framePointerRegister, MacroAssembler::stackPointerRegister); + m_jit.move(callerFramePointer, MacroAssembler::framePointerRegister); + #else diff --git 
a/libs/wpewebkit/patches/153-JavaScriptCore-RISCV64-FPR-scratch-fix.patch b/libs/wpewebkit/patches/153-JavaScriptCore-RISCV64-FPR-scratch-fix.patch new file mode 100644 index 0000000..a0ea014 --- /dev/null +++ b/libs/wpewebkit/patches/153-JavaScriptCore-RISCV64-FPR-scratch-fix.patch @@ -0,0 +1,41 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: RISCV64: fix nonPreservedNonArgumentFPR0 aliasing argumentFPR1 + +On RISCV64, FPRInfo declared + + nonPreservedNonArgumentFPR0 = RISCV64Registers::f11 + +but f11 is fa1 = argumentFPR1. BBQJIT pins wasmScratchFPR to +nonPreservedNonArgumentFPR0 and uses it to materialise f32/f64 +immediates for binary float comparisons (emitCompareF32/F64). When the +non-immediate operand is bound to fa1 (e.g. wasm result/argument 1), +emitMoveConst into the "scratch" register silently overwrites that +value before the comparison runs. + +Symptom: spec-tests/wasm/stress multi-return tests that compare each +returned f32/f64 against a constant fail at the fa1 slot: + + wasm-wasm-call-many-return-types-on-stack-no-args.js + wasm-wasm-call-indirect-many-return-types-on-stack.js + wasm-js-call-many-return-types-on-stack-no-args.js + +The bug is not about spill pressure; it is the scratch FPR aliasing an +argument/return FPR for any value bound to fa1. + +Use f0 (ft0) -- a RISC-V caller-saved temporary outside the f10..f17 +argument/return set -- matching the pattern used by ARM64 (q16) and +X86_64 (xmm8). 
+ +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/jit/FPRInfo.h ++++ b/Source/JavaScriptCore/jit/FPRInfo.h +@@ -366,7 +366,7 @@ public: + static constexpr FPRReg argumentFPR7 = RISCV64Registers::f17; // fpRegT7 + + static constexpr FPRReg returnValueFPR = RISCV64Registers::f10; // fpRegT0 +- static constexpr FPRReg nonPreservedNonArgumentFPR0 = RISCV64Registers::f11; ++ static constexpr FPRReg nonPreservedNonArgumentFPR0 = RISCV64Registers::f0; + + static FPRReg toRegister(unsigned index) + { diff --git a/libs/wpewebkit/patches/154-JavaScriptCore-RISCV64-FP-operation-return-no-struct.patch b/libs/wpewebkit/patches/154-JavaScriptCore-RISCV64-FP-operation-return-no-struct.patch new file mode 100644 index 0000000..0a394b3 --- /dev/null +++ b/libs/wpewebkit/patches/154-JavaScriptCore-RISCV64-FP-operation-return-no-struct.patch @@ -0,0 +1,63 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: RISCV64: do not wrap FP JIT-op returns in ExceptionOperationResult + +OperationReturnType<T> wraps every JIT operation return value in +ExceptionOperationResult<T> = { T value; Exception* exception; } so the +JIT can hoist the exception check next to the result. For T = float or +double on RISCV64, this produces a 16-byte aggregate that GCC returns +via the *integer* calling convention (a0 holds the value, a1 holds the +exception pointer) instead of the hardware floating-point convention +(value in fa0, exception in a0). The JIT-emitted call sites assume the +floating-point value is in fa0 and read it from there, so they get +whatever stale FP value happened to be left in fa0 by the C function's +prologue/epilogue, not the result.
+ +Reproducer (wasm-to-JS slow path for f32 results): + + let m = new WebAssembly.Module(/* (func (import "imp" "func") (result f32)) + (func (export "foo") (result f32) + (call 0)) */); + let i = new WebAssembly.Instance(m, + { imp: { func: () => ({ valueOf: () => 25.82 }) } }); + i.exports.foo(); // expected 25.82 (as f32: 0x41ce8f5c) + // actual on RISCV64: 0x851eb852 (== low 32 bits of + // the f64 representation of 25.82) + +Direct repro of GCC's ABI choice (cross-toolchain, lp64d): + + struct sf { float v; void *e; }; + __attribute__((noinline)) struct sf make_sf(double x) { + return (struct sf){ (float)x, 0 }; + } + + // gcc -O2 -S emits: + // make_sf: + // fcvt.s.d fa0, fa0 + // fsw fa0, 0(sp) + // ld a0, 0(sp) ; struct packed into a0+a1 (integer ABI), + // li a1, 0 ; *not* split fa0/a0 (hardware FP ABI) + +The same JSC source already excludes floating-point types from the +struct wrapper on ARM64 / ARM_THUMB2 (where the ABI similarly returns +single-FP values in the integer return register inside aggregates). +Extend the same guard to RISCV64. + +With the wrapper disabled for FP types, OperationReturnType<float> +collapses to plain `float`, GCC returns it in fa0 NaN-boxed per the +plain RISC-V FP convention, and the JIT reads the correct value. The +manual exception checks already present at every FP-returning call +site (loadPtr of VM, branchTestPtr on VM::exception()) are unaffected. + +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/jit/OperationResult.h ++++ b/Source/JavaScriptCore/jit/OperationResult.h +@@ -47,7 +47,7 @@ template +concept canMakeExceptionOperationResult = + std::is_standard_layout_v + && std::is_trivial_v +-#if CPU(ARM64) || CPU(ARM_THUMB2) ++#if CPU(ARM64) || CPU(ARM_THUMB2) || CPU(RISCV64) + && !std::is_floating_point_v // The ARM64 ABI says that this should be returned in x0 instead of d0. Seems unlikely it's worth it to do the extra fmov.
+ #endif + && sizeof(T) <= sizeof(CPURegister); diff --git a/libs/wpewebkit/patches/155-JavaScriptCore-RISCV64-import-call-i32-sext.patch b/libs/wpewebkit/patches/155-JavaScriptCore-RISCV64-import-call-i32-sext.patch new file mode 100644 index 0000000..44ba3a2 --- /dev/null +++ b/libs/wpewebkit/patches/155-JavaScriptCore-RISCV64-import-call-i32-sext.patch @@ -0,0 +1,61 @@ +From: Daniel Golle +Subject: [PATCH] JavaScriptCore: RISCV64: sign-extend i32 args at wasm import call sites + +The RISCV64 lp64d psABI requires that integer arguments narrower than +XLEN be sign-extended into the full 64-bit argument register by the +caller, and the callee is permitted to rely on this. GCC compiles the +wasm builtin and JS-import host glue (e.g. +`jsstring__substring(JSGlobalObject*, JSValue, int32_t start, int32_t end)`) +under this assumption: tests like `if (start < 0)` lower to 64-bit +`bgez` against the full argument register, not a 32-bit comparison of +its low half. + +BBQ's wasm-internal calling convention leaves wasm i32 values +zero-extended in 64-bit GPRs (the WebKit MacroAssembler contract for +`load32` is zero-extend, matching x86_64 and ARM64 hardware). When BBQ +emits a wasm-to-import call (going through `importFunctionStub`), the +zero-extended value is forwarded directly to the host entry point, so +a negative i32 such as -2 enters the C function as 0x00000000FFFFFFFE. +The 64-bit `bgez` then sees a positive number, the negative-clamp +branch is skipped, and the builtin operates on an unclamped value. 
+ +Symptom (`stress/wasm-js-string-builtins.js`): + + let m = await WebAssembly.instantiate(/* (import "wasm:js-string" "substring") + + a relay wasm function */, + {}, { builtins: ["js-string"] }); + m.instance.exports.relay("Hello, world", -2, 2); // expected "He" + // actual on RV64: "" + +The relay's `local.get $start` loads -2 with `lwu` (per the MacroAsm +contract), `addCall` for the import forwards a2 unchanged, and GCC's +`bgez s3, .Lclamp_skipped` misreads it. `start` stays at -2, gets +unsigned-cast to 4294967294, then clamped down to length(12), and +substring returns "" because (clamped start) > end. + +Directly invoking the builtin as `instance.exports.exported(s, -2, 2)` +works because the JS-to-Wasm trampoline sign-extends the JS Number +when materialising the i32 wasm arg; only the wasm-to-import path is +broken. + +The existing `BBQJIT::emitSignExtendI32ArgsForCCall` is already +RV64-guarded and does this fix-up for runtime helpers invoked through +`emitCCall`. Apply it before the import-stub call in `addCall` so the +same sign-extension applies to every wasm-to-host transition. + +Wasm-to-wasm direct calls (the other branch of the same `if`) do not +need this: a wasm callee never observes the upper 32 bits of an i32 +argument and the BBQ "w-form" i32 ops mask to 32 bits. 
+ +Signed-off-by: Daniel Golle +--- +--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp ++++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp +@@ -4484,6 +4484,7 @@ void BBQJIT::emitTailCall(FunctionSpaceI + if (m_info.isImportedFunctionFromFunctionIndexSpace(functionIndexSpace)) { + static_assert(sizeof(WasmOrJSImportableFunctionCallLinkInfo) * maxImports < std::numeric_limits::max()); + RELEASE_ASSERT(JSWebAssemblyInstance::offsetOfImportFunctionStub(functionIndexSpace) < std::numeric_limits::max()); ++ emitSignExtendI32ArgsForCCall(callInfo, signature); + m_jit.call(Address(GPRInfo::wasmContextInstancePointer, JSWebAssemblyInstance::offsetOfImportFunctionStub(functionIndexSpace)), WasmEntryPtrTag); + } else { + // Record the callee so the callee knows to look for it in updateCallsitesToCallUs. diff --git a/libs/wpewebkit/patches/156-ANGLE-skip-roundToNearest-static_asserts-on-x87.patch b/libs/wpewebkit/patches/156-ANGLE-skip-roundToNearest-static_asserts-on-x87.patch new file mode 100644 index 0000000..aa21b54 --- /dev/null +++ b/libs/wpewebkit/patches/156-ANGLE-skip-roundToNearest-static_asserts-on-x87.patch @@ -0,0 +1,33 @@ +From 5a93e9a6c82d93970742972e0cf68ef62cb4f313 Mon Sep 17 00:00:00 2001 +From: Daniel Golle +Date: Tue, 26 May 2026 13:12:29 +0100 +Subject: [PATCH] ANGLE: skip excess-precision-sensitive roundToNearest + static_asserts on x87 + +On 32-bit x86 targets with x87 FP (FLT_EVAL_METHOD == 2), constexpr +float/double arithmetic is performed at 80-bit extended precision, so +0.49999997f + 0.5f compares to long-double 0.99999996... != 1.0 and the +static_assert fails. The runtime bias trick remains correct (the result +is still cast back to float and rounded). Skip the compile-time checks +when __FLT_EVAL_METHOD__ != 0. 
+ +Signed-off-by: Daniel Golle +--- + Source/ThirdParty/ANGLE/src/common/mathutil.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/Source/ThirdParty/ANGLE/src/common/mathutil.h ++++ b/Source/ThirdParty/ANGLE/src/common/mathutil.h +@@ -606,10 +606,12 @@ inline R roundToNearest(T input) + // On armv8, this expression is compiled to a dedicated round-to-nearest instruction + return static_cast(std::round(input)); + #else ++#if !defined(__FLT_EVAL_METHOD__) || __FLT_EVAL_METHOD__ == 0 + static_assert(0.49999997f < 0.5f); + static_assert(0.49999997f + 0.5f == 1.0f); + static_assert(0.49999999999999994 < 0.5); + static_assert(0.49999999999999994 + 0.5 == 1.0); ++#endif + constexpr T bias = sizeof(T) == 8 ? 0.49999999999999994 : 0.49999997f; + return static_cast(input + (std::is_signed::value ? std::copysign(bias, input) : bias)); + #endif