Files
video/libs/wpewebkit/patches/142-JavaScriptCore-RISCV64-BBQJIT-rotate-ctz-impl.patch
T
Daniel Golle df0b899123 wpewebkit: update to version 2.52.3
Update WPEWebKit to the 2.52 stable major release branch.

Includes a pending patchset to get WASM BBQJIT working on RISCV64,
upstream PR https://github.com/WebKit/WebKit/pull/65621

Altogether this brings acceptable performance (even with LLVMPipe
Mesa software renderer) on RISCV64.

Link: https://wpewebkit.org/release/wpewebkit-2.52.0.html
Link: https://wpewebkit.org/release/wpewebkit-2.52.1.html
Link: https://wpewebkit.org/release/wpewebkit-2.52.2.html
Link: https://wpewebkit.org/release/wpewebkit-2.52.3.html
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
2026-05-27 19:27:26 +01:00

144 lines
6.7 KiB
Diff

--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
@@ -582,36 +582,51 @@ public:
void countTrailingZeros32(RegisterID src, RegisterID dest)
{
- auto temp = temps<Data>();
- m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<32>());
+ // Previously used slli (64-bit left shift) on a zero-extended 32-bit
+ // value, looking for temp == 0. That never zeros the value until the
+ // set bits fall off bit 63 — so dest decremented past zero and
+ // returned a negative result for any nonzero src. Use the more
+ // direct "right-shift and test bit 0" loop, which terminates in at
+ // most 32 iterations.
+ auto temp = temps<Data, Memory>();
m_assembler.zeroExtend<32>(temp.data(), src);
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<32>());
- JumpList zero(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
+ JumpList done(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
+
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<0>());
Label loop = label();
- m_assembler.slliInsn<1>(temp.data(), temp.data());
- m_assembler.addiInsn(dest, dest, Imm::I<-1>());
- zero.append(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
+ m_assembler.andiInsn(temp.memory(), temp.data(), Imm::I<1>());
+ done.append(makeBranch(NotEqual, temp.memory(), RISCV64Registers::zero));
+ m_assembler.srliInsn<1>(temp.data(), temp.data());
+ m_assembler.addiInsn(dest, dest, Imm::I<1>());
jump().linkTo(loop, this);
- zero.link(this);
+ done.link(this);
}
void countTrailingZeros64(RegisterID src, RegisterID dest)
{
- auto temp = temps<Data>();
- m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<64>());
+ // Same fix as countTrailingZeros32 for the 64-bit case: scan from
+ // bit 0 upward using right-shift + andi 1, instead of left-shifting
+ // until the value falls off the register.
+ auto temp = temps<Data, Memory>();
m_assembler.addiInsn(temp.data(), src, Imm::I<0>());
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<64>());
- JumpList zero(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
+ JumpList done(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
+
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<0>());
Label loop = label();
- m_assembler.slliInsn<1>(temp.data(), temp.data());
- m_assembler.addiInsn(dest, dest, Imm::I<-1>());
- zero.append(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
+ m_assembler.andiInsn(temp.memory(), temp.data(), Imm::I<1>());
+ done.append(makeBranch(NotEqual, temp.memory(), RISCV64Registers::zero));
+ m_assembler.srliInsn<1>(temp.data(), temp.data());
+ m_assembler.addiInsn(dest, dest, Imm::I<1>());
jump().linkTo(loop, this);
- zero.link(this);
+ done.link(this);
}
void byteSwap16(RegisterID reg)
@@ -844,8 +859,72 @@ public:
m_assembler.srliInsn(dest, src, uint32_t(imm.m_value & ((1 << 6) - 1)));
}
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight32);
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight64);
+ // rotateRight32/64: baseline rv64gc has no rotate instruction (Zbb's
+ // rorw/ror would do it in one), so synthesise via shift + shift + or.
+ // These are called by BBQJIT's I32Rotl/I32Rotr/I64Rotl/I64Rotr handlers
+ // on every non-x86 path (the rotl variants are routed through
+ // rotateRight32 with a negated shift, so a broken or missing
+ // rotateRight* silently miscompiles both rotr and rotl).
+ void rotateRight32(RegisterID src, TrustedImm32 imm, RegisterID dest)
+ {
+ int32_t shift = imm.m_value & 31;
+ if (!shift) {
+ if (src != dest)
+ move(src, dest);
+ m_assembler.maskRegister<32>(dest);
+ return;
+ }
+ auto temp = temps<Data, Memory>();
+ m_assembler.srliwInsn(temp.data(), src, uint32_t(shift));
+ m_assembler.slliwInsn(temp.memory(), src, uint32_t(32 - shift));
+ m_assembler.orInsn(dest, temp.data(), temp.memory());
+ m_assembler.maskRegister<32>(dest);
+ }
+
+ void rotateRight32(RegisterID src, RegisterID shift, RegisterID dest)
+ {
+ auto temp = temps<Data, Memory>();
+ m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<32>());
+ m_assembler.subInsn(temp.data(), temp.data(), shift);
+ m_assembler.srlwInsn(temp.memory(), src, shift);
+ m_assembler.sllwInsn(temp.data(), src, temp.data());
+ m_assembler.orInsn(dest, temp.memory(), temp.data());
+ m_assembler.maskRegister<32>(dest);
+ }
+
+ void rotateRight64(RegisterID src, TrustedImm32 imm, RegisterID dest)
+ {
+ int32_t shift = imm.m_value & 63;
+ if (!shift) {
+ if (src != dest)
+ move(src, dest);
+ return;
+ }
+ auto temp = temps<Data, Memory>();
+ m_assembler.srliInsn(temp.data(), src, uint32_t(shift));
+ m_assembler.slliInsn(temp.memory(), src, uint32_t(64 - shift));
+ m_assembler.orInsn(dest, temp.data(), temp.memory());
+ }
+
+ void rotateRight64(RegisterID src, RegisterID shift, RegisterID dest)
+ {
+ auto temp = temps<Data, Memory>();
+ m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<64>());
+ m_assembler.subInsn(temp.data(), temp.data(), shift);
+ m_assembler.srlInsn(temp.memory(), src, shift);
+ m_assembler.sllInsn(temp.data(), src, temp.data());
+ m_assembler.orInsn(dest, temp.memory(), temp.data());
+ }
+
+ // Two-operand in-place variants used by MacroAssembler.h convenience
+ // wrappers (e.g. urshiftPtr / rolPtr / FastRotation::apply). These were
+ // matched by the previous templated NOOP overload and silently did
+ // nothing; forward to the three-operand form so callers see the real
+ // rotate.
+ void rotateRight32(TrustedImm32 imm, RegisterID srcDst) { rotateRight32(srcDst, imm, srcDst); }
+ void rotateRight64(TrustedImm32 imm, RegisterID srcDst) { rotateRight64(srcDst, imm, srcDst); }
+ void rotateRight32(RegisterID shift, RegisterID srcDst) { rotateRight32(srcDst, shift, srcDst); }
+ void rotateRight64(RegisterID shift, RegisterID srcDst) { rotateRight64(srcDst, shift, srcDst); }
// Scalar BBQJIT primitives: fused shift/add and rotate-left, used by the
// wasm bytecode-to-machine-code path. RISC-V's baseline rv64gc has no