mirror of
https://github.com/openwrt/video.git
synced 2026-05-31 06:51:54 +08:00
df0b899123
Update WPEWebKit to the 2.52 stable major release branch. Includes a pending patchset to get WASM BBQJIT working on RISCV64 (upstream PR https://github.com/WebKit/WebKit/pull/65621). Altogether, this brings acceptable performance (even with LLVMPipe Mesa software renderer) on RISCV64. Link: https://wpewebkit.org/release/wpewebkit-2.52.0.html Link: https://wpewebkit.org/release/wpewebkit-2.52.1.html Link: https://wpewebkit.org/release/wpewebkit-2.52.2.html Link: https://wpewebkit.org/release/wpewebkit-2.52.3.html Signed-off-by: Daniel Golle <daniel@makrotopia.org>
144 lines
6.7 KiB
Diff
144 lines
6.7 KiB
Diff
--- a/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
|
|
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerRISCV64.h
|
|
@@ -582,36 +582,51 @@ public:
|
|
|
|
void countTrailingZeros32(RegisterID src, RegisterID dest)
|
|
{
|
|
- auto temp = temps<Data>();
|
|
- m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<32>());
|
|
+ // Previously used slli (64-bit left shift) on a zero-extended 32-bit
|
|
+ // value, looking for temp == 0. That never zeros the value until the
|
|
+ // set bits fall off bit 63 — so dest decremented past zero and
|
|
+ // returned a negative result for any nonzero src. Use the more
|
|
+ // direct "right-shift and test bit 0" loop, which terminates in at
|
|
+ // most 32 iterations.
|
|
+ auto temp = temps<Data, Memory>();
|
|
m_assembler.zeroExtend<32>(temp.data(), src);
|
|
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<32>());
|
|
|
|
- JumpList zero(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
|
|
+ JumpList done(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
|
|
+
|
|
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<0>());
|
|
|
|
Label loop = label();
|
|
- m_assembler.slliInsn<1>(temp.data(), temp.data());
|
|
- m_assembler.addiInsn(dest, dest, Imm::I<-1>());
|
|
- zero.append(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
|
|
+ m_assembler.andiInsn(temp.memory(), temp.data(), Imm::I<1>());
|
|
+ done.append(makeBranch(NotEqual, temp.memory(), RISCV64Registers::zero));
|
|
+ m_assembler.srliInsn<1>(temp.data(), temp.data());
|
|
+ m_assembler.addiInsn(dest, dest, Imm::I<1>());
|
|
jump().linkTo(loop, this);
|
|
|
|
- zero.link(this);
|
|
+ done.link(this);
|
|
}
|
|
|
|
void countTrailingZeros64(RegisterID src, RegisterID dest)
|
|
{
|
|
- auto temp = temps<Data>();
|
|
- m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<64>());
|
|
+ // Same fix as countTrailingZeros32 for the 64-bit case: scan from
|
|
+ // bit 0 upward using right-shift + andi 1, instead of left-shifting
|
|
+ // until the value falls off the register.
|
|
+ auto temp = temps<Data, Memory>();
|
|
m_assembler.addiInsn(temp.data(), src, Imm::I<0>());
|
|
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<64>());
|
|
|
|
- JumpList zero(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
|
|
+ JumpList done(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
|
|
+
|
|
+ m_assembler.addiInsn(dest, RISCV64Registers::zero, Imm::I<0>());
|
|
|
|
Label loop = label();
|
|
- m_assembler.slliInsn<1>(temp.data(), temp.data());
|
|
- m_assembler.addiInsn(dest, dest, Imm::I<-1>());
|
|
- zero.append(makeBranch(Equal, temp.data(), RISCV64Registers::zero));
|
|
+ m_assembler.andiInsn(temp.memory(), temp.data(), Imm::I<1>());
|
|
+ done.append(makeBranch(NotEqual, temp.memory(), RISCV64Registers::zero));
|
|
+ m_assembler.srliInsn<1>(temp.data(), temp.data());
|
|
+ m_assembler.addiInsn(dest, dest, Imm::I<1>());
|
|
jump().linkTo(loop, this);
|
|
|
|
- zero.link(this);
|
|
+ done.link(this);
|
|
}
|
|
|
|
void byteSwap16(RegisterID reg)
|
|
@@ -844,8 +859,72 @@ public:
|
|
m_assembler.srliInsn(dest, src, uint32_t(imm.m_value & ((1 << 6) - 1)));
|
|
}
|
|
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight32);
|
|
- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD(rotateRight64);
|
|
+ // rotateRight32/64: native rv64gc has no rotate instruction (Zbb's
|
|
+ // rorw/ror would do it in one), so synthesise via shift + shift + or.
|
|
+ // These are called by BBQJIT's I32Rotl/I32Rotr/I64Rotl/I64Rotr handlers
|
|
+ // on every non-x86 path (the rotl variants are routed through
|
|
+ // rotateRight32 with a negated shift, so a broken or missing
|
|
+ // rotateRight* silently miscompiles both rotr and rotl).
|
|
+ void rotateRight32(RegisterID src, TrustedImm32 imm, RegisterID dest)
|
|
+ {
|
|
+ int32_t shift = imm.m_value & 31;
|
|
+ if (!shift) {
|
|
+ if (src != dest)
|
|
+ move(src, dest);
|
|
+ m_assembler.maskRegister<32>(dest);
|
|
+ return;
|
|
+ }
|
|
+ auto temp = temps<Data, Memory>();
|
|
+ m_assembler.srliwInsn(temp.data(), src, uint32_t(shift));
|
|
+ m_assembler.slliwInsn(temp.memory(), src, uint32_t(32 - shift));
|
|
+ m_assembler.orInsn(dest, temp.data(), temp.memory());
|
|
+ m_assembler.maskRegister<32>(dest);
|
|
+ }
|
|
+
|
|
+ void rotateRight32(RegisterID src, RegisterID shift, RegisterID dest)
|
|
+ {
|
|
+ auto temp = temps<Data, Memory>();
|
|
+ m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<32>());
|
|
+ m_assembler.subInsn(temp.data(), temp.data(), shift);
|
|
+ m_assembler.srlwInsn(temp.memory(), src, shift);
|
|
+ m_assembler.sllwInsn(temp.data(), src, temp.data());
|
|
+ m_assembler.orInsn(dest, temp.memory(), temp.data());
|
|
+ m_assembler.maskRegister<32>(dest);
|
|
+ }
|
|
+
|
|
+ void rotateRight64(RegisterID src, TrustedImm32 imm, RegisterID dest)
|
|
+ {
|
|
+ int32_t shift = imm.m_value & 63;
|
|
+ if (!shift) {
|
|
+ if (src != dest)
|
|
+ move(src, dest);
|
|
+ return;
|
|
+ }
|
|
+ auto temp = temps<Data, Memory>();
|
|
+ m_assembler.srliInsn(temp.data(), src, uint32_t(shift));
|
|
+ m_assembler.slliInsn(temp.memory(), src, uint32_t(64 - shift));
|
|
+ m_assembler.orInsn(dest, temp.data(), temp.memory());
|
|
+ }
|
|
+
|
|
+ void rotateRight64(RegisterID src, RegisterID shift, RegisterID dest)
|
|
+ {
|
|
+ auto temp = temps<Data, Memory>();
|
|
+ m_assembler.addiInsn(temp.data(), RISCV64Registers::zero, Imm::I<64>());
|
|
+ m_assembler.subInsn(temp.data(), temp.data(), shift);
|
|
+ m_assembler.srlInsn(temp.memory(), src, shift);
|
|
+ m_assembler.sllInsn(temp.data(), src, temp.data());
|
|
+ m_assembler.orInsn(dest, temp.memory(), temp.data());
|
|
+ }
|
|
+
|
|
+ // Two-operand in-place variants used by MacroAssembler.h convenience
|
|
+ // wrappers (e.g. urshiftPtr / rolPtr / FastRotation::apply). These were
|
|
+ // matched by the previous templated NOOP overload and silently did
|
|
+ // nothing; forward to the three-operand form so callers see the real
|
|
+ // rotate.
|
|
+ void rotateRight32(TrustedImm32 imm, RegisterID srcDst) { rotateRight32(srcDst, imm, srcDst); }
|
|
+ void rotateRight64(TrustedImm32 imm, RegisterID srcDst) { rotateRight64(srcDst, imm, srcDst); }
|
|
+ void rotateRight32(RegisterID shift, RegisterID srcDst) { rotateRight32(srcDst, shift, srcDst); }
|
|
+ void rotateRight64(RegisterID shift, RegisterID srcDst) { rotateRight64(srcDst, shift, srcDst); }
|
|
|
|
// Scalar BBQJIT primitives: fused shift/add and rotate-left, used by the
|
|
// wasm bytecode-to-machine-code path. RISC-V's baseline rv64gc has no
|