diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index ce2a2eba3ff2..a79556b3854f 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -4130,11 +4130,6 @@ (rule (splat_const n size) (vec_dup (imm $I64 (ImmExtend.Zero) n) size)) -;; Lower a FloatCC to a Cond. -(decl fp_cond_code (FloatCC) Cond) -;; TODO: Port lower_fp_condcode() to ISLE. -(extern constructor fp_cond_code fp_cond_code) - ;; Lower an integer cond code. (spec (cond_code a) (provide (= a result))) (decl cond_code (IntCC) Cond) @@ -4637,12 +4632,18 @@ (Zero (reg Reg) (size OperandSize)) (NotZero (reg Reg) (size OperandSize)) (Cond (flags ProducesFlags) (cond Cond)) + (Or (flags ProducesFlags) (cc1 Cond) (cc2 Cond)) + (And (flags ProducesFlags) (cc1 Cond) (cc2 Cond)) )) (decl cond_result_invert (CondResult) CondResult) (rule (cond_result_invert (CondResult.Zero reg size)) (CondResult.NotZero reg size)) (rule (cond_result_invert (CondResult.NotZero reg size)) (CondResult.Zero reg size)) (rule (cond_result_invert (CondResult.Cond flags cc)) (CondResult.Cond flags (invert_cond cc))) +(rule (cond_result_invert (CondResult.Or flags cc1 cc2)) + (CondResult.And flags (invert_cond cc1) (invert_cond cc2))) +(rule (cond_result_invert (CondResult.And flags cc1 cc2)) + (CondResult.Or flags (invert_cond cc1) (invert_cond cc2))) (decl is_nonzero_cmp (Value) CondResult) @@ -4775,6 +4776,52 @@ (rule (emit_fcmp cc a b @ (value_type ty)) (CondResult.Cond (fpu_cmp (scalar_size ty) a b) (fp_cond_code cc))) +;; Special case `OrderedNotEqual` and `UnorderedOrEqual` which aren't possible +;; to select via one condition below. +;; OrderedNotEqual = LT | GT. 
+(rule 1 (emit_fcmp (FloatCC.OrderedNotEqual) a b @ (value_type ty)) + (CondResult.Or (fpu_cmp (scalar_size ty) a b) (Cond.Mi) (Cond.Gt))) +;; UnorderedOrEqual = UN | EQ +(rule 1 (emit_fcmp (FloatCC.UnorderedOrEqual) a b @ (value_type ty)) + (CondResult.Or (fpu_cmp (scalar_size ty) a b) (Cond.Eq) (Cond.Vs))) + +;; Note that the order of the two conditions in `CondResult.Or` does not +;; matter: a `cset` is emitted for each and the two results are or'd. + +;; Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs. +;; The FCMP instruction sets: +;; NZCV +;; - PCSR.NZCV = 0011 on UN (unordered), +;; 0110 on EQ, +;; 1000 on LT, +;; 0010 on GT. +(decl fp_cond_code (FloatCC) Cond) +;; EQ | LT | GT. Vc => V clear. +(rule (fp_cond_code (FloatCC.Ordered)) (Cond.Vc)) +;; UN. Vs => V set. +;; (NZCV = 0011 on UN, so a set V flag identifies the unordered case.) +(rule (fp_cond_code (FloatCC.Unordered)) (Cond.Vs)) +;; EQ. Eq => Z set. +(rule (fp_cond_code (FloatCC.Equal)) (Cond.Eq)) +;; UN | LT | GT. Ne => Z clear. +(rule (fp_cond_code (FloatCC.NotEqual)) (Cond.Ne)) +;; LT. Mi => N set. +(rule (fp_cond_code (FloatCC.LessThan)) (Cond.Mi)) +;; LT | EQ. Ls => C clear or Z set. +(rule (fp_cond_code (FloatCC.LessThanOrEqual)) (Cond.Ls)) +;; GT. Gt => Z clear, N = V. +(rule (fp_cond_code (FloatCC.GreaterThan)) (Cond.Gt)) +;; GT | EQ. Ge => N = V. +(rule (fp_cond_code (FloatCC.GreaterThanOrEqual)) (Cond.Ge)) +;; UN | LT. Lt => N != V. +(rule (fp_cond_code (FloatCC.UnorderedOrLessThan)) (Cond.Lt)) +;; UN | LT | EQ. Le => not (Z clear, N = V). +(rule (fp_cond_code (FloatCC.UnorderedOrLessThanOrEqual)) (Cond.Le)) +;; UN | GT. Hi => C set, Z clear. +(rule (fp_cond_code (FloatCC.UnorderedOrGreaterThan)) (Cond.Hi)) +;; UN | GT | EQ. Pl => N clear. +(rule (fp_cond_code (FloatCC.UnorderedOrGreaterThanOrEqual)) (Cond.Pl)) + ;; A tuple of `ProducesFlags` and `IntCC`. (type FlagsAndCC (enum (FlagsAndCC (flags ProducesFlags) (cc IntCC)))) @@ -4904,13 +4951,25 @@ ;; Helpers for generating select instruction sequences. 
(decl lower_select (Type CondResult Value Value) ValueRegs) -(rule (lower_select ty (CondResult.Zero reg size) a b) +(rule 1 (lower_select ty (CondResult.Zero reg size) a b) (lower_select_cond ty (cmp size reg (zero_reg)) (Cond.Eq) a b)) -(rule (lower_select ty (CondResult.NotZero reg size) a b) +(rule 1 (lower_select ty (CondResult.NotZero reg size) a b) (lower_select_cond ty (cmp size reg (zero_reg)) (Cond.Ne) a b)) -(rule (lower_select ty (CondResult.Cond flags cond) a b) +(rule 1 (lower_select ty (CondResult.Cond flags cond) a b) (lower_select_cond ty flags cond a b)) +;; TODO: there's probably a more optimal way to do this as this otherwise +;; requires putting lots of conditions in registers then doing another compare. +;; Pushing this down lower though for the "Or" and "And" cases would require +;; fancier handling in `lower_select_cond` below, however. +(rule 0 (lower_select ty c a b) + (lower_select_cond + ty + (cmp (OperandSize.Size32) (lower_cond_result_bool c) (zero_reg)) + (Cond.Ne) + a + b)) + (decl lower_select_cond (Type ProducesFlags Cond Value Value) ValueRegs) (rule 2 (lower_select_cond (ty_scalar_float (fits_in_64 ty)) flags cond rn rm) (with_flags flags (fpu_csel ty cond rn rm))) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 54f29ee4ce74..7480255e6060 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -2210,6 +2210,12 @@ (value_regs_get (with_flags (cmp_imm size reg (u8_into_imm12 0)) (cset (Cond.Ne))) 0)) (rule (lower_cond_result_bool (CondResult.Cond flags cc)) (value_regs_get (with_flags flags (cset cc)) 0)) +(rule (lower_cond_result_bool (CondResult.Or flags cc1 cc2)) + (let ((tmp ValueRegs (with_flags flags (consumes_flags_concat (cset cc1) (cset cc2))))) + (orr $I32 (value_regs_get tmp 0) (value_regs_get tmp 1)))) +(rule (lower_cond_result_bool (CondResult.And flags cc1 cc2)) + (let ((tmp ValueRegs (with_flags 
flags (consumes_flags_concat (cset cc1) (cset cc2))))) + (and_reg $I32 (value_regs_get tmp 0) (value_regs_get tmp 1)))) ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2223,13 +2229,19 @@ ;; Helper to emit a `TrapIf` instruction for the `CondResult` provided (decl trap_if_cond_result (CondResult TrapCode) SideEffectNoResult) -(rule (trap_if_cond_result (CondResult.Zero reg size) tc) +(rule 1 (trap_if_cond_result (CondResult.Zero reg size) tc) (trap_if_zero reg size tc)) -(rule (trap_if_cond_result (CondResult.NotZero reg size) tc) +(rule 1 (trap_if_cond_result (CondResult.NotZero reg size) tc) (trap_if_not_zero reg size tc)) -(rule (trap_if_cond_result (CondResult.Cond flags cc) tc) +(rule 1 (trap_if_cond_result (CondResult.Cond flags cc) tc) (with_flags_side_effect flags (trap_if_cond cc tc))) +;; TODO: see comments in `br_cond_result` below, but basically it'd be best to +;; thread "And" and "Or" into the trap instruction itself instead of reifying it +;; as a register here. 
+(rule 0 (trap_if_cond_result c tc) + (trap_if_not_zero (lower_cond_result_bool c) (OperandSize.Size32) tc)) + ;;;;; Rules for `trapnz`;;;;;;;;; (rule (lower (trapnz val trap_code)) @@ -3163,13 +3175,20 @@ ;; Helper to emit a branching instruction based on a `CondResult` (decl br_cond_result (CondResult MachLabel MachLabel) SideEffectNoResult) -(rule (br_cond_result (CondResult.Zero reg size) taken not_taken) +(rule 1 (br_cond_result (CondResult.Zero reg size) taken not_taken) (a64_br_zero reg size taken not_taken)) -(rule (br_cond_result (CondResult.NotZero reg size) taken not_taken) +(rule 1 (br_cond_result (CondResult.NotZero reg size) taken not_taken) (a64_br_not_zero reg size taken not_taken)) -(rule (br_cond_result (CondResult.Cond flags cc) taken not_taken) +(rule 1 (br_cond_result (CondResult.Cond flags cc) taken not_taken) (with_flags_side_effect flags (a64_br_cond cc taken not_taken))) +;; TODO: this handles "And" and "Or" cases but it's relatively inefficient. It'd +;; be better to thread the and/or into the branch itself similar to what x64 +;; does to generate two branches. That'd require more plumbing, however, and is +;; left for a future optimization. +(rule 0 (br_cond_result c taken not_taken) + (a64_br_not_zero (lower_cond_result_bool c) (OperandSize.Size32) taken not_taken)) + ;; Special lowerings for `tbnz` - "Test bit and Branch if Nonzero" (rule 1 (lower_branch (brif (band _ x @ (value_type ty) (u64_from_iconst n)) _ _) (two_targets taken not_taken)) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 27c48ec54777..5a7887bcb668 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -66,46 +66,6 @@ pub(crate) fn lower_condcode(cc: IntCC) -> Cond { } } -pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond { - // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs. 
- // The FCMP instruction sets: - // NZCV - // - PCSR.NZCV = 0011 on UN (unordered), - // 0110 on EQ, - // 1000 on LT, - // 0010 on GT. - match cc { - // EQ | LT | GT. Vc => V clear. - FloatCC::Ordered => Cond::Vc, - // UN. Vs => V set. - FloatCC::Unordered => Cond::Vs, - // EQ. Eq => Z set. - FloatCC::Equal => Cond::Eq, - // UN | LT | GT. Ne => Z clear. - FloatCC::NotEqual => Cond::Ne, - // LT | GT. - FloatCC::OrderedNotEqual => unimplemented!(), - // UN | EQ - FloatCC::UnorderedOrEqual => unimplemented!(), - // LT. Mi => N set. - FloatCC::LessThan => Cond::Mi, - // LT | EQ. Ls => C clear or Z set. - FloatCC::LessThanOrEqual => Cond::Ls, - // GT. Gt => Z clear, N = V. - FloatCC::GreaterThan => Cond::Gt, - // GT | EQ. Ge => N = V. - FloatCC::GreaterThanOrEqual => Cond::Ge, - // UN | LT. Lt => N != V. - FloatCC::UnorderedOrLessThan => Cond::Lt, - // UN | LT | EQ. Le => not (Z clear, N = V). - FloatCC::UnorderedOrLessThanOrEqual => Cond::Le, - // UN | GT. Hi => C set, Z clear. - FloatCC::UnorderedOrGreaterThan => Cond::Hi, - // UN | GT | EQ. Pl => N clear. - FloatCC::UnorderedOrGreaterThanOrEqual => Cond::Pl, - } -} - //============================================================================= // Lowering-backend trait implementation. 
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 184bd0d23a30..b98436daac1d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -9,8 +9,8 @@ use super::{ ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, MachLabel, MemLabel, MoveWideConst, MoveWideOp, NZCV, Opcode, OperandSize, Reg, SImm9, ScalarSize, ShiftOpAndAmt, - UImm5, UImm12Scaled, VecMisc2, VectorSize, fp_reg, lower_condcode, lower_fp_condcode, - stack_reg, writable_link_reg, writable_zero_reg, zero_reg, + UImm5, UImm12Scaled, VecMisc2, VectorSize, fp_reg, lower_condcode, stack_reg, + writable_link_reg, writable_zero_reg, zero_reg, }; use crate::ir::{ArgumentExtension, condcodes}; use crate::isa; @@ -582,10 +582,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { } } - fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond { - lower_fp_condcode(*cc) - } - fn cond_code(&mut self, cc: &condcodes::IntCC) -> Cond { lower_condcode(*cc) } diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index cc1381be8545..ac51403c379a 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -806,3 +806,74 @@ block2: ; block2: ; offset 0x10 ; ret +function %br_fcmp_one(f32, f32) { +block0(v0: f32, v1: f32): + v2 = fcmp one v0, v1 + brif v2, block1, block2 + +block1: + return +block2: + return +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x4, mi +; cset x6, gt +; orr w8, w4, w6 +; cbnz w8, label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x4, mi +; cset x6, gt +; orr w8, w4, w6 +; cbnz w8, #0x18 +; block1: ; offset 0x14 +; ret +; block2: ; offset 0x18 
+; ret + + +function %br_fcmp_ueq(f32, f32) { +block0(v0: f32, v1: f32): + v2 = fcmp ueq v0, v1 + brif v2, block1, block2 + +block1: + return +block2: + return +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x4, eq +; cset x6, vs +; orr w8, w4, w6 +; cbnz w8, label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x4, eq +; cset x6, vs +; orr w8, w4, w6 +; cbnz w8, #0x18 +; block1: ; offset 0x14 +; ret +; block2: ; offset 0x18 +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/select.clif b/cranelift/filetests/filetests/isa/aarch64/select.clif index 27dbee27e4e1..98e47e5ba3f3 100644 --- a/cranelift/filetests/filetests/isa/aarch64/select.clif +++ b/cranelift/filetests/filetests/isa/aarch64/select.clif @@ -116,3 +116,229 @@ block0(v0: i8, v1: f128, v2: f128): ; mov v0.16b, v0.16b ; ret +function %select_fcmp_one_f32_i32(f32, f32, i32, i32) -> i32 { +block0(v0: f32, v1: f32, v2: i32, v3: i32): + v4 = fcmp one v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x6, mi +; cset x8, gt +; orr w10, w6, w8 +; subs wzr, w10, wzr +; csel x0, x0, x1, ne +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x6, mi +; cset x8, gt +; orr w10, w6, w8 +; cmp w10, wzr +; csel x0, x0, x1, ne +; ret + +function %select_fcmp_ueq_f32_i32(f32, f32, i32, i32) -> i32 { +block0(v0: f32, v1: f32, v2: i32, v3: i32): + v4 = fcmp ueq v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x6, eq +; cset x8, vs +; orr w10, w6, w8 +; subs wzr, w10, wzr +; csel x0, x0, x1, ne +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x6, eq +; cset x8, vs +; orr w10, w6, w8 +; cmp w10, wzr +; csel x0, x0, x1, ne +; ret + +function %select_fcmp_one_f32_f32(f32, f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32, v3: f32): + v4 = fcmp one v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; 
block0: +; fcmp s0, s1 +; cset x6, mi +; cset x8, gt +; orr w10, w6, w8 +; subs wzr, w10, wzr +; fcsel s0, s2, s3, ne +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x6, mi +; cset x8, gt +; orr w10, w6, w8 +; cmp w10, wzr +; fcsel s0, s2, s3, ne +; ret + +function %select_fcmp_ueq_f32_f32(f32, f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32, v3: f32): + v4 = fcmp ueq v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x6, eq +; cset x8, vs +; orr w10, w6, w8 +; subs wzr, w10, wzr +; fcsel s0, s2, s3, ne +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x6, eq +; cset x8, vs +; orr w10, w6, w8 +; cmp w10, wzr +; fcsel s0, s2, s3, ne +; ret + +function %select_fcmp_one_f32_i128(f32, f32, i128, i128) -> i128 { +block0(v0: f32, v1: f32, v2: i128, v3: i128): + v4 = fcmp one v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x8, mi +; cset x10, gt +; orr w12, w8, w10 +; subs wzr, w12, wzr +; csel x0, x0, x2, ne +; csel x1, x1, x3, ne +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x8, mi +; cset x10, gt +; orr w12, w8, w10 +; cmp w12, wzr +; csel x0, x0, x2, ne +; csel x1, x1, x3, ne +; ret + +function %select_fcmp_ueq_f32_i128(f32, f32, i128, i128) -> i128 { +block0(v0: f32, v1: f32, v2: i128, v3: i128): + v4 = fcmp ueq v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x8, eq +; cset x10, vs +; orr w12, w8, w10 +; subs wzr, w12, wzr +; csel x0, x0, x2, ne +; csel x1, x1, x3, ne +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x8, eq +; cset x10, vs +; orr w12, w8, w10 +; cmp w12, wzr +; csel x0, x0, x2, ne +; csel x1, x1, x3, ne +; ret + +function %select_fcmp_one_f32_i64x2(f32, f32, i64x2, i64x2) -> i64x2 { +block0(v0: f32, v1: f32, v2: i64x2, v3: i64x2): + v4 = fcmp one v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; 
block0: +; fcmp s0, s1 +; cset x6, mi +; cset x8, gt +; orr w10, w6, w8 +; subs wzr, w10, wzr +; vcsel v0.16b, v2.16b, v3.16b, ne (if-then-else diamond) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x6, mi +; cset x8, gt +; orr w10, w6, w8 +; cmp w10, wzr +; b.ne #0x20 +; mov v0.16b, v3.16b +; b #0x24 +; mov v0.16b, v2.16b +; ret + +function %select_fcmp_ueq_f32_i64x2(f32, f32, i64x2, i64x2) -> i64x2 { +block0(v0: f32, v1: f32, v2: i64x2, v3: i64x2): + v4 = fcmp ueq v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; VCode: +; block0: +; fcmp s0, s1 +; cset x6, eq +; cset x8, vs +; orr w10, w6, w8 +; subs wzr, w10, wzr +; vcsel v0.16b, v2.16b, v3.16b, ne (if-then-else diamond) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp s0, s1 +; cset x6, eq +; cset x8, vs +; orr w10, w6, w8 +; cmp w10, wzr +; b.ne #0x20 +; mov v0.16b, v3.16b +; b #0x24 +; mov v0.16b, v2.16b +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/traps.clif b/cranelift/filetests/filetests/isa/aarch64/traps.clif index 005b800f119d..a041eb906b12 100644 --- a/cranelift/filetests/filetests/isa/aarch64/traps.clif +++ b/cranelift/filetests/filetests/isa/aarch64/traps.clif @@ -269,3 +269,55 @@ block0(v0: f64, v1: f64): ; ret ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: user1 +function %trapz_fcmp_one(f64, f64) { +block0(v0: f64, v1: f64): + v2 = fcmp one v0, v1 + trapz v2, user1 + return +} + +; VCode: +; block0: +; fcmp d0, d1 +; cset x4, pl +; cset x6, le +; and w8, w4, w6 +; cbnz w8, #trap=user1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; fcmp d0, d1 +; cset x4, pl +; cset x6, le +; and w8, w4, w6 +; cbnz w8, #0x18 +; ret +; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: user1 + +function %trapz_fcmp_ueq(f64, f64) { +block0(v0: f64, v1: f64): + v2 = fcmp ueq v0, v1 + trapz v2, user1 + return +} + +; VCode: +; block0: +; fcmp d0, d1 +; cset x4, ne +; cset x6, vc +; and w8, w4, w6 +; cbnz w8, #trap=user1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; 
fcmp d0, d1 +; cset x4, ne +; cset x6, vc +; and w8, w4, w6 +; cbnz w8, #0x18 +; ret +; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: user1 + diff --git a/cranelift/filetests/filetests/runtests/fcmp-one.clif b/cranelift/filetests/filetests/runtests/fcmp-one.clif index d34a8e27cd41..44eb0a7056f1 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-one.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-one.clif @@ -3,6 +3,7 @@ test run target x86_64 target x86_64 has_avx target s390x +target aarch64 target riscv64 target riscv64 has_c has_zcb target pulley32 @@ -323,3 +324,34 @@ block0(v0: f64, v1: f64): ; run: %fcmp_one_f64(+sNaN:0x800000000001, -sNaN:0x1) == 0 ; run: %fcmp_one_f64(-sNaN:0x800000000001, -sNaN:0x1) == 0 ; run: %fcmp_one_f64(-sNaN:0x800000000001, +sNaN:0x1) == 0 + +function %select_fcmp_one_f32_i32(f32, f32, i32, i32) -> i32 { +block0(v0: f32, v1: f32, v2: i32, v3: i32): + v4 = fcmp one v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; run: %select_fcmp_one_f32_i32(0x1.0, 0x1.0, 0, 1) == 1 +; run: %select_fcmp_one_f32_i32(-0x1.0, 0x1.0, 0, 1) == 0 + +function %select_fcmp_one_f32_f32(f32, f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32, v3: f32): + v4 = fcmp one v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; run: %select_fcmp_one_f32_f32(0x1.0, 0x1.0, 0x1.0, 0x2.0) == 0x2.0 +; run: %select_fcmp_one_f32_f32(-0x1.0, 0x1.0, 0x1.0, 0x2.0) == 0x1.0 + +function %brif_fcmp_one_f32(f32, f32, i32, i32) -> i32 { +block0(v0: f32, v1: f32, v2: i32, v3: i32): + v4 = fcmp one v0, v1 + brif v4, block1(v2), block1(v3) +block1(v5: i32): + return v5 +} + +; run: %brif_fcmp_one_f32(0x1.0, 0x1.0, 0, 1) == 1 +; run: %brif_fcmp_one_f32(-0x1.0, 0x1.0, 0, 1) == 0 diff --git a/cranelift/filetests/filetests/runtests/fcmp-ueq.clif b/cranelift/filetests/filetests/runtests/fcmp-ueq.clif index 6d7f3a936e77..101de1270da3 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-ueq.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ueq.clif @@ -3,6 +3,7 @@ test 
run target x86_64 target x86_64 has_avx target s390x +target aarch64 target riscv64 target riscv64 has_c has_zcb target pulley32 diff --git a/cranelift/filetests/filetests/runtests/fcmp-uno.clif b/cranelift/filetests/filetests/runtests/fcmp-uno.clif index d3d93d2d0a01..f3c8ab87475d 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-uno.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-uno.clif @@ -325,3 +325,34 @@ block0(v0: f64, v1: f64): ; run: %fcmp_uno_f64(+sNaN:0x800000000001, -sNaN:0x1) == 1 ; run: %fcmp_uno_f64(-sNaN:0x800000000001, -sNaN:0x1) == 1 ; run: %fcmp_uno_f64(-sNaN:0x800000000001, +sNaN:0x1) == 1 + +function %select_fcmp_uno_f32_i32(f32, f32, i32, i32) -> i32 { +block0(v0: f32, v1: f32, v2: i32, v3: i32): + v4 = fcmp uno v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; run: %select_fcmp_uno_f32_i32(0x1.0, 0x1.0, 0, 1) == 1 +; run: %select_fcmp_uno_f32_i32(+NaN, +NaN, 0, 1) == 0 + +function %select_fcmp_uno_f32_f32(f32, f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32, v3: f32): + v4 = fcmp uno v0, v1 + v5 = select v4, v2, v3 + return v5 +} + +; run: %select_fcmp_uno_f32_f32(0x1.0, 0x1.0, 0x1.0, 0x2.0) == 0x2.0 +; run: %select_fcmp_uno_f32_f32(+NaN, +NaN, 0x1.0, 0x2.0) == 0x1.0 + +function %brif_fcmp_uno_f32(f32, f32, i32, i32) -> i32 { +block0(v0: f32, v1: f32, v2: i32, v3: i32): + v4 = fcmp uno v0, v1 + brif v4, block1(v2), block1(v3) +block1(v5: i32): + return v5 +} + +; run: %brif_fcmp_uno_f32(0x1.0, 0x1.0, 0, 1) == 1 +; run: %brif_fcmp_uno_f32(+NaN, +NaN, 0, 1) == 0