Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 67 additions & 8 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -4130,11 +4130,6 @@
(rule (splat_const n size)
(vec_dup (imm $I64 (ImmExtend.Zero) n) size))

;; Lower a FloatCC to a Cond.
(decl fp_cond_code (FloatCC) Cond)
;; TODO: Port lower_fp_condcode() to ISLE.
(extern constructor fp_cond_code fp_cond_code)

;; Lower an integer cond code.
(spec (cond_code a) (provide (= a result)))
(decl cond_code (IntCC) Cond)
Expand Down Expand Up @@ -4637,12 +4632,18 @@
(Zero (reg Reg) (size OperandSize))
(NotZero (reg Reg) (size OperandSize))
(Cond (flags ProducesFlags) (cond Cond))
(Or (flags ProducesFlags) (cc1 Cond) (cc2 Cond))
(And (flags ProducesFlags) (cc1 Cond) (cc2 Cond))
))

;; Produce the logical negation of a `CondResult`.
(decl cond_result_invert (CondResult) CondResult)

;; A zero test and a non-zero test of the same register negate each other.
(rule (cond_result_invert (CondResult.Zero r sz))
      (CondResult.NotZero r sz))
(rule (cond_result_invert (CondResult.NotZero r sz))
      (CondResult.Zero r sz))

;; A single condition code is negated by inverting the code itself; the flags
;; producer is passed through unchanged.
(rule (cond_result_invert (CondResult.Cond producer code))
      (CondResult.Cond producer (invert_cond code)))

;; De Morgan: !(x || y) == (!x && !y) and !(x && y) == (!x || !y).
(rule (cond_result_invert (CondResult.Or producer lhs rhs))
      (CondResult.And producer (invert_cond lhs) (invert_cond rhs)))
(rule (cond_result_invert (CondResult.And producer lhs rhs))
      (CondResult.Or producer (invert_cond lhs) (invert_cond rhs)))

(decl is_nonzero_cmp (Value) CondResult)

Expand Down Expand Up @@ -4775,6 +4776,52 @@
;; General case: a `FloatCC` that `fp_cond_code` can map onto one AArch64
;; condition code becomes a plain `CondResult.Cond` over the flags produced by
;; `fpu_cmp`.
(rule (emit_fcmp cc a b @ (value_type ty))
(CondResult.Cond (fpu_cmp (scalar_size ty) a b) (fp_cond_code cc)))

;; Special cases: `OrderedNotEqual` and `UnorderedOrEqual` cannot be selected
;; with a single condition code, so these higher-priority rules test two
;; condition codes against the same comparison and join them with
;; `CondResult.Or`.
;;
;; OrderedNotEqual = LT | GT: `Mi` selects LT and `Gt` selects GT.
(rule 1 (emit_fcmp (FloatCC.OrderedNotEqual) a b @ (value_type ty))
(CondResult.Or (fpu_cmp (scalar_size ty) a b) (Cond.Mi) (Cond.Gt)))
;; UnorderedOrEqual = UN | EQ: `Eq` selects EQ and `Vs` selects UN.
(rule 1 (emit_fcmp (FloatCC.UnorderedOrEqual) a b @ (value_type ty))
(CondResult.Or (fpu_cmp (scalar_size ty) a b) (Cond.Eq) (Cond.Vs)))

;; Lower a `FloatCC` to the AArch64 `Cond` that tests it after an `FCMP`.
;;
;; Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
;; The FCMP instruction sets:
;;                 NZCV
;; - PSTATE.NZCV = 0011 on UN (unordered),
;;                 0110 on EQ,
;;                 1000 on LT,
;;                 0010 on GT.
;;
;; Each rule below is annotated with the set of comparison outcomes it accepts
;; followed by the flag test performed by the chosen condition code.
;;
;; `OrderedNotEqual` (LT | GT) and `UnorderedOrEqual` (UN | EQ) have no rule
;; here because neither matches a single condition code; `emit_fcmp` handles
;; them with dedicated higher-priority rules.
(decl fp_cond_code (FloatCC) Cond)
;; EQ | LT | GT. Vc => V clear.
(rule (fp_cond_code (FloatCC.Ordered)) (Cond.Vc))
;; UN. Vs => V set.
(rule (fp_cond_code (FloatCC.Unordered)) (Cond.Vs))
;; EQ. Eq => Z set.
(rule (fp_cond_code (FloatCC.Equal)) (Cond.Eq))
;; UN | LT | GT. Ne => Z clear.
(rule (fp_cond_code (FloatCC.NotEqual)) (Cond.Ne))
;; LT. Mi => N set.
(rule (fp_cond_code (FloatCC.LessThan)) (Cond.Mi))
;; LT | EQ. Ls => C clear or Z set.
(rule (fp_cond_code (FloatCC.LessThanOrEqual)) (Cond.Ls))
;; GT. Gt => Z clear, N = V.
(rule (fp_cond_code (FloatCC.GreaterThan)) (Cond.Gt))
;; GT | EQ. Ge => N = V.
(rule (fp_cond_code (FloatCC.GreaterThanOrEqual)) (Cond.Ge))
;; UN | LT. Lt => N != V.
(rule (fp_cond_code (FloatCC.UnorderedOrLessThan)) (Cond.Lt))
;; UN | LT | EQ. Le => not (Z clear, N = V).
(rule (fp_cond_code (FloatCC.UnorderedOrLessThanOrEqual)) (Cond.Le))
;; UN | GT. Hi => C set, Z clear.
(rule (fp_cond_code (FloatCC.UnorderedOrGreaterThan)) (Cond.Hi))
;; UN | GT | EQ. Pl => N clear.
(rule (fp_cond_code (FloatCC.UnorderedOrGreaterThanOrEqual)) (Cond.Pl))

;; A tuple of `ProducesFlags` and `IntCC`.
(type FlagsAndCC (enum (FlagsAndCC (flags ProducesFlags)
(cc IntCC))))
Expand Down Expand Up @@ -4904,13 +4951,25 @@

;; Helpers for generating select instruction sequences.
(decl lower_select (Type CondResult Value Value) ValueRegs)
(rule (lower_select ty (CondResult.Zero reg size) a b)
(rule 1 (lower_select ty (CondResult.Zero reg size) a b)
(lower_select_cond ty (cmp size reg (zero_reg)) (Cond.Eq) a b))
(rule (lower_select ty (CondResult.NotZero reg size) a b)
(rule 1 (lower_select ty (CondResult.NotZero reg size) a b)
(lower_select_cond ty (cmp size reg (zero_reg)) (Cond.Ne) a b))
(rule (lower_select ty (CondResult.Cond flags cond) a b)
(rule 1 (lower_select ty (CondResult.Cond flags cond) a b)
(lower_select_cond ty flags cond a b))

;; Fallback for every `CondResult` not matched by a higher-priority rule (the
;; "Or" and "And" variants): reify the condition as a boolean in a register,
;; compare that register against zero, and select on "not equal".
;;
;; TODO: this is probably not optimal, since it puts conditions in registers
;; and then performs another compare. Avoiding that for "Or" and "And" would
;; require fancier handling in `lower_select_cond` below.
(rule 0 (lower_select ty cond_result a b)
      (let ((is_true Reg (lower_cond_result_bool cond_result))
            (nonzero ProducesFlags (cmp (OperandSize.Size32) is_true (zero_reg))))
        (lower_select_cond ty nonzero (Cond.Ne) a b)))

(decl lower_select_cond (Type ProducesFlags Cond Value Value) ValueRegs)
(rule 2 (lower_select_cond (ty_scalar_float (fits_in_64 ty)) flags cond rn rm)
(with_flags flags (fpu_csel ty cond rn rm)))
Expand Down
31 changes: 25 additions & 6 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2210,6 +2210,12 @@
(value_regs_get (with_flags (cmp_imm size reg (u8_into_imm12 0)) (cset (Cond.Ne))) 0))
;; Single condition code: one `cset` consuming the flags yields the boolean.
(rule (lower_cond_result_bool (CondResult.Cond flags cc))
(value_regs_get (with_flags flags (cset cc)) 0))
;; "Or" of two condition codes: emit one `cset` per code, both consuming the
;; same flags, then combine the two results with a 32-bit `orr`.
(rule (lower_cond_result_bool (CondResult.Or flags cc1 cc2))
(let ((tmp ValueRegs (with_flags flags (consumes_flags_concat (cset cc1) (cset cc2)))))
(orr $I32 (value_regs_get tmp 0) (value_regs_get tmp 1))))
;; "And" of two condition codes: same shape as "Or", but the two `cset`
;; results are combined with a 32-bit `and_reg`.
(rule (lower_cond_result_bool (CondResult.And flags cc1 cc2))
(let ((tmp ValueRegs (with_flags flags (consumes_flags_concat (cset cc1) (cset cc2)))))
(and_reg $I32 (value_regs_get tmp 0) (value_regs_get tmp 1))))

;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -2223,13 +2229,19 @@

;; Helper to emit a `TrapIf` instruction for the `CondResult` provided
(decl trap_if_cond_result (CondResult TrapCode) SideEffectNoResult)
(rule (trap_if_cond_result (CondResult.Zero reg size) tc)
(rule 1 (trap_if_cond_result (CondResult.Zero reg size) tc)
(trap_if_zero reg size tc))
(rule (trap_if_cond_result (CondResult.NotZero reg size) tc)
(rule 1 (trap_if_cond_result (CondResult.NotZero reg size) tc)
(trap_if_not_zero reg size tc))
(rule (trap_if_cond_result (CondResult.Cond flags cc) tc)
(rule 1 (trap_if_cond_result (CondResult.Cond flags cc) tc)
(with_flags_side_effect flags (trap_if_cond cc tc)))

;; Fallback for every `CondResult` not matched by a higher-priority rule (the
;; "And" and "Or" variants): reify the condition as a boolean in a register and
;; trap if that register is non-zero.
;;
;; TODO: it would be best to thread "And" and "Or" into the trap instruction
;; itself instead of reifying the condition as a register here; the same
;; trade-off is described on the fallback rule of `br_cond_result`.
(rule 0 (trap_if_cond_result cond_result trap_code)
      (let ((is_true Reg (lower_cond_result_bool cond_result)))
        (trap_if_not_zero is_true (OperandSize.Size32) trap_code)))

;;;;; Rules for `trapnz`;;;;;;;;;

(rule (lower (trapnz val trap_code))
Expand Down Expand Up @@ -3163,13 +3175,20 @@

;; Helper to emit a branching instruction based on a `CondResult`
(decl br_cond_result (CondResult MachLabel MachLabel) SideEffectNoResult)
(rule (br_cond_result (CondResult.Zero reg size) taken not_taken)
(rule 1 (br_cond_result (CondResult.Zero reg size) taken not_taken)
(a64_br_zero reg size taken not_taken))
(rule (br_cond_result (CondResult.NotZero reg size) taken not_taken)
(rule 1 (br_cond_result (CondResult.NotZero reg size) taken not_taken)
(a64_br_not_zero reg size taken not_taken))
(rule (br_cond_result (CondResult.Cond flags cc) taken not_taken)
(rule 1 (br_cond_result (CondResult.Cond flags cc) taken not_taken)
(with_flags_side_effect flags (a64_br_cond cc taken not_taken)))

;; Fallback for every `CondResult` not matched by a higher-priority rule (the
;; "And" and "Or" variants): reify the condition as a boolean in a register and
;; branch on that register being non-zero.
;;
;; TODO: this is relatively inefficient. It would be better to thread the
;; and/or into the branch itself, similar to what x64 does by generating two
;; branches. That requires more plumbing and is left as a future optimization.
(rule 0 (br_cond_result cond_result taken not_taken)
      (let ((is_true Reg (lower_cond_result_bool cond_result)))
        (a64_br_not_zero is_true (OperandSize.Size32) taken not_taken)))

;; Special lowerings for `tbnz` - "Test bit and Branch if Nonzero"
(rule 1 (lower_branch (brif (band _ x @ (value_type ty) (u64_from_iconst n)) _ _)
(two_targets taken not_taken))
Expand Down
40 changes: 0 additions & 40 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,46 +66,6 @@ pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
}
}

/// Lowers a floating-point condition code to the AArch64 [`Cond`] that tests
/// it against the flags set by `FCMP`.
///
/// # Panics
///
/// Panics (via `unimplemented!()`) for `FloatCC::OrderedNotEqual` and
/// `FloatCC::UnorderedOrEqual`, which have no mapping in this function.
pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
// Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
// The FCMP instruction sets:
//                 NZCV
// - PSTATE.NZCV = 0011 on UN (unordered),
//                 0110 on EQ,
//                 1000 on LT,
//                 0010 on GT.
//
// Each arm is annotated with the comparison outcomes it accepts followed by
// the flag test performed by the chosen condition code.
match cc {
// EQ | LT | GT. Vc => V clear.
FloatCC::Ordered => Cond::Vc,
// UN. Vs => V set.
FloatCC::Unordered => Cond::Vs,
// EQ. Eq => Z set.
FloatCC::Equal => Cond::Eq,
// UN | LT | GT. Ne => Z clear.
FloatCC::NotEqual => Cond::Ne,
// LT | GT. Not mapped to a condition code here.
FloatCC::OrderedNotEqual => unimplemented!(),
// UN | EQ. Not mapped to a condition code here.
FloatCC::UnorderedOrEqual => unimplemented!(),
// LT. Mi => N set.
FloatCC::LessThan => Cond::Mi,
// LT | EQ. Ls => C clear or Z set.
FloatCC::LessThanOrEqual => Cond::Ls,
// GT. Gt => Z clear, N = V.
FloatCC::GreaterThan => Cond::Gt,
// GT | EQ. Ge => N = V.
FloatCC::GreaterThanOrEqual => Cond::Ge,
// UN | LT. Lt => N != V.
FloatCC::UnorderedOrLessThan => Cond::Lt,
// UN | LT | EQ. Le => not (Z clear, N = V).
FloatCC::UnorderedOrLessThanOrEqual => Cond::Le,
// UN | GT. Hi => C set, Z clear.
FloatCC::UnorderedOrGreaterThan => Cond::Hi,
// UN | GT | EQ. Pl => N clear.
FloatCC::UnorderedOrGreaterThanOrEqual => Cond::Pl,
}
}

//=============================================================================
// Lowering-backend trait implementation.

Expand Down
8 changes: 2 additions & 6 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use super::{
ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI,
FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, MachLabel, MemLabel,
MoveWideConst, MoveWideOp, NZCV, Opcode, OperandSize, Reg, SImm9, ScalarSize, ShiftOpAndAmt,
UImm5, UImm12Scaled, VecMisc2, VectorSize, fp_reg, lower_condcode, lower_fp_condcode,
stack_reg, writable_link_reg, writable_zero_reg, zero_reg,
UImm5, UImm12Scaled, VecMisc2, VectorSize, fp_reg, lower_condcode, stack_reg,
writable_link_reg, writable_zero_reg, zero_reg,
};
use crate::ir::{ArgumentExtension, condcodes};
use crate::isa;
Expand Down Expand Up @@ -582,10 +582,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
}
}

/// Backs the ISLE `fp_cond_code` extern constructor by delegating to
/// `lower_fp_condcode` with the condition code copied out of the reference.
fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond {
lower_fp_condcode(*cc)
}

/// Lowers an integer condition code by delegating to `lower_condcode` with the
/// condition code copied out of the reference.
// NOTE(review): presumably the implementation of the ISLE `cond_code`
// constructor; its extern declaration is not visible here, so confirm.
fn cond_code(&mut self, cc: &condcodes::IntCC) -> Cond {
lower_condcode(*cc)
}
Expand Down
71 changes: 71 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/condbr.clif
Original file line number Diff line number Diff line change
Expand Up @@ -806,3 +806,74 @@ block2:
; block2: ; offset 0x10
; ret

function %br_fcmp_one(f32, f32) {
block0(v0: f32, v1: f32):
v2 = fcmp one v0, v1
brif v2, block1, block2

block1:
return
block2:
return
}

; VCode:
; block0:
; fcmp s0, s1
; cset x4, mi
; cset x6, gt
; orr w8, w4, w6
; cbnz w8, label2 ; b label1
; block1:
; ret
; block2:
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fcmp s0, s1
; cset x4, mi
; cset x6, gt
; orr w8, w4, w6
; cbnz w8, #0x18
; block1: ; offset 0x14
; ret
; block2: ; offset 0x18
; ret


function %br_fcmp_ueq(f32, f32) {
block0(v0: f32, v1: f32):
v2 = fcmp ueq v0, v1
brif v2, block1, block2

block1:
return
block2:
return
}

; VCode:
; block0:
; fcmp s0, s1
; cset x4, eq
; cset x6, vs
; orr w8, w4, w6
; cbnz w8, label2 ; b label1
; block1:
; ret
; block2:
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fcmp s0, s1
; cset x4, eq
; cset x6, vs
; orr w8, w4, w6
; cbnz w8, #0x18
; block1: ; offset 0x14
; ret
; block2: ; offset 0x18
; ret

Loading
Loading