Conversation
|
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Changes: The 4 flavors of pow were originally ported from ROCm device libs (summary truncated; the full description appears at the end of this page).
Patch is 104.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/186890.diff
29 Files Affected:
diff --git a/libclc/clc/include/clc/math/clc_ep_decl.inc b/libclc/clc/include/clc/math/clc_ep_decl.inc
index d29cfdc6346ba..1739da9f3ae63 100644
--- a/libclc/clc/include/clc/math/clc_ep_decl.inc
+++ b/libclc/clc/include/clc/math/clc_ep_decl.inc
@@ -128,4 +128,9 @@ _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_GENTYPE a);
_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_EP_PAIR a);
+#if __CLC_FPSIZE == 32 || __CLC_FPSIZE == 64
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR a);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(__CLC_GENTYPE a);
+#endif
+
#endif
diff --git a/libclc/clc/include/clc/math/clc_exp2_fast.h b/libclc/clc/include/clc/math/clc_exp2_fast.h
new file mode 100644
index 0000000000000..a42e6c9b7fd48
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_exp2_fast.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_EXP2_FAST_H__
+#define __CLC_MATH_CLC_EXP2_FAST_H__
+
+#define __CLC_FUNCTION __clc_exp2_fast
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_EXP2_FAST_H__
diff --git a/libclc/clc/include/clc/math/clc_log2_fast.h b/libclc/clc/include/clc/math/clc_log2_fast.h
new file mode 100644
index 0000000000000..5160afbedebf7
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_log2_fast.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_LOG2_FAST_H__
+#define __CLC_MATH_CLC_LOG2_FAST_H__
+
+#define __CLC_FUNCTION __clc_log2_fast
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_LOG2_FAST_H__
diff --git a/libclc/clc/include/clc/math/clc_pow.h b/libclc/clc/include/clc/math/clc_pow.h
index 5e37e5bf6da65..f7399873a4994 100644
--- a/libclc/clc/include/clc/math/clc_pow.h
+++ b/libclc/clc/include/clc/math/clc_pow.h
@@ -11,9 +11,14 @@
#define __CLC_BODY <clc/shared/binary_decl.inc>
#define __CLC_FUNCTION __clc_pow
-
#include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
+#define __CLC_FLOAT_ONLY
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_pow_fast
+#include <clc/math/gentype.inc>
#undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
#endif // __CLC_MATH_CLC_POW_H__
diff --git a/libclc/clc/include/clc/math/clc_pown.h b/libclc/clc/include/clc/math/clc_pown.h
index 30628efb19001..3e2b359468b48 100644
--- a/libclc/clc/include/clc/math/clc_pown.h
+++ b/libclc/clc/include/clc/math/clc_pown.h
@@ -9,11 +9,16 @@
#ifndef __CLC_MATH_CLC_POWN_H__
#define __CLC_MATH_CLC_POWN_H__
-#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
#define __CLC_FUNCTION __clc_pown
-
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
#include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_pown_fast
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
+#include <clc/math/gentype.inc>
#undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
#endif // __CLC_MATH_CLC_POWN_H__
diff --git a/libclc/clc/include/clc/math/clc_powr.h b/libclc/clc/include/clc/math/clc_powr.h
index baa494cce6989..67c591ca6aa82 100644
--- a/libclc/clc/include/clc/math/clc_powr.h
+++ b/libclc/clc/include/clc/math/clc_powr.h
@@ -9,11 +9,16 @@
#ifndef __CLC_MATH_CLC_POWR_H__
#define __CLC_MATH_CLC_POWR_H__
-#define __CLC_BODY <clc/shared/binary_decl.inc>
#define __CLC_FUNCTION __clc_powr
-
+#define __CLC_BODY <clc/shared/binary_decl.inc>
#include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_powr_fast
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#include <clc/math/gentype.inc>
#undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
#endif // __CLC_MATH_CLC_POWR_H__
diff --git a/libclc/clc/include/clc/math/clc_rootn.h b/libclc/clc/include/clc/math/clc_rootn.h
index 90a25ad52d867..26d111a8671d4 100644
--- a/libclc/clc/include/clc/math/clc_rootn.h
+++ b/libclc/clc/include/clc/math/clc_rootn.h
@@ -9,11 +9,16 @@
#ifndef __CLC_MATH_CLC_ROOTN_H__
#define __CLC_MATH_CLC_ROOTN_H__
-#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
#define __CLC_FUNCTION __clc_rootn
-
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
#include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_rootn_fast
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
+#include <clc/math/gentype.inc>
#undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
#endif // __CLC_MATH_CLC_ROOTN_H__
diff --git a/libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc b/libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc
new file mode 100644
index 0000000000000..498df2fc420de
--- /dev/null
+++ b/libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/utils.h"
+
+#if __CLC_VECSIZE_OR_1 >= 2
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
+ __CLC_INTN y) {
+ union {
+ __CLC_GENTYPE vec;
+ __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
+ } u_x, u_result;
+
+ union {
+ __CLC_INTN vec;
+ int arr[__CLC_VECSIZE_OR_1];
+ } u_y;
+
+ u_x.vec = x;
+ u_y.vec = y;
+ for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i) {
+ u_result.arr[i] = __CLC_IMPL_FUNCTION(u_x.arr[i], u_y.arr[i]);
+ }
+ return u_result.vec;
+}
+
+#endif // __CLC_VECSIZE_OR_1 >= 2
diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt
index 9b6c9a231ade0..daccc00b841b3 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -3,6 +3,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
address_space/clc_qualifier.cl
math/clc_exp.cl
math/clc_exp2.cl
+ math/clc_exp2_fast.cl
math/clc_exp10.cl
math/clc_frexp.cl
math/clc_half_exp.cl
@@ -15,6 +16,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
math/clc_half_rsqrt.cl
math/clc_half_sqrt.cl
math/clc_ldexp.cl
+ math/clc_log2_fast.cl
math/clc_native_exp.cl
math/clc_native_exp2.cl
math/clc_native_log10.cl
diff --git a/libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl b/libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl
new file mode 100644
index 0000000000000..b73bc9f6e260b
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/math/clc_exp2.h"
+#include "clc/math/clc_exp2_fast.h"
+
+#define __CLC_FLOAT_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define __CLC_FUNCTION __clc_exp2_fast
+#define __CLC_IMPL_FUNCTION(x) __builtin_amdgcn_exp2f(x)
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_FLOAT_ONLY
+
+#define __CLC_HALF_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_exp2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_HALF_ONLY
+
+#define __CLC_DOUBLE_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_exp2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_DOUBLE_ONLY
diff --git a/libclc/clc/lib/amdgpu/math/clc_log2_fast.cl b/libclc/clc/lib/amdgpu/math/clc_log2_fast.cl
new file mode 100644
index 0000000000000..a47fc84b26a00
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_log2_fast.cl
@@ -0,0 +1,25 @@
+#include "clc/math/clc_log2.h"
+#include "clc/math/clc_log2_fast.h"
+
+#define __CLC_FLOAT_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define __CLC_FUNCTION __clc_log2_fast
+#define __CLC_IMPL_FUNCTION(x) __builtin_amdgcn_logf(x)
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_FLOAT_ONLY
+
+#define __CLC_HALF_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_log2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_HALF_ONLY
+
+#define __CLC_DOUBLE_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_log2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_DOUBLE_ONLY
diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt
index 70a0863524b19..bda2ec67a55c3 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -80,6 +80,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
math/clc_erfc.cl
math/clc_exp.cl
math/clc_exp2.cl
+ math/clc_exp2_fast.cl
math/clc_exp10.cl
math/clc_exp_helper.cl
math/clc_expm1.cl
@@ -114,6 +115,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
math/clc_lgamma_r.cl
math/clc_log.cl
math/clc_log2.cl
+ math/clc_log2_fast.cl
math/clc_log10.cl
math/clc_log1p.cl
math/clc_logb.cl
diff --git a/libclc/clc/lib/generic/math/clc_ep.cl b/libclc/clc/lib/generic/math/clc_ep.cl
index f0e3020f65f6f..2e62563e2a107 100644
--- a/libclc/clc/lib/generic/math/clc_ep.cl
+++ b/libclc/clc/lib/generic/math/clc_ep.cl
@@ -9,8 +9,11 @@
#include "clc/clc_convert.h"
#include "clc/math/clc_div_fast.h"
#include "clc/math/clc_ep.h"
+#include "clc/math/clc_exp.h"
#include "clc/math/clc_fma.h"
+#include "clc/math/clc_frexp.h"
#include "clc/math/clc_ldexp.h"
+#include "clc/math/clc_mad.h"
#include "clc/math/clc_recip_fast.h"
#include "clc/math/clc_sqrt_fast.h"
#include "clc/relational/clc_isinf.h"
diff --git a/libclc/clc/lib/generic/math/clc_ep.inc b/libclc/clc/lib/generic/math/clc_ep.inc
index 38fa513c46aac..56a7f1a7becb2 100644
--- a/libclc/clc/lib/generic/math/clc_ep.inc
+++ b/libclc/clc/lib/generic/math/clc_ep.inc
@@ -387,5 +387,83 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_EP_PAIR a) {
a.hi == __CLC_FP_LIT(0.0) ? __CLC_FP_LIT(0.0) : slo);
}
+#if __CLC_FPSIZE == 32
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR x) {
+ float d = x.hi == 0x1.62e430p+6f ? 0x1.0p-17f : 0.0f;
+ x.hi -= d;
+ x.lo += d;
+ float z = __clc_exp(x.hi);
+ float zz = __clc_fma(z, x.lo, z);
+ return __clc_isinf(z) ? z : zz;
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(float a) {
+ int a_exp;
+ float m = __clc_frexp(a, &a_exp);
+ int b = m < (2.0f / 3.0f);
+ m = __clc_ldexp(m, b);
+ int e = a_exp - b;
+
+ __CLC_EP_PAIR x = __clc_ep_div(m - 1.0f, __clc_ep_fast_add(1.0f, m));
+ __CLC_EP_PAIR s = __clc_ep_sqr(x);
+ float t = s.hi;
+ float p = __clc_mad(t, __clc_mad(t, 0x1.ed89c2p-3f, 0x1.23e988p-2f),
+ 0x1.999bdep-2f);
+
+ // ln(2)*e + 2*x + x^3(c3 + x^2*p)
+ float2 r = __clc_ep_add(
+ __clc_ep_mul(__clc_ep_make_pair(0x1.62e430p-1f, -0x1.05c610p-29f),
+ (float)e),
+ __clc_ep_fast_add(
+ __clc_ep_ldexp(x, 1),
+ __clc_ep_mul(__clc_ep_mul(s, x),
+ __clc_ep_fast_add(
+ __clc_ep_make_pair(0x1.555554p-1f, 0x1.e72020p-29f),
+ __clc_ep_mul(s, p)))));
+ return r;
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR x) {
+ __CLC_GENTYPE z = __clc_exp(x.hi);
+ __CLC_GENTYPE zz = __clc_mad(z, x.lo, z);
+ return __clc_isinf(z) ? z : zz;
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(double a) {
+ int a_exp;
+ double m = __clc_frexp(a, &a_exp);
+ int b = m < __CLC_FP_LIT(2.0 / 3.0);
+ m = __clc_ldexp(m, b);
+ int e = a_exp - b;
+
+ double2 x = __clc_ep_div(m - 1.0, __clc_ep_fast_add(1.0, m));
+ double2 s = __clc_ep_sqr(x);
+ double t = s.hi;
+ double p = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
+ __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
+ 0x1.dee674222de17p-4, 0x1.a6564968915a9p-4), 0x1.e25e43abe935ap-4), 0x1.110ef47e6c9c2p-3),
+ 0x1.3b13bcfa74449p-3), 0x1.745d171bf3c30p-3), 0x1.c71c71c7792cep-3), 0x1.24924924920dap-2),
+ 0x1.999999999999cp-2);
+
+ // ln(2)*e + 2*x + x^3(c3 + x^2*p)
+ double2 r = __clc_ep_add(
+ __clc_ep_mul(
+ __clc_ep_make_pair(0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56),
+ (double)e),
+ __clc_ep_fast_add(
+ __clc_ep_ldexp(x, 1),
+ __clc_ep_mul(
+ __clc_ep_mul(s, x),
+ __clc_ep_fast_add(__clc_ep_make_pair(0x1.5555555555555p-1,
+ 0x1.543b0d5df274dp-55),
+ __clc_ep_mul(s, p)))));
+ return r;
+}
+
+#endif
+
#undef __CLC_EP_USE_FMA
#endif
diff --git a/libclc/clc/lib/generic/math/clc_exp2_fast.cl b/libclc/clc/lib/generic/math/clc_exp2_fast.cl
new file mode 100644
index 0000000000000..e09bd65d7e02a
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_exp2_fast.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/math/clc_exp2.h"
+#include "clc/math/clc_exp2_fast.h"
+
+#define __CLC_FUNCTION __clc_exp2_fast
+#define __CLC_IMPL_FUNCTION(x) __clc_exp2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/math/clc_log2_fast.cl b/libclc/clc/lib/generic/math/clc_log2_fast.cl
new file mode 100644
index 0000000000000..2aad63967e888
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_log2_fast.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/math/clc_log2.h"
+#include "clc/math/clc_log2_fast.h"
+
+#define __CLC_FUNCTION __clc_log2_fast
+#define __CLC_IMPL_FUNCTION(x) __clc_log2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl
index 70d3d614a8d36..14fbfb68359f5 100644
--- a/libclc/clc/lib/generic/math/clc_pow.cl
+++ b/libclc/clc/lib/generic/math/clc_pow.cl
@@ -6,16 +6,35 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clc_convert.h>
-#include <clc/internal/clc.h>
-#include <clc/math/clc_fabs.h>
-#include <clc/math/clc_fma.h>
-#include <clc/math/clc_ldexp.h>
-#include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
-#include <clc/relational/clc_select.h>
+#include "clc/clc_convert.h"
+#include "clc/float/definitions.h"
+#include "clc/internal/clc.h"
+#include "clc/math/clc_copysign.h"
+#include "clc/math/clc_ep.h"
+#include "clc/math/clc_exp2.h"
+#include "clc/math/clc_exp2_fast.h"
+#include "clc/math/clc_fabs.h"
+#include "clc/math/clc_ldexp.h"
+#include "clc/math/clc_log.h"
+#include "clc/math/clc_log2.h"
+#include "clc/math/clc_log2_fast.h"
+#include "clc/math/clc_mad.h"
+#include "clc/math/clc_recip_fast.h"
+#include "clc/math/clc_trunc.h"
+#include "clc/math/math.h"
+#include "clc/relational/clc_isinf.h"
+#include "clc/relational/clc_isunordered.h"
-#define __CLC_BODY <clc_pow.inc>
-#include <clc/math/gentype.inc>
+#define COMPILING_POW
+#define __CLC_BODY "clc_pow_base.inc"
+#include "clc/math/gentype.inc"
+
+#define __CLC_FUNCTION __clc_pow
+#define __CLC_BODY "clc/shared/binary_def_scalarize.inc"
+#include "clc/math/gentype.inc"
+#undef __CLC_FUNCTION
+
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_pow_fast
+#define __CLC_BODY "clc/shared/binary_def_scalarize.inc"
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/math/clc_pow.inc b/libclc/clc/lib/generic/math/clc_pow.inc
deleted file mode 100644
index 35cbcdae8ffff..0000000000000
--- a/libclc/clc/lib/generic/math/clc_pow.inc
+++ /dev/null
@@ -1,438 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Computes pow using log and exp
-//
-// x^y = exp(y * log(x))
-//
-// We take care not to lose precision in the intermediate steps
-//
-// When computing log, calculate it in splits:
-//
-// r = f * (p_invead + p_inv_tail)
-// r = rh + rt
-//
-// Calculate log polynomial using r, in end addition, do:
-//
-// poly = poly + ((rh-r) + rt)
-//
-// lth = -r
-// ltt = ((xexp * log2_t) - poly) + logT
-// lt = lth + ltt
-//
-// lh = (xexp * log2_h) + logH
-// l = lh + lt
-//
-// Calculate final log answer as gh and gt:
-//
-// gh = l & higher-half bits
-// gt = (((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh))
-//
-// yh = y & higher-half bits
-// yt = y - yh
-//
-// Before entering computation of exp:
-//
-// vs = ((yt*gt + yt*gh) + yh*gt)
-// v = vs + yh*gh
-// vt = ((yh*gh - v) + vs)
-//
-// In calculation of exp, add vt to r that is used for poly.
-//
-// At the end of exp, do
-//
-// ((((expT * poly) + expT) + expH*poly) + expH)
-//
-//===----------------------------------------------------------------------===//
-
-#if __CLC_FPSIZE == 32
-
-_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_pow(__CLC_GENTYPE x,
- __CLC_GENTYPE y) {
- __CLC_GENTYPE absx = __clc_fabs(x);
- __CLC_INTN ix = __CLC_AS_INTN(x);
- __CLC_INTN ax = __CLC_AS_INTN(absx);
- __CLC_INTN xpos = ix == ax;
-
- __CLC_INTN iy = __CLC_AS_INTN(y);
- __CLC_INTN ay = __CLC_AS_INTN(__clc_fabs(y));
- __CLC_INTN ypos = iy == ay;
-
- /* Extra precise log calculation
- * First handle case that x is close to 1
- */
- __CLC_GENTYPE r = 1.0f - absx;
- __CLC_INTN near1 = __clc_fabs(r) < 0x1.0p-4f;
- __CLC_GENTYPE r2 = r * r;
-
- /* Coefficients are just 1/3, 1/4, 1/5 and 1/6 */
- __CLC_GENTYPE poly = __clc_mad(
- r,
- __clc_mad(r,
- __clc_mad(r, __clc_mad(r, 0x1.24924ap-3f, 0x1.555556p-3f),
- 0x1.99999ap-3f),
- 0x1.000000p-2f),
- 0x1.555556p-2f);
-
- poly *= r2 * r;
-
- __CLC_GENTYPE lth_near1 = -r2 * 0.5f;
- __CLC_GENTYPE ltt_near1 = -poly;
- _...
[truncated]
|
You can test this locally with the following command:
git-clang-format --diff origin/main HEAD --extensions cl,h,inc -- libclc/clc/include/clc/math/clc_exp2_fast.h libclc/clc/include/clc/math/clc_log2_fast.h libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl libclc/clc/lib/amdgpu/math/clc_log2_fast.cl libclc/clc/lib/generic/math/clc_exp2_fast.cl libclc/clc/lib/generic/math/clc_log2_fast.cl libclc/clc/lib/generic/math/clc_pow_base.inc libclc/clc/include/clc/math/clc_ep_decl.inc libclc/clc/include/clc/math/clc_pow.h libclc/clc/include/clc/math/clc_pown.h libclc/clc/include/clc/math/clc_powr.h libclc/clc/include/clc/math/clc_rootn.h libclc/clc/lib/generic/math/clc_ep.cl libclc/clc/lib/generic/math/clc_ep.inc libclc/clc/lib/generic/math/clc_pow.cl libclc/clc/lib/generic/math/clc_pown.cl libclc/clc/lib/generic/math/clc_powr.cl libclc/clc/lib/generic/math/clc_rootn.cl libclc/opencl/lib/generic/math/pow.cl libclc/opencl/lib/generic/math/pown.cl libclc/opencl/lib/generic/math/powr.cl libclc/opencl/lib/generic/math/rootn.cl --diff_from_common_commit
View the diff from clang-format here.
diff --git a/libclc/clc/lib/generic/math/clc_ep.inc b/libclc/clc/lib/generic/math/clc_ep.inc
index 56a7f1a7b..2b189fbdb 100644
--- a/libclc/clc/lib/generic/math/clc_ep.inc
+++ b/libclc/clc/lib/generic/math/clc_ep.inc
@@ -442,11 +442,27 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(double a) {
double2 x = __clc_ep_div(m - 1.0, __clc_ep_fast_add(1.0, m));
double2 s = __clc_ep_sqr(x);
double t = s.hi;
- double p = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
- __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
- 0x1.dee674222de17p-4, 0x1.a6564968915a9p-4), 0x1.e25e43abe935ap-4), 0x1.110ef47e6c9c2p-3),
- 0x1.3b13bcfa74449p-3), 0x1.745d171bf3c30p-3), 0x1.c71c71c7792cep-3), 0x1.24924924920dap-2),
- 0x1.999999999999cp-2);
+ double p = __clc_mad(
+ t,
+ __clc_mad(
+ t,
+ __clc_mad(
+ t,
+ __clc_mad(
+ t,
+ __clc_mad(
+ t,
+ __clc_mad(t,
+ __clc_mad(t,
+ __clc_mad(t, 0x1.dee674222de17p-4,
+ 0x1.a6564968915a9p-4),
+ 0x1.e25e43abe935ap-4),
+ 0x1.110ef47e6c9c2p-3),
+ 0x1.3b13bcfa74449p-3),
+ 0x1.745d171bf3c30p-3),
+ 0x1.c71c71c7792cep-3),
+ 0x1.24924924920dap-2),
+ 0x1.999999999999cp-2);
// ln(2)*e + 2*x + x^3(c3 + x^2*p)
double2 r = __clc_ep_add(
|
| @@ -0,0 +1,37 @@ | |||
| //===----------------------------------------------------------------------===// | |||
There was a problem hiding this comment.
Remove this file; use binary_def_scalarize_loop.inc and define __CLC_ARG2_IS_INTN at the use site instead?

The 4 flavors of pow were originally ported from ROCm device libs between c45ec60, cc5c65b, and fe8e00b. Update to a newer version. Additionally, expose fast variants for use by the libcall optimizer (e.g., __pow_fast) for float types.