Skip to content

libclc: Update pow functions #186890

Open
arsenm wants to merge 1 commit into main from
users/arsenm/libclc/update-pow
Open

libclc: Update pow functions #186890
arsenm wants to merge 1 commit into main from
users/arsenm/libclc/update-pow

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Mar 16, 2026

The 4 flavors of pow (pow, pown, powr, and rootn) were originally ported
from the ROCm device libs between c45ec60,
cc5c65b, and
fe8e00b. Update to a newer
version. Additionally, expose fast variants for use by the
libcall optimizer (e.g., __pow_fast) for float types.

The 4 flavors of pow (pow, pown, powr, and rootn) were originally ported
from the ROCm device libs between c45ec60,
cc5c65b, and
fe8e00b. Update to a newer
version. Additionally, expose fast variants for use by the
libcall optimizer (e.g., __pow_fast) for float types.
Copy link
Contributor Author

arsenm commented Mar 16, 2026

This stack of pull requests is managed by Graphite. Learn more about stacking.

@arsenm arsenm requested a review from wenju-he March 16, 2026 21:16
@arsenm arsenm added the libclc libclc OpenCL library label Mar 16, 2026 — with Graphite App
@arsenm arsenm marked this pull request as ready for review March 16, 2026 21:16
@llvmbot
Copy link
Member

llvmbot commented Mar 16, 2026

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

The 4 flavors of pow (pow, pown, powr, and rootn) were originally ported
from the ROCm device libs between c45ec60,
cc5c65b, and
fe8e00b. Update to a newer
version. Additionally, expose fast variants for use by the
libcall optimizer (e.g., __pow_fast) for float types.


Patch is 104.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/186890.diff

29 Files Affected:

  • (modified) libclc/clc/include/clc/math/clc_ep_decl.inc (+5)
  • (added) libclc/clc/include/clc/math/clc_exp2_fast.h (+19)
  • (added) libclc/clc/include/clc/math/clc_log2_fast.h (+19)
  • (modified) libclc/clc/include/clc/math/clc_pow.h (+6-1)
  • (modified) libclc/clc/include/clc/math/clc_pown.h (+7-2)
  • (modified) libclc/clc/include/clc/math/clc_powr.h (+7-2)
  • (modified) libclc/clc/include/clc/math/clc_rootn.h (+7-2)
  • (added) libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc (+37)
  • (modified) libclc/clc/lib/amdgpu/CMakeLists.txt (+2)
  • (added) libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl (+33)
  • (added) libclc/clc/lib/amdgpu/math/clc_log2_fast.cl (+25)
  • (modified) libclc/clc/lib/generic/CMakeLists.txt (+2)
  • (modified) libclc/clc/lib/generic/math/clc_ep.cl (+3)
  • (modified) libclc/clc/lib/generic/math/clc_ep.inc (+78)
  • (added) libclc/clc/lib/generic/math/clc_exp2_fast.cl (+15)
  • (added) libclc/clc/lib/generic/math/clc_log2_fast.cl (+15)
  • (modified) libclc/clc/lib/generic/math/clc_pow.cl (+31-12)
  • (removed) libclc/clc/lib/generic/math/clc_pow.inc (-438)
  • (added) libclc/clc/lib/generic/math/clc_pow_base.inc (+542)
  • (modified) libclc/clc/lib/generic/math/clc_pown.cl (+30-12)
  • (removed) libclc/clc/lib/generic/math/clc_pown.inc (-402)
  • (modified) libclc/clc/lib/generic/math/clc_powr.cl (+32-12)
  • (removed) libclc/clc/lib/generic/math/clc_powr.inc (-414)
  • (modified) libclc/clc/lib/generic/math/clc_rootn.cl (+30-11)
  • (removed) libclc/clc/lib/generic/math/clc_rootn.inc (-405)
  • (modified) libclc/opencl/lib/generic/math/pow.cl (+11-3)
  • (modified) libclc/opencl/lib/generic/math/pown.cl (+10-2)
  • (modified) libclc/opencl/lib/generic/math/powr.cl (+10-2)
  • (modified) libclc/opencl/lib/generic/math/rootn.cl (+10-2)
diff --git a/libclc/clc/include/clc/math/clc_ep_decl.inc b/libclc/clc/include/clc/math/clc_ep_decl.inc
index d29cfdc6346ba..1739da9f3ae63 100644
--- a/libclc/clc/include/clc/math/clc_ep_decl.inc
+++ b/libclc/clc/include/clc/math/clc_ep_decl.inc
@@ -128,4 +128,9 @@ _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_GENTYPE a);
 
 _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_EP_PAIR a);
 
+#if __CLC_FPSIZE == 32 || __CLC_FPSIZE == 64
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR a);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(__CLC_GENTYPE a);
+#endif
+
 #endif
diff --git a/libclc/clc/include/clc/math/clc_exp2_fast.h b/libclc/clc/include/clc/math/clc_exp2_fast.h
new file mode 100644
index 0000000000000..a42e6c9b7fd48
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_exp2_fast.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_EXP2_FAST_H__
+#define __CLC_MATH_CLC_EXP2_FAST_H__
+
+#define __CLC_FUNCTION __clc_exp2_fast
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_EXP2_FAST_H__
diff --git a/libclc/clc/include/clc/math/clc_log2_fast.h b/libclc/clc/include/clc/math/clc_log2_fast.h
new file mode 100644
index 0000000000000..5160afbedebf7
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_log2_fast.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_LOG2_FAST_H__
+#define __CLC_MATH_CLC_LOG2_FAST_H__
+
+#define __CLC_FUNCTION __clc_log2_fast
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_LOG2_FAST_H__
diff --git a/libclc/clc/include/clc/math/clc_pow.h b/libclc/clc/include/clc/math/clc_pow.h
index 5e37e5bf6da65..f7399873a4994 100644
--- a/libclc/clc/include/clc/math/clc_pow.h
+++ b/libclc/clc/include/clc/math/clc_pow.h
@@ -11,9 +11,14 @@
 
 #define __CLC_BODY <clc/shared/binary_decl.inc>
 #define __CLC_FUNCTION __clc_pow
-
 #include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
 
+#define __CLC_FLOAT_ONLY
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_pow_fast
+#include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
 
 #endif // __CLC_MATH_CLC_POW_H__
diff --git a/libclc/clc/include/clc/math/clc_pown.h b/libclc/clc/include/clc/math/clc_pown.h
index 30628efb19001..3e2b359468b48 100644
--- a/libclc/clc/include/clc/math/clc_pown.h
+++ b/libclc/clc/include/clc/math/clc_pown.h
@@ -9,11 +9,16 @@
 #ifndef __CLC_MATH_CLC_POWN_H__
 #define __CLC_MATH_CLC_POWN_H__
 
-#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
 #define __CLC_FUNCTION __clc_pown
-
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
 #include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
 
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_pown_fast
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
+#include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
 
 #endif // __CLC_MATH_CLC_POWN_H__
diff --git a/libclc/clc/include/clc/math/clc_powr.h b/libclc/clc/include/clc/math/clc_powr.h
index baa494cce6989..67c591ca6aa82 100644
--- a/libclc/clc/include/clc/math/clc_powr.h
+++ b/libclc/clc/include/clc/math/clc_powr.h
@@ -9,11 +9,16 @@
 #ifndef __CLC_MATH_CLC_POWR_H__
 #define __CLC_MATH_CLC_POWR_H__
 
-#define __CLC_BODY <clc/shared/binary_decl.inc>
 #define __CLC_FUNCTION __clc_powr
-
+#define __CLC_BODY <clc/shared/binary_decl.inc>
 #include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
 
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_powr_fast
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
 
 #endif // __CLC_MATH_CLC_POWR_H__
diff --git a/libclc/clc/include/clc/math/clc_rootn.h b/libclc/clc/include/clc/math/clc_rootn.h
index 90a25ad52d867..26d111a8671d4 100644
--- a/libclc/clc/include/clc/math/clc_rootn.h
+++ b/libclc/clc/include/clc/math/clc_rootn.h
@@ -9,11 +9,16 @@
 #ifndef __CLC_MATH_CLC_ROOTN_H__
 #define __CLC_MATH_CLC_ROOTN_H__
 
-#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
 #define __CLC_FUNCTION __clc_rootn
-
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
 #include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
 
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_rootn_fast
+#define __CLC_BODY <clc/shared/binary_decl_with_int_second_arg.inc>
+#include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
+#undef __CLC_FLOAT_ONLY
 
 #endif // __CLC_MATH_CLC_ROOTN_H__
diff --git a/libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc b/libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc
new file mode 100644
index 0000000000000..498df2fc420de
--- /dev/null
+++ b/libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/utils.h"
+
+#if __CLC_VECSIZE_OR_1 >= 2
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
+                                                    __CLC_INTN y) {
+  union {
+    __CLC_GENTYPE vec;
+    __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
+  } u_x, u_result;
+
+  union {
+    __CLC_INTN vec;
+    int arr[__CLC_VECSIZE_OR_1];
+  } u_y;
+
+  u_x.vec = x;
+  u_y.vec = y;
+  for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i) {
+    u_result.arr[i] = __CLC_IMPL_FUNCTION(u_x.arr[i], u_y.arr[i]);
+  }
+  return u_result.vec;
+}
+
+#endif // __CLC_VECSIZE_OR_1 >= 2
diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt
index 9b6c9a231ade0..daccc00b841b3 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -3,6 +3,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   address_space/clc_qualifier.cl
   math/clc_exp.cl
   math/clc_exp2.cl
+  math/clc_exp2_fast.cl
   math/clc_exp10.cl
   math/clc_frexp.cl
   math/clc_half_exp.cl
@@ -15,6 +16,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   math/clc_half_rsqrt.cl
   math/clc_half_sqrt.cl
   math/clc_ldexp.cl
+  math/clc_log2_fast.cl
   math/clc_native_exp.cl
   math/clc_native_exp2.cl
   math/clc_native_log10.cl
diff --git a/libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl b/libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl
new file mode 100644
index 0000000000000..b73bc9f6e260b
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/math/clc_exp2.h"
+#include "clc/math/clc_exp2_fast.h"
+
+#define __CLC_FLOAT_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define __CLC_FUNCTION __clc_exp2_fast
+#define __CLC_IMPL_FUNCTION(x) __builtin_amdgcn_exp2f(x)
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_FLOAT_ONLY
+
+#define __CLC_HALF_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_exp2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_HALF_ONLY
+
+#define __CLC_DOUBLE_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_exp2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_DOUBLE_ONLY
diff --git a/libclc/clc/lib/amdgpu/math/clc_log2_fast.cl b/libclc/clc/lib/amdgpu/math/clc_log2_fast.cl
new file mode 100644
index 0000000000000..a47fc84b26a00
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_log2_fast.cl
@@ -0,0 +1,25 @@
+#include "clc/math/clc_log2.h"
+#include "clc/math/clc_log2_fast.h"
+
+#define __CLC_FLOAT_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define __CLC_FUNCTION __clc_log2_fast
+#define __CLC_IMPL_FUNCTION(x) __builtin_amdgcn_logf(x)
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_FLOAT_ONLY
+
+#define __CLC_HALF_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_log2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_HALF_ONLY
+
+#define __CLC_DOUBLE_ONLY
+#define __CLC_IMPL_FUNCTION(x) __clc_log2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_IMPL_FUNCTION
+#undef __CLC_DOUBLE_ONLY
diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt
index 70a0863524b19..bda2ec67a55c3 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -80,6 +80,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   math/clc_erfc.cl
   math/clc_exp.cl
   math/clc_exp2.cl
+  math/clc_exp2_fast.cl
   math/clc_exp10.cl
   math/clc_exp_helper.cl
   math/clc_expm1.cl
@@ -114,6 +115,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   math/clc_lgamma_r.cl
   math/clc_log.cl
   math/clc_log2.cl
+  math/clc_log2_fast.cl
   math/clc_log10.cl
   math/clc_log1p.cl
   math/clc_logb.cl
diff --git a/libclc/clc/lib/generic/math/clc_ep.cl b/libclc/clc/lib/generic/math/clc_ep.cl
index f0e3020f65f6f..2e62563e2a107 100644
--- a/libclc/clc/lib/generic/math/clc_ep.cl
+++ b/libclc/clc/lib/generic/math/clc_ep.cl
@@ -9,8 +9,11 @@
 #include "clc/clc_convert.h"
 #include "clc/math/clc_div_fast.h"
 #include "clc/math/clc_ep.h"
+#include "clc/math/clc_exp.h"
 #include "clc/math/clc_fma.h"
+#include "clc/math/clc_frexp.h"
 #include "clc/math/clc_ldexp.h"
+#include "clc/math/clc_mad.h"
 #include "clc/math/clc_recip_fast.h"
 #include "clc/math/clc_sqrt_fast.h"
 #include "clc/relational/clc_isinf.h"
diff --git a/libclc/clc/lib/generic/math/clc_ep.inc b/libclc/clc/lib/generic/math/clc_ep.inc
index 38fa513c46aac..56a7f1a7becb2 100644
--- a/libclc/clc/lib/generic/math/clc_ep.inc
+++ b/libclc/clc/lib/generic/math/clc_ep.inc
@@ -387,5 +387,83 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_EP_PAIR a) {
                            a.hi == __CLC_FP_LIT(0.0) ? __CLC_FP_LIT(0.0) : slo);
 }
 
+#if __CLC_FPSIZE == 32
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR x) {
+  float d = x.hi == 0x1.62e430p+6f ? 0x1.0p-17f : 0.0f;
+  x.hi -= d;
+  x.lo += d;
+  float z = __clc_exp(x.hi);
+  float zz = __clc_fma(z, x.lo, z);
+  return __clc_isinf(z) ? z : zz;
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(float a) {
+  int a_exp;
+  float m = __clc_frexp(a, &a_exp);
+  int b = m < (2.0f / 3.0f);
+  m = __clc_ldexp(m, b);
+  int e = a_exp - b;
+
+  __CLC_EP_PAIR x = __clc_ep_div(m - 1.0f, __clc_ep_fast_add(1.0f, m));
+  __CLC_EP_PAIR s = __clc_ep_sqr(x);
+  float t = s.hi;
+  float p = __clc_mad(t, __clc_mad(t, 0x1.ed89c2p-3f, 0x1.23e988p-2f),
+                      0x1.999bdep-2f);
+
+  // ln(2)*e + 2*x + x^3(c3 + x^2*p)
+  float2 r = __clc_ep_add(
+      __clc_ep_mul(__clc_ep_make_pair(0x1.62e430p-1f, -0x1.05c610p-29f),
+                   (float)e),
+      __clc_ep_fast_add(
+          __clc_ep_ldexp(x, 1),
+          __clc_ep_mul(__clc_ep_mul(s, x),
+                       __clc_ep_fast_add(
+                           __clc_ep_make_pair(0x1.555554p-1f, 0x1.e72020p-29f),
+                           __clc_ep_mul(s, p)))));
+  return r;
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR x) {
+  __CLC_GENTYPE z = __clc_exp(x.hi);
+  __CLC_GENTYPE zz = __clc_mad(z, x.lo, z);
+  return __clc_isinf(z) ? z : zz;
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(double a) {
+  int a_exp;
+  double m = __clc_frexp(a, &a_exp);
+  int b = m < __CLC_FP_LIT(2.0 / 3.0);
+  m = __clc_ldexp(m, b);
+  int e = a_exp - b;
+
+  double2 x = __clc_ep_div(m - 1.0, __clc_ep_fast_add(1.0, m));
+  double2 s = __clc_ep_sqr(x);
+  double t = s.hi;
+  double p = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
+             __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
+                 0x1.dee674222de17p-4, 0x1.a6564968915a9p-4), 0x1.e25e43abe935ap-4), 0x1.110ef47e6c9c2p-3),
+                 0x1.3b13bcfa74449p-3), 0x1.745d171bf3c30p-3), 0x1.c71c71c7792cep-3), 0x1.24924924920dap-2),
+                 0x1.999999999999cp-2);
+
+  // ln(2)*e + 2*x + x^3(c3 + x^2*p)
+  double2 r = __clc_ep_add(
+      __clc_ep_mul(
+          __clc_ep_make_pair(0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56),
+          (double)e),
+      __clc_ep_fast_add(
+          __clc_ep_ldexp(x, 1),
+          __clc_ep_mul(
+              __clc_ep_mul(s, x),
+              __clc_ep_fast_add(__clc_ep_make_pair(0x1.5555555555555p-1,
+                                                   0x1.543b0d5df274dp-55),
+                                __clc_ep_mul(s, p)))));
+  return r;
+}
+
+#endif
+
 #undef __CLC_EP_USE_FMA
 #endif
diff --git a/libclc/clc/lib/generic/math/clc_exp2_fast.cl b/libclc/clc/lib/generic/math/clc_exp2_fast.cl
new file mode 100644
index 0000000000000..e09bd65d7e02a
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_exp2_fast.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/math/clc_exp2.h"
+#include "clc/math/clc_exp2_fast.h"
+
+#define __CLC_FUNCTION __clc_exp2_fast
+#define __CLC_IMPL_FUNCTION(x) __clc_exp2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/math/clc_log2_fast.cl b/libclc/clc/lib/generic/math/clc_log2_fast.cl
new file mode 100644
index 0000000000000..2aad63967e888
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_log2_fast.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/math/clc_log2.h"
+#include "clc/math/clc_log2_fast.h"
+
+#define __CLC_FUNCTION __clc_log2_fast
+#define __CLC_IMPL_FUNCTION(x) __clc_log2
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl
index 70d3d614a8d36..14fbfb68359f5 100644
--- a/libclc/clc/lib/generic/math/clc_pow.cl
+++ b/libclc/clc/lib/generic/math/clc_pow.cl
@@ -6,16 +6,35 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clc_convert.h>
-#include <clc/internal/clc.h>
-#include <clc/math/clc_fabs.h>
-#include <clc/math/clc_fma.h>
-#include <clc/math/clc_ldexp.h>
-#include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
-#include <clc/relational/clc_select.h>
+#include "clc/clc_convert.h"
+#include "clc/float/definitions.h"
+#include "clc/internal/clc.h"
+#include "clc/math/clc_copysign.h"
+#include "clc/math/clc_ep.h"
+#include "clc/math/clc_exp2.h"
+#include "clc/math/clc_exp2_fast.h"
+#include "clc/math/clc_fabs.h"
+#include "clc/math/clc_ldexp.h"
+#include "clc/math/clc_log.h"
+#include "clc/math/clc_log2.h"
+#include "clc/math/clc_log2_fast.h"
+#include "clc/math/clc_mad.h"
+#include "clc/math/clc_recip_fast.h"
+#include "clc/math/clc_trunc.h"
+#include "clc/math/math.h"
+#include "clc/relational/clc_isinf.h"
+#include "clc/relational/clc_isunordered.h"
 
-#define __CLC_BODY <clc_pow.inc>
-#include <clc/math/gentype.inc>
+#define COMPILING_POW
+#define __CLC_BODY "clc_pow_base.inc"
+#include "clc/math/gentype.inc"
+
+#define __CLC_FUNCTION __clc_pow
+#define __CLC_BODY "clc/shared/binary_def_scalarize.inc"
+#include "clc/math/gentype.inc"
+#undef __CLC_FUNCTION
+
+#define __CLC_FLOAT_ONLY
+#define __CLC_FUNCTION __clc_pow_fast
+#define __CLC_BODY "clc/shared/binary_def_scalarize.inc"
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/math/clc_pow.inc b/libclc/clc/lib/generic/math/clc_pow.inc
deleted file mode 100644
index 35cbcdae8ffff..0000000000000
--- a/libclc/clc/lib/generic/math/clc_pow.inc
+++ /dev/null
@@ -1,438 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Computes pow using log and exp
-//
-//   x^y = exp(y * log(x))
-//
-// We take care not to lose precision in the intermediate steps
-//
-// When computing log, calculate it in splits:
-//
-//   r = f * (p_invead + p_inv_tail)
-//   r = rh + rt
-//
-// Calculate log polynomial using r, in end addition, do:
-//
-//   poly = poly + ((rh-r) + rt)
-//
-//   lth = -r
-//   ltt = ((xexp * log2_t) - poly) + logT
-//   lt = lth + ltt
-//
-//   lh = (xexp * log2_h) + logH
-//   l = lh + lt
-//
-// Calculate final log answer as gh and gt:
-//
-//   gh = l & higher-half bits
-//   gt = (((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh))
-//
-//   yh = y & higher-half bits
-//   yt = y - yh
-//
-// Before entering computation of exp:
-//
-//   vs = ((yt*gt + yt*gh) + yh*gt)
-//   v = vs + yh*gh
-//   vt = ((yh*gh - v) + vs)
-//
-// In calculation of exp, add vt to r that is used for poly.
-//
-// At the end of exp, do
-//
-//   ((((expT * poly) + expT) + expH*poly) + expH)
-//
-//===----------------------------------------------------------------------===//
-
-#if __CLC_FPSIZE == 32
-
-_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_pow(__CLC_GENTYPE x,
-                                               __CLC_GENTYPE y) {
-  __CLC_GENTYPE absx = __clc_fabs(x);
-  __CLC_INTN ix = __CLC_AS_INTN(x);
-  __CLC_INTN ax = __CLC_AS_INTN(absx);
-  __CLC_INTN xpos = ix == ax;
-
-  __CLC_INTN iy = __CLC_AS_INTN(y);
-  __CLC_INTN ay = __CLC_AS_INTN(__clc_fabs(y));
-  __CLC_INTN ypos = iy == ay;
-
-  /* Extra precise log calculation
-   *  First handle case that x is close to 1
-   */
-  __CLC_GENTYPE r = 1.0f - absx;
-  __CLC_INTN near1 = __clc_fabs(r) < 0x1.0p-4f;
-  __CLC_GENTYPE r2 = r * r;
-
-  /* Coefficients are just 1/3, 1/4, 1/5 and 1/6 */
-  __CLC_GENTYPE poly = __clc_mad(
-      r,
-      __clc_mad(r,
-                __clc_mad(r, __clc_mad(r, 0x1.24924ap-3f, 0x1.555556p-3f),
-                          0x1.99999ap-3f),
-                0x1.000000p-2f),
-      0x1.555556p-2f);
-
-  poly *= r2 * r;
-
-  __CLC_GENTYPE lth_near1 = -r2 * 0.5f;
-  __CLC_GENTYPE ltt_near1 = -poly;
-  _...
[truncated]

@github-actions
Copy link

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff origin/main HEAD --extensions cl,h,inc -- libclc/clc/include/clc/math/clc_exp2_fast.h libclc/clc/include/clc/math/clc_log2_fast.h libclc/clc/include/clc/shared/binary_def_with_int_second_arg_scalarize_loop.inc libclc/clc/lib/amdgpu/math/clc_exp2_fast.cl libclc/clc/lib/amdgpu/math/clc_log2_fast.cl libclc/clc/lib/generic/math/clc_exp2_fast.cl libclc/clc/lib/generic/math/clc_log2_fast.cl libclc/clc/lib/generic/math/clc_pow_base.inc libclc/clc/include/clc/math/clc_ep_decl.inc libclc/clc/include/clc/math/clc_pow.h libclc/clc/include/clc/math/clc_pown.h libclc/clc/include/clc/math/clc_powr.h libclc/clc/include/clc/math/clc_rootn.h libclc/clc/lib/generic/math/clc_ep.cl libclc/clc/lib/generic/math/clc_ep.inc libclc/clc/lib/generic/math/clc_pow.cl libclc/clc/lib/generic/math/clc_pown.cl libclc/clc/lib/generic/math/clc_powr.cl libclc/clc/lib/generic/math/clc_rootn.cl libclc/opencl/lib/generic/math/pow.cl libclc/opencl/lib/generic/math/pown.cl libclc/opencl/lib/generic/math/powr.cl libclc/opencl/lib/generic/math/rootn.cl --diff_from_common_commit

⚠️
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing origin/main to the base branch/commit you want to compare against.
⚠️

View the diff from clang-format here.
diff --git a/libclc/clc/lib/generic/math/clc_ep.inc b/libclc/clc/lib/generic/math/clc_ep.inc
index 56a7f1a7b..2b189fbdb 100644
--- a/libclc/clc/lib/generic/math/clc_ep.inc
+++ b/libclc/clc/lib/generic/math/clc_ep.inc
@@ -442,11 +442,27 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(double a) {
   double2 x = __clc_ep_div(m - 1.0, __clc_ep_fast_add(1.0, m));
   double2 s = __clc_ep_sqr(x);
   double t = s.hi;
-  double p = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
-             __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
-                 0x1.dee674222de17p-4, 0x1.a6564968915a9p-4), 0x1.e25e43abe935ap-4), 0x1.110ef47e6c9c2p-3),
-                 0x1.3b13bcfa74449p-3), 0x1.745d171bf3c30p-3), 0x1.c71c71c7792cep-3), 0x1.24924924920dap-2),
-                 0x1.999999999999cp-2);
+  double p = __clc_mad(
+      t,
+      __clc_mad(
+          t,
+          __clc_mad(
+              t,
+              __clc_mad(
+                  t,
+                  __clc_mad(
+                      t,
+                      __clc_mad(t,
+                                __clc_mad(t,
+                                          __clc_mad(t, 0x1.dee674222de17p-4,
+                                                    0x1.a6564968915a9p-4),
+                                          0x1.e25e43abe935ap-4),
+                                0x1.110ef47e6c9c2p-3),
+                      0x1.3b13bcfa74449p-3),
+                  0x1.745d171bf3c30p-3),
+              0x1.c71c71c7792cep-3),
+          0x1.24924924920dap-2),
+      0x1.999999999999cp-2);
 
   // ln(2)*e + 2*x + x^3(c3 + x^2*p)
   double2 r = __clc_ep_add(

@@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove this file, use binary_def_scalarize_loop.inc and define __CLC_ARG2_IS_INTN at use site?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU libclc libclc OpenCL library

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants