From aa1918320b700e5fe42f218a39cf4edac8610a91 Mon Sep 17 00:00:00 2001 From: Soowon Jeong Date: Fri, 15 May 2026 15:51:46 +0900 Subject: [PATCH 1/2] [BugFix][LLVM] Use libm for asin/acos instead of buggy inline Taylor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `tirx.asin` LLVM legalization rule used a 6-term Taylor series for |x| < 0.5 with incorrect coefficients. The recurrence multipliers (`9/40`, `25/112`, `1225/3456`, `3969/28160`) do not match the standard asin Taylor series ratios (`9/20`, `25/42`, `49/72`, `81/110`), so the series under-counted higher-order terms and lost roughly 1e-3 of precision in the mid-range (x ≈ 0.3–0.5) — well over a thousand ULP at float32. `tirx.acos` inherits the same error via `π/2 - asin(x)` for |x| < 0.5. The Taylor branch was added as the original implementation in #17945 with no libm fallback; #18582 later patched the boundary at |x| ≥ 0.5 by switching to the libm extern. There is no evidence the inline series was ever a deliberate fast-path — it was simply incomplete. This change drops the inline series and routes the whole input range through the existing libm extern (`asinf`/`acosf`), keeping only the out-of-range NaN guard. ULP-grade precision is restored across the full domain. The Asin/Acos cases that were commented out of `test_unary` with a "Taylor approximation precision loss" TODO are re-enabled. Fixes #19563. --- src/target/llvm/intrin_rule_llvm.cc | 25 ++---------------------- tests/python/relax/test_frontend_onnx.py | 6 +++--- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 3244deab875b..9509e59e6c2a 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -179,19 +179,6 @@ TVM_REGISTER_OP("tirx.asin") TVM_FFI_ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; - PrimExpr threshold = make_const(x.dtype(), 0.5); - PrimExpr abs_x = tvm::abs(x); - PrimExpr use_lib = abs_x >= threshold; - - PrimExpr x2 = x * x; - PrimExpr term1 = x; - PrimExpr term3 = term1 * x2 / make_const(x.dtype(), 6); - PrimExpr term5 = term3 * x2 * make_const(x.dtype(), 9) / make_const(x.dtype(), 40); - PrimExpr term7 = term5 * x2 * make_const(x.dtype(), 25) / make_const(x.dtype(), 112); - PrimExpr term9 = term7 * x2 * make_const(x.dtype(), 1225) / make_const(x.dtype(), 3456); - PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / make_const(x.dtype(), 28160); - PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11; - PrimExpr lib_result = ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); @@ -200,7 +187,7 @@ TVM_REGISTER_OP("tirx.asin") PrimExpr out_range = tirx::Or(x upper); PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - return tirx::Select(out_range, nan_const, tirx::Select(use_lib, lib_result, series)); + return tirx::Select(out_range, nan_const, lib_result); }); TVM_REGISTER_OP("tirx.acos") @@ -211,14 +198,6 @@ TVM_REGISTER_OP("tirx.acos") TVM_FFI_ICHECK(call != nullptr) << "Invalid call node in acos legalization"; const PrimExpr& x = call->args[0]; - PrimExpr threshold = make_const(x.dtype(), 0.5); - PrimExpr abs_x = tvm::abs(x); - PrimExpr use_lib = abs_x >= threshold; - - PrimExpr half_pi = make_const(x.dtype(), M_PI / 2); - PrimExpr asin_x = asin(x); - PrimExpr formula_result = half_pi - asin_x; - PrimExpr lib_result = ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); @@ -227,7 +206,7 @@ TVM_REGISTER_OP("tirx.acos") PrimExpr out_range = tirx::Or(x upper); PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - return tirx::Select(out_range, nan_const, tirx::Select(use_lib, lib_result, formula_result)); + return tirx::Select(out_range, nan_const, lib_result); }); TVM_REGISTER_OP("tirx.atan") diff --git a/tests/python/relax/test_frontend_onnx.py b/tests/python/relax/test_frontend_onnx.py index 151ec35e897f..464c06c3b6a9 100644 --- a/tests/python/relax/test_frontend_onnx.py +++ b/tests/python/relax/test_frontend_onnx.py @@ -705,9 +705,9 @@ def test_bitwise_shift(direction: str): "Sinh", "Cosh", "Tanh", - # "Asin", // TODO @jikechao, fix the precision loss due to the Taylor approximation - # "Acos", - # "Atan", + "Asin", + "Acos", + # "Atan", // TODO: fix x²+1 overflow in llvm legalize for huge inputs (issue #19560) "Asinh", "Acosh", "Atanh", From 9d9730d716723681450b2ec82f168afb863438a3 Mon Sep 17 00:00:00 2001 From: Soowon Jeong Date: Fri, 15 May 2026 16:22:27 +0900 Subject: [PATCH 2/2] [BugFix][Target][LLVM] Drop redundant NaN guard around libm asin/acos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review feedback: with the inline Taylor branch removed, the manual out-of-range Select(out_range, NaN, lib_result) is redundant. The libm externs (asinf / acosf) already return NaN for |x| > 1 by C11 and IEEE 754, so the guard only adds dead TIR. Verified against ±2.0 inputs. --- src/target/llvm/intrin_rule_llvm.cc | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 9509e59e6c2a..ae57e8d9a607 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -173,40 +173,18 @@ TVM_REGISTER_OP("tirx.sinh") TVM_REGISTER_OP("tirx.asin") .set_attr("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr { - using tirx::make_const; using namespace intrin; const tirx::CallNode* call = e.as(); TVM_FFI_ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - - PrimExpr lib_result = - ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); - - PrimExpr lower = make_const(x.dtype(), -1.0); - PrimExpr upper = make_const(x.dtype(), 1.0); - PrimExpr out_range = tirx::Or(x upper); - PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - - return tirx::Select(out_range, nan_const, lib_result); + return ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); }); TVM_REGISTER_OP("tirx.acos") .set_attr("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr { - using tirx::make_const; using namespace intrin; const tirx::CallNode* call = e.as(); TVM_FFI_ICHECK(call != nullptr) << "Invalid call node in acos legalization"; - const PrimExpr& x = call->args[0]; - - PrimExpr lib_result = - ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); - - PrimExpr lower = make_const(x.dtype(), -1.0); - PrimExpr upper = make_const(x.dtype(), 1.0); - PrimExpr out_range = tirx::Or(x upper); - PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - - return tirx::Select(out_range, nan_const, lib_result); + return ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); }); TVM_REGISTER_OP("tirx.atan")