diff --git a/CHANGELOG.md b/CHANGELOG.md index a77cd9840e99..096eabef6720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * `dpnp` uses pybind11 3.0.2 [#27734](https://github.com/IntelPython/dpnp/pull/2773) * Modified CMake files for the extension to explicitly mark DPC++ compiler and dpctl headers as system ones and so to suppress the build warning generated inside them [#2770](https://github.com/IntelPython/dpnp/pull/2770) * Updated QR tests to avoid element-wise comparisons for `raw` and `r` modes [#2785](https://github.com/IntelPython/dpnp/pull/2785) +* Moved all SYCL kernel functors from `backend/extensions/` to a unified `backend/kernels/` directory hierarchy [#2816](https://github.com/IntelPython/dpnp/pull/2816) ### Deprecated diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index 370d59f95585..e1bc34c9ae8b 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -62,7 +62,7 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) # treat below headers as system to suppress the warnings there during the build diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index 99d91744366f..3b2df73f46ef 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -30,41 +30,123 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include -#include "choose_kernel.hpp" +#include + #include "dpctl4pybind11.hpp" +#include +#include -// utils extension header #include "ext/common.hpp" +#include "kernels/indexing/choose.hpp" // dpctl tensor headers #include "utils/indexing_utils.hpp" #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/sycl_alloc_utils.hpp" #include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" namespace dpnp::extensions::indexing { +namespace py = pybind11; +namespace impl +{ namespace td_ns = dpctl::tensor::type_dispatch; -static kernels::choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static kernels::choose_fn_ptr_t choose_wrap_dispatch_table[td_ns::num_types] - [td_ns::num_types]; +using dpctl::tensor::ssize_t; + +typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, + size_t, + ssize_t, + int, + const ssize_t *, + const char *, + char *, + char **, + ssize_t, + ssize_t, + const ssize_t *, + const std::vector &); + +static choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static choose_fn_ptr_t choose_wrap_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +template +sycl::event choose_impl(sycl::queue &q, + size_t nelems, + ssize_t n_chcs, + int nd, + const ssize_t *shape_and_strides, + const char *ind_cp, + char *dst_cp, + char **chcs_cp, + ssize_t ind_offset, + ssize_t dst_offset, + const ssize_t *chc_offsets, + const std::vector &depends) +{ + dpctl::tensor::type_utils::validate_type_for_device(q); -namespace py = pybind11; + const indTy *ind_tp = reinterpret_cast(ind_cp); + Ty *dst_tp = reinterpret_cast(dst_cp); -namespace detail + sycl::event choose_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using InOutIndexerT = + dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + const InOutIndexerT ind_out_indexer{nd, ind_offset, dst_offset, + shape_and_strides}; + + using NthChoiceIndexerT = + dpnp::kernels::choose::strides::NthStrideOffsetUnpacked; + const NthChoiceIndexerT choices_indexer{ + nd, chc_offsets, shape_and_strides, shape_and_strides + 3 * nd}; + + using ChooseFunc = + dpnp::kernels::choose::ChooseFunctor; + + cgh.parallel_for(sycl::range<1>(nelems), + ChooseFunc(ind_tp, dst_tp, chcs_cp, n_chcs, + ind_out_indexer, + choices_indexer)); + }); + + return choose_ev; +} + +template +struct ChooseFactory { + fnT get() + { + if constexpr (std::is_integral::value && + !std::is_same::value) { + fnT fn = choose_impl; + return fn; + } + else { + fnT fn = nullptr; + return fn; + } + } +}; +namespace detail +{ using host_ptrs_allocator_t = dpctl::tensor::alloc_utils::usm_host_allocator; using ptrs_t = std::vector; @@ -191,7 +273,6 @@ std::vector parse_py_chcs(const sycl::queue &q, return res; } - } // namespace detail std::pair @@ -412,23 +493,6 @@ std::pair return std::make_pair(arg_cleanup_ev, choose_generic_ev); } -template -struct ChooseFactory -{ - fnT get() - { - if constexpr (std::is_integral::value && - !std::is_same::value) { - fnT fn = kernels::choose_impl; - return fn; - } - else { - fnT fn = nullptr; - return fn; - } - } -}; - using dpctl::tensor::indexing_utils::ClipIndex; using dpctl::tensor::indexing_utils::WrapIndex; @@ -441,23 +505,22 @@ using ChooseClipFactory = ChooseFactory>; void init_choose_dispatch_tables(void) { using ext::common::init_dispatch_table; - using kernels::choose_fn_ptr_t; init_dispatch_table( choose_clip_dispatch_table); init_dispatch_table( choose_wrap_dispatch_table); } +} // namespace impl void init_choose(py::module_ m) { - dpnp::extensions::indexing::init_choose_dispatch_tables(); + impl::init_choose_dispatch_tables(); - m.def("_choose", &py_choose, "", py::arg("src"), py::arg("chcs"), + m.def("_choose", &impl::py_choose, "", py::arg("src"), py::arg("chcs"), py::arg("dst"), py::arg("mode"), py::arg("sycl_queue"), py::arg("depends") = py::list()); return; } - } // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/choose_kernel.hpp b/dpnp/backend/extensions/indexing/choose_kernel.hpp deleted file mode 100644 index 6b1ac8005054..000000000000 --- a/dpnp/backend/extensions/indexing/choose_kernel.hpp +++ /dev/null @@ -1,191 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// - Neither the name of the copyright holder nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#pragma once - -#include -#include -#include -#include -#include - -#include - -#include "kernels/dpctl_tensor_types.hpp" -#include "utils/indexing_utils.hpp" -#include "utils/offset_utils.hpp" -#include "utils/strided_iters.hpp" -#include "utils/type_utils.hpp" - -namespace dpnp::extensions::indexing::strides_detail -{ - -struct NthStrideOffsetUnpacked -{ - NthStrideOffsetUnpacked(int common_nd, - dpctl::tensor::ssize_t const *_offsets, - dpctl::tensor::ssize_t const *_shape, - dpctl::tensor::ssize_t const *_strides) - : _ind(common_nd), nd(common_nd), offsets(_offsets), shape(_shape), - strides(_strides) - { - } - - template - size_t operator()(dpctl::tensor::ssize_t gid, nT n) const - { - dpctl::tensor::ssize_t relative_offset(0); - _ind.get_displacement( - gid, shape, strides + (n * nd), relative_offset); - - return relative_offset + offsets[n]; - } - -private: - dpctl::tensor::strides::CIndexer_vector _ind; - - int nd; - dpctl::tensor::ssize_t const *offsets; - dpctl::tensor::ssize_t const *shape; - dpctl::tensor::ssize_t const *strides; -}; - -static_assert(sycl::is_device_copyable_v); - -} // namespace dpnp::extensions::indexing::strides_detail - -namespace dpnp::extensions::indexing::kernels -{ - -template -class ChooseFunctor -{ -private: - const IndT *ind = nullptr; - T *dst = nullptr; - char **chcs = nullptr; - dpctl::tensor::ssize_t n_chcs; - const IndOutIndexerT ind_out_indexer; - const ChoicesIndexerT chcs_indexer; - -public: - ChooseFunctor(const IndT *ind_, - T *dst_, - char **chcs_, - dpctl::tensor::ssize_t n_chcs_, - const IndOutIndexerT &ind_out_indexer_, - const ChoicesIndexerT &chcs_indexer_) - : ind(ind_), dst(dst_), chcs(chcs_), n_chcs(n_chcs_), - ind_out_indexer(ind_out_indexer_), chcs_indexer(chcs_indexer_) - { - } - - void operator()(sycl::id<1> id) const - { - const ProjectorT proj{}; - - dpctl::tensor::ssize_t i = id[0]; - - auto ind_dst_offsets = ind_out_indexer(i); - dpctl::tensor::ssize_t ind_offset = ind_dst_offsets.get_first_offset(); - dpctl::tensor::ssize_t dst_offset = ind_dst_offsets.get_second_offset(); - - IndT chc_idx = ind[ind_offset]; - // proj produces an index in the range of n_chcs - dpctl::tensor::ssize_t projected_idx = proj(n_chcs, chc_idx); - - dpctl::tensor::ssize_t chc_offset = chcs_indexer(i, projected_idx); - - T *chc = reinterpret_cast(chcs[projected_idx]); - - dst[dst_offset] = chc[chc_offset]; - } -}; - -typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, - size_t, - dpctl::tensor::ssize_t, - int, - const dpctl::tensor::ssize_t *, - const char *, - char *, - char **, - dpctl::tensor::ssize_t, - dpctl::tensor::ssize_t, - const dpctl::tensor::ssize_t *, - const std::vector &); - -template -sycl::event choose_impl(sycl::queue &q, - size_t nelems, - dpctl::tensor::ssize_t n_chcs, - int nd, - const dpctl::tensor::ssize_t *shape_and_strides, - const char *ind_cp, - char *dst_cp, - char **chcs_cp, - dpctl::tensor::ssize_t ind_offset, - dpctl::tensor::ssize_t dst_offset, - const dpctl::tensor::ssize_t *chc_offsets, - const std::vector &depends) -{ - dpctl::tensor::type_utils::validate_type_for_device(q); - - const indTy *ind_tp = reinterpret_cast(ind_cp); - Ty *dst_tp = reinterpret_cast(dst_cp); - - sycl::event choose_ev = q.submit([&](sycl::handler &cgh) { - cgh.depends_on(depends); - - using InOutIndexerT = - dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; - const InOutIndexerT ind_out_indexer{nd, ind_offset, dst_offset, - shape_and_strides}; - - using NthChoiceIndexerT = strides_detail::NthStrideOffsetUnpacked; - const NthChoiceIndexerT choices_indexer{ - nd, chc_offsets, shape_and_strides, shape_and_strides + 3 * nd}; - - using ChooseFunc = ChooseFunctor; - - cgh.parallel_for(sycl::range<1>(nelems), - ChooseFunc(ind_tp, dst_tp, chcs_cp, n_chcs, - ind_out_indexer, - choices_indexer)); - }); - - return choose_ev; -} - -} // namespace dpnp::extensions::indexing::kernels diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 7ccb05238ae4..36786c8cbaf3 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -67,7 +67,7 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) # treat below headers as system to suppress the warnings there during the build diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index 02fc66f26610..8091e8874d17 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -28,24 +28,26 @@ #pragma once +#include +#include +#include +#include + #include +#include "dpctl4pybind11.hpp" + #include "ext/common.hpp" +#include "kernels/statistics/histogram.hpp" -namespace dpctl::tensor +namespace statistics::histogram { -class usm_ndarray; -} - using dpctl::tensor::usm_ndarray; using ext::common::AtomicOp; using ext::common::IsNan; using ext::common::Less; -namespace statistics::histogram -{ - template struct CachedData { @@ -69,23 +71,23 @@ struct CachedData template void init(const sycl::nd_item<_Dims> &item) const { - uint32_t llid = item.get_local_linear_id(); + std::uint32_t llid = item.get_local_linear_id(); auto local_ptr = &local_data[0]; - uint32_t size = local_data.size(); + std::uint32_t size = local_data.size(); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < size; i += local_size) { + for (std::uint32_t i = llid; i < size; i += local_size) { local_ptr[i] = global_data[i]; } } - size_t size() const { return local_data.size(); } + std::size_t size() const { return local_data.size(); } T &operator[](const sycl::id &id) const { return local_data[id]; } template > - T &operator[](const size_t id) const + T &operator[](const std::size_t id) const { return local_data[id]; } @@ -117,12 +119,12 @@ struct UncachedData { } - size_t size() const { return _shape.size(); } + std::size_t size() const { return _shape.size(); } T &operator[](const sycl::id &id) const { return global_data[id]; } template > - T &operator[](const size_t id) const + T &operator[](const std::size_t id) const { return global_data[id]; } @@ -139,15 +141,15 @@ struct HistLocalType }; template <> -struct HistLocalType +struct HistLocalType { - using type = uint32_t; + using type = std::uint32_t; }; template <> -struct HistLocalType +struct HistLocalType { - using type = int32_t; + using type = std::int32_t; }; template ::type> @@ -159,8 +161,8 @@ struct HistWithLocalCopies using LocalHist = sycl::local_accessor; HistWithLocalCopies(T *global_data, - size_t bins_count, - int32_t copies_count, + std::size_t bins_count, + std::int32_t copies_count, sycl::handler &cgh) { local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh); @@ -170,23 +172,25 @@ struct HistWithLocalCopies template void init(const sycl::nd_item<_Dims> &item, localT val = 0) const { - uint32_t llid = item.get_local_linear_id(); + std::uint32_t llid = item.get_local_linear_id(); auto *local_ptr = &local_hist[0][0]; - uint32_t size = local_hist.size(); + std::uint32_t size = local_hist.size(); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < size; i += local_size) { + for (std::uint32_t i = llid; i < size; i += local_size) { local_ptr[i] = val; } } template - void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const + void add(const sycl::nd_item<_Dims> &item, + std::int32_t bin, + localT value) const { - int32_t llid = item.get_local_linear_id(); - int32_t local_hist_count = local_hist.get_range().get(0); - int32_t local_copy_id = + std::int32_t llid = item.get_local_linear_id(); + std::int32_t local_hist_count = local_hist.get_range().get(0); + std::int32_t local_copy_id = local_hist_count == 1 ? 0 : llid % local_hist_count; AtomicOp void finalize(const sycl::nd_item<_Dims> &item) const { - uint32_t llid = item.get_local_linear_id(); - uint32_t bins_count = local_hist.get_range().get(1); - uint32_t local_hist_count = local_hist.get_range().get(0); + std::uint32_t llid = item.get_local_linear_id(); + std::uint32_t bins_count = local_hist.get_range().get(1); + std::uint32_t local_hist_count = local_hist.get_range().get(0); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < bins_count; i += local_size) { + for (std::uint32_t i = llid; i < bins_count; i += local_size) { auto value = local_hist[0][i]; - for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) { + for (std::uint32_t lhc = 1; lhc < local_hist_count; ++lhc) { value += local_hist[lhc][i]; } if (value != T(0)) { @@ -217,7 +221,7 @@ struct HistWithLocalCopies } } - uint32_t size() const { return local_hist.size(); } + std::uint32_t size() const { return local_hist.size(); } private: LocalHist local_hist; @@ -238,7 +242,7 @@ struct HistGlobalMemory } template - void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const + void add(const sycl::nd_item<_Dims> &, std::int32_t bin, T value) const { AtomicOp::add(global_hist[bin], value); @@ -253,10 +257,10 @@ struct HistGlobalMemory T *global_hist = nullptr; }; -template +template struct NoWeights { - constexpr T get(size_t) const { return 1; } + constexpr T get(std::size_t) const { return 1; } }; template @@ -264,7 +268,7 @@ struct Weights { Weights(T *weights) { data = weights; } - T get(size_t id) const { return data[id]; } + T get(std::size_t id) const { return data[id]; } private: T *data = nullptr; @@ -277,55 +281,23 @@ bool check_in_bounds(const dT &val, const dT &min, const dT &max) return !_less(val, min) && !_less(max, val) && !IsNan
::isnan(val); } -template -class histogram_kernel; - template void submit_histogram(const T *in, - const size_t size, - const size_t dims, - const uint32_t WorkPI, + const std::size_t size, + const std::size_t dims, + const std::uint32_t WorkPI, const HistImpl &hist, const Edges &edges, const Weights &weights, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto id = item.get_group_linear_id(); - auto lid = item.get_local_linear_id(); - auto group = item.get_group(); - auto local_size = item.get_local_range(0); - - hist.init(item); - edges.init(item); - - if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) { - sycl::group_barrier(group, sycl::memory_scope::work_group); - } - - auto bounds = edges.get_bounds(); - - for (uint32_t i = 0; i < WorkPI; ++i) { - auto data_idx = id * WorkPI * local_size + i * local_size + lid; - if (data_idx < size) { - auto *d = &in[data_idx * dims]; - - if (edges.in_bounds(d, bounds)) { - auto bin = edges.get_bin(item, d, bounds); - auto weight = weights.get(data_idx); - hist.add(item, bin, weight); - } - } - } - - if constexpr (HistImpl::sync_before_finalize) { - sycl::group_barrier(group, sycl::memory_scope::work_group); - } + using HistogramKernel = + dpnp::kernels::histogram::HistogramFunctor; - hist.finalize(item); - }); + cgh.parallel_for( + nd_range, + HistogramKernel(in, size, dims, WorkPI, hist, edges, weights)); } void validate(const usm_ndarray &sample, @@ -333,8 +305,8 @@ void validate(const usm_ndarray &sample, const std::optional &weights, const usm_ndarray &histogram); -uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items, - uint32_t local_size, - uint32_t hist_size_in_items); +std::uint32_t get_local_hist_copies_count(std::uint32_t loc_mem_size_in_items, + std::uint32_t local_size, + std::uint32_t hist_size_in_items); } // namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index f33a23609666..329c96dfc1c6 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -28,25 +28,21 @@ #pragma once -#include - -#include "utils/math_utils.hpp" -#include +#include +#include #include -#include - -#include "ext/common.hpp" +#include -using dpctl::tensor::usm_ndarray; +#include "dpctl4pybind11.hpp" -using ext::common::Align; -using ext::common::CeilDiv; +#include "kernels/statistics/sliding_window1d.hpp" namespace statistics::sliding_window1d { +using dpctl::tensor::usm_ndarray; -template +template class _RegistryDataStorage { public: @@ -144,7 +140,7 @@ class _RegistryDataStorage ncT data[Size]; }; -template +template struct RegistryData : public _RegistryDataStorage { using SizeT = typename _RegistryDataStorage::SizeT; @@ -336,7 +332,7 @@ struct RegistryData : public _RegistryDataStorage T *store(T *const data) { return store(data, true); } }; -template +template struct RegistryWindow : public RegistryData { using SizeT = typename RegistryData::SizeT; @@ -349,7 +345,7 @@ struct RegistryWindow : public RegistryData static_assert(std::is_integral_v, "shift must be of an integral type"); - uint32_t shift_r = this->size_x() - shift; + std::uint32_t shift_r = this->size_x() - shift; for (SizeT i = 0; i < Size; ++i) { this->data[i] = this->shift_left(i, shift); auto border = @@ -369,7 +365,7 @@ struct RegistryWindow : public RegistryData } }; -template +template class Span { public: @@ -391,13 +387,13 @@ class Span const SizeT size_; }; -template +template Span make_span(T *const data, const SizeT size) { return Span(data, size); } -template +template class PaddedSpan : public Span { public: @@ -417,68 +413,14 @@ class PaddedSpan : public Span const SizeT pad_; }; -template +template PaddedSpan make_padded_span(T *const data, const SizeT size, const SizeT offset) { return PaddedSpan(data, size, offset); } -template -void process_block(Results &results, - uint32_t r_size, - AData &a_data, - VData &v_data, - uint32_t block_size, - Op op, - Red red) -{ - for (uint32_t i = 0; i < block_size; ++i) { - auto v_val = v_data.broadcast(i); - for (uint32_t r = 0; r < r_size; ++r) { - results[r] = red(results[r], op(a_data[r], v_val)); - } - a_data.advance_left(); - } -} - -template -SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) -{ - auto sbgroup = item.get_sub_group(); - const auto sg_loc_id = sbgroup.get_local_linear_id(); - - const SizeT sg_base_id = wpi * (item.get_global_linear_id() - sg_loc_id); - const SizeT id = sg_base_id + sg_loc_id; - - return id; -} - -template -uint32_t get_results_num(const uint32_t wpi, - const SizeT size, - const SizeT global_id, - const sycl::nd_item<1> &item) -{ - auto sbgroup = item.get_sub_group(); - - const auto sbg_size = sbgroup.get_max_local_range()[0]; - const auto size_ = sycl::sub_sat(size, global_id); - return std::min(SizeT(wpi), CeilDiv(size_, sbg_size)); -} - -template -class sliding_window1d_kernel; - -template &a, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto glid = get_global_linear_id(WorkPI, item); - - auto results = RegistryData(item); - results.fill(0); - - auto results_num = get_results_num(WorkPI, out.size(), glid, item); - - const auto *a_begin = a.begin(); - const auto *a_end = a.end(); + using SlidingWindow1dKernel = + dpnp::kernels::sliding_window1d::SlidingWindow1dFunctor< + WorkPI, PaddedSpan, Span, Op, Red, + Span, RegistryData, RegistryWindow>; - auto sbgroup = item.get_sub_group(); - - const auto chunks_count = - CeilDiv(v.size(), sbgroup.get_max_local_range()[0]); - - const auto *a_ptr = &a.padded_begin()[glid]; - - auto _a_load_cond = [a_begin, a_end](auto &&ptr) { - return ptr >= a_begin && ptr < a_end; - }; - - auto a_data = RegistryWindow(item); - a_ptr = a_data.load(a_ptr, _a_load_cond, 0); - - const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; - auto v_size = v.size(); - - for (uint32_t b = 0; b < chunks_count; ++b) { - auto v_data = RegistryData(item); - v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); - - uint32_t chunk_size_ = - std::min(v_size, SizeT(v_data.total_size())); - process_block(results, results_num, a_data, v_data, chunk_size_, - op, red); - - if (b != chunks_count - 1) { - a_ptr = a_data.load_lane(a_data.size_y() - 1, a_ptr, - _a_load_cond, 0); - v_size -= v_data.total_size(); - } - } - - auto *const out_ptr = out.begin(); - // auto *const out_end = out.end(); - - auto y_start = glid; - auto y_stop = - std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { - out_ptr[y] = results[i++]; - } - // while the code itself seems to be valid, inside correlate - // kernel it results in memory corruption. Further investigation - // is needed. SAT-7693 - // corruption results.store(&out_ptr[glid], - // [out_end](auto &&ptr) { return ptr < out_end; }); - }); + cgh.parallel_for( + nd_range, SlidingWindow1dKernel(a, v, op, red, out)); } -template -class sliding_window1d_small_kernel; - -template &a, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto glid = get_global_linear_id(WorkPI, item); - - auto results = RegistryData(item); - results.fill(0); - - auto sbgroup = item.get_sub_group(); - auto sg_size = sbgroup.get_max_local_range()[0]; - - const uint32_t to_read = WorkPI * sg_size + v.size(); - const auto *a_begin = a.begin(); - - const auto *a_ptr = &a.padded_begin()[glid]; - const auto *a_end = std::min(a_ptr + to_read, a.end()); - - auto _a_load_cond = [a_begin, a_end](auto &&ptr) { - return ptr >= a_begin && ptr < a_end; - }; + using SlidingWindow1dSmallKernel = + dpnp::kernels::sliding_window1d::SlidingWindow1dSmallFunctor< + WorkPI, PaddedSpan, Span, Op, Red, + Span, RegistryData, RegistryWindow>; - auto a_data = RegistryWindow(item); - a_data.load(a_ptr, _a_load_cond, 0); - - const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; - auto v_size = v.size(); - - auto v_data = RegistryData(item); - v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); - - auto results_num = get_results_num(WorkPI, out.size(), glid, item); - - process_block(results, results_num, a_data, v_data, v_size, op, - red); - - auto *const out_ptr = out.begin(); - // auto *const out_end = out.end(); - - auto y_start = glid; - auto y_stop = - std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { - out_ptr[y] = results[i++]; - } - // while the code itself seems to be valid, inside correlate - // kernel it results in memory corruption. Further investigation - // is needed. SAT-7693 - // corruption results.store(&out_ptr[glid], - // [out_end](auto &&ptr) { return ptr < out_end; }); - }); + cgh.parallel_for( + nd_range, SlidingWindow1dSmallKernel(a, v, op, red, out)); } void validate(const usm_ndarray &a, const usm_ndarray &v, const usm_ndarray &out, - const size_t l_pad, - const size_t r_pad); + const std::size_t l_pad, + const std::size_t r_pad); } // namespace statistics::sliding_window1d diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index fca8c43f816e..8830569ce9cf 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -41,40 +41,29 @@ #include #include +#include "kernels/elementwise_functions/interpolate.hpp" + // dpctl tensor headers #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" -#include "kernels/elementwise_functions/interpolate.hpp" - // utils extension headers #include "ext/common.hpp" #include "ext/validation_utils.hpp" -namespace py = pybind11; -namespace td_ns = dpctl::tensor::type_dispatch; -namespace type_utils = dpctl::tensor::type_utils; - -using ext::common::value_type_of; -using ext::validation::array_names; -using ext::validation::array_ptr; - -using ext::common::dtype_from_typenum; -using ext::validation::check_has_dtype; -using ext::validation::check_num_dims; -using ext::validation::check_same_dtype; -using ext::validation::check_same_size; -using ext::validation::common_checks; - namespace dpnp::extensions::ufunc { +namespace py = pybind11; namespace impl { -using ext::common::init_dispatch_vector; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace type_utils = dpctl::tensor::type_utils; template -using value_type_of_t = typename value_type_of::type; +using value_type_of_t = typename ext::common::value_type_of::type; + +using ext::common::dtype_from_typenum; typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, const void *, // x @@ -88,8 +77,10 @@ typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, const std::size_t, // xp_size const std::vector &); +interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types]; + template -sycl::event interpolate_call(sycl::queue &exec_q, +sycl::event interpolate_impl(sycl::queue &q, const void *vx, const void *vidx, const void *vxp, @@ -101,6 +92,8 @@ sycl::event interpolate_call(sycl::queue &exec_q, const std::size_t xp_size, const std::vector &depends) { + dpctl::tensor::type_utils::validate_type_for_device(q); + using type_utils::is_complex_v; using TCoord = std::conditional_t, value_type_of_t, T>; @@ -112,23 +105,69 @@ sycl::event interpolate_call(sycl::queue &exec_q, const T *right = static_cast(vright); T *out = static_cast(vout); - using dpnp::kernels::interpolate::interpolate_impl; - sycl::event interpolate_ev = interpolate_impl( - exec_q, x, idx, xp, fp, left, right, out, n, xp_size, depends); + sycl::event interpolate_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using InterpolateFunc = + dpnp::kernels::interpolate::InterpolateFunctor; + + cgh.parallel_for( + sycl::range<1>(n), + InterpolateFunc(x, idx, xp, fp, left, right, out, xp_size)); + }); return interpolate_ev; } -interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types]; +/** + * @brief A factory to define pairs of supported types for which + * interpolate function is available. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct InterpolateOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; -void common_interpolate_checks( - const dpctl::tensor::usm_ndarray &x, - const dpctl::tensor::usm_ndarray &idx, - const dpctl::tensor::usm_ndarray &xp, - const dpctl::tensor::usm_ndarray &fp, - const dpctl::tensor::usm_ndarray &out, - const std::optional &left, - const std::optional &right) +template +struct InterpolateFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename InterpolateOutputType::value_type, + void>) { + return nullptr; + } + else { + return interpolate_impl; + } + } +}; + +namespace detail +{ +using ext::validation::array_names; +using ext::validation::check_has_dtype; +using ext::validation::check_num_dims; +using ext::validation::check_same_dtype; +using ext::validation::check_same_size; +using ext::validation::common_checks; + +void validate(const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + const dpctl::tensor::usm_ndarray &out, + const std::optional &left, + const std::optional &right) { array_names names = {{&x, "x"}, {&xp, "xp"}, {&fp, "fp"}, {&out, "out"}}; @@ -158,6 +197,7 @@ void common_interpolate_checks( throw py::value_error("array of sample points is empty"); } } +} // namespace detail std::pair py_interpolate(const dpctl::tensor::usm_ndarray &x, @@ -170,7 +210,7 @@ std::pair sycl::queue &exec_q, const std::vector &depends) { - common_interpolate_checks(x, idx, xp, fp, out, left, right); + detail::validate(x, idx, xp, fp, out, left, right); int out_typenum = out.get_typenum(); @@ -214,56 +254,21 @@ std::pair return std::make_pair(args_ev, ev); } -/** - * @brief A factory to define pairs of supported types for which - * interpolate function is available. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct InterpolateOutputType -{ - using value_type = typename std::disjunction< - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry>, - td_ns::TypeMapResultEntry>, - td_ns::DefaultResultEntry>::result_type; -}; - -template -struct InterpolateFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename InterpolateOutputType::value_type, - void>) { - return nullptr; - } - else { - return interpolate_call; - } - } -}; - static void init_interpolate_dispatch_vectors() { - init_dispatch_vector( + using ext::common::init_dispatch_vector; + init_dispatch_vector( interpolate_dispatch_vector); } - } // namespace impl void init_interpolate(py::module_ m) { impl::init_interpolate_dispatch_vectors(); - using impl::py_interpolate; - m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"), - py::arg("xp"), py::arg("fp"), py::arg("left"), py::arg("right"), - py::arg("out"), py::arg("sycl_queue"), + m.def("_interpolate", &impl::py_interpolate, "", py::arg("x"), + py::arg("idx"), py::arg("xp"), py::arg("fp"), py::arg("left"), + py::arg("right"), py::arg("out"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - } // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/window/common.hpp b/dpnp/backend/extensions/window/common.hpp index cb084e972d78..9e7b1192e3a2 100644 --- a/dpnp/backend/extensions/window/common.hpp +++ b/dpnp/backend/extensions/window/common.hpp @@ -28,11 +28,18 @@ #pragma once -#include -#include +#include +#include +#include +#include +#include +#include + #include #include "dpctl4pybind11.hpp" +#include +#include // dpctl tensor headers #include "utils/output_validation.hpp" @@ -41,10 +48,8 @@ namespace dpnp::extensions::window { - -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; typedef sycl::event (*window_fn_ptr_t)(sycl::queue &, char *, @@ -72,6 +77,20 @@ sycl::event window_impl(sycl::queue &exec_q, return window_ev; } +template typename FunctorT> +struct Factory +{ + fnT get() + { + if constexpr (std::is_floating_point_v) { + return window_impl; + } + else { + return nullptr; + } + } +}; + template std::tuple window_fn(sycl::queue &exec_q, @@ -101,7 +120,7 @@ std::tuple } const int result_typenum = result.get_typenum(); - auto array_types = dpctl_td_ns::usm_ndarray_types(); + auto array_types = td_ns::usm_ndarray_types(); const int result_type_id = array_types.typenum_to_lookup_id(result_typenum); funcPtrT fn = window_dispatch_vector[result_type_id]; diff --git a/dpnp/backend/extensions/window/kaiser.cpp b/dpnp/backend/extensions/window/kaiser.cpp index b83f88f69a9b..e5c1aa837a64 100644 --- a/dpnp/backend/extensions/window/kaiser.cpp +++ b/dpnp/backend/extensions/window/kaiser.cpp @@ -26,26 +26,24 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include "kaiser.hpp" +#include + #include "common.hpp" +#include "kaiser.hpp" + +#include "kernels/window/kaiser.hpp" // utils extension header #include "ext/common.hpp" // dpctl tensor headers -#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" -#include - -#include "kernels/elementwise_functions/i0.hpp" - namespace dpnp::extensions::window { -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - -using ext::common::init_dispatch_vector; +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; typedef sycl::event (*kaiser_fn_ptr_t)(sycl::queue &, char *, @@ -53,34 +51,10 @@ typedef sycl::event (*kaiser_fn_ptr_t)(sycl::queue &, const py::object &, const std::vector &); -static kaiser_fn_ptr_t kaiser_dispatch_vector[dpctl_td_ns::num_types]; +static kaiser_fn_ptr_t kaiser_dispatch_vector[td_ns::num_types]; -template -class KaiserFunctor +namespace impl { -private: - T *res = nullptr; - const std::size_t N; - const T beta; - -public: - KaiserFunctor(T *res, const std::size_t N, const T beta) - : res(res), N(N), beta(beta) - { - } - - void operator()(sycl::id<1> id) const - { - using dpnp::kernels::i0::cyl_bessel_i0; - - const auto i = id.get(0); - const T alpha = (N - 1) / T(2); - const T tmp = (i - alpha) / alpha; - res[i] = cyl_bessel_i0(beta * sycl::sqrt(1 - tmp * tmp)) / - cyl_bessel_i0(beta); - } -}; - template sycl::event kaiser_impl(sycl::queue &exec_q, char *result, @@ -96,7 +70,7 @@ sycl::event kaiser_impl(sycl::queue &exec_q, sycl::event kaiser_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - using KaiserKernel = KaiserFunctor; + using KaiserKernel = dpnp::kernels::kaiser::KaiserFunctor; cgh.parallel_for(sycl::range<1>(nelems), KaiserKernel(res, nelems, beta)); }); @@ -117,6 +91,7 @@ struct KaiserFactory } } }; +} // namespace impl std::pair py_kaiser(sycl::queue &exec_q, @@ -141,8 +116,8 @@ std::pair void init_kaiser_dispatch_vectors() { - init_dispatch_vector( + using ext::common::init_dispatch_vector; + init_dispatch_vector( kaiser_dispatch_vector); } - } // namespace dpnp::extensions::window diff --git a/dpnp/backend/extensions/window/kaiser.hpp b/dpnp/backend/extensions/window/kaiser.hpp index 0a4712cc594e..4ba506620db2 100644 --- a/dpnp/backend/extensions/window/kaiser.hpp +++ b/dpnp/backend/extensions/window/kaiser.hpp @@ -28,11 +28,15 @@ #pragma once -#include #include +#include +#include + namespace dpnp::extensions::window { +namespace py = pybind11; + extern std::pair py_kaiser(sycl::queue &exec_q, const py::object &beta, @@ -40,5 +44,4 @@ extern std::pair const std::vector &depends); extern void init_kaiser_dispatch_vectors(void); - } // namespace dpnp::extensions::window diff --git a/dpnp/backend/extensions/window/window_py.cpp b/dpnp/backend/extensions/window/window_py.cpp index 2b8090c40cca..5ae80f4027b5 100644 --- a/dpnp/backend/extensions/window/window_py.cpp +++ b/dpnp/backend/extensions/window/window_py.cpp @@ -33,11 +33,12 @@ #include #include -#include "bartlett.hpp" -#include "blackman.hpp" +#include "kernels/window/bartlett.hpp" +#include "kernels/window/blackman.hpp" +#include "kernels/window/hamming.hpp" +#include "kernels/window/hanning.hpp" + #include "common.hpp" -#include "hamming.hpp" -#include "hanning.hpp" #include "kaiser.hpp" // utils extension header @@ -51,6 +52,22 @@ using window_ns::window_fn_ptr_t; namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +template +using BartlettFactory = + window_ns::Factory; + +template +using BlackmanFactory = + window_ns::Factory; + +template +using HammingFactory = + window_ns::Factory; + +template +using HanningFactory = + window_ns::Factory; + static window_fn_ptr_t bartlett_dispatch_vector[dpctl_td_ns::num_types]; static window_fn_ptr_t blackman_dispatch_vector[dpctl_td_ns::num_types]; static window_fn_ptr_t hamming_dispatch_vector[dpctl_td_ns::num_types]; @@ -62,8 +79,7 @@ PYBIND11_MODULE(_window_impl, m) using event_vecT = std::vector; { - init_dispatch_vector( + init_dispatch_vector( bartlett_dispatch_vector); auto bartlett_pyapi = [&](sycl::queue &exec_q, const arrayT &result, @@ -78,8 +94,7 @@ PYBIND11_MODULE(_window_impl, m) } { - init_dispatch_vector( + init_dispatch_vector( blackman_dispatch_vector); auto blackman_pyapi = [&](sycl::queue &exec_q, const arrayT &result, @@ -94,8 +109,7 @@ PYBIND11_MODULE(_window_impl, m) } { - init_dispatch_vector( + init_dispatch_vector( hamming_dispatch_vector); auto hamming_pyapi = [&](sycl::queue &exec_q, const arrayT &result, @@ -110,8 +124,7 @@ PYBIND11_MODULE(_window_impl, m) } { - init_dispatch_vector( + init_dispatch_vector( hanning_dispatch_vector); auto hanning_pyapi = [&](sycl::queue &exec_q, const arrayT &result, diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index ef38157b00e9..c85dafea24b0 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -28,67 +28,79 @@ #pragma once +#include +#include + #include -#include #include "ext/common.hpp" -using ext::common::IsNan; - namespace dpnp::kernels::interpolate { +using ext::common::IsNan; + template -sycl::event interpolate_impl(sycl::queue &q, - const TCoord *x, - const TIdx *idx, - const TCoord *xp, - const TValue *fp, - const TValue *left, - const TValue *right, - TValue *out, - const std::size_t n, - const std::size_t xp_size, - const std::vector &depends) +class InterpolateFunctor { +private: + const TCoord *x = nullptr; + const TIdx *idx = nullptr; + const TCoord *xp = nullptr; + const TValue *fp = nullptr; + const TValue *left = nullptr; + const TValue *right = nullptr; + TValue *out = nullptr; + const std::size_t xp_size; + +public: + InterpolateFunctor(const TCoord *x_, + const TIdx *idx_, + const TCoord *xp_, + const TValue *fp_, + const TValue *left_, + const TValue *right_, + TValue *out_, + const std::size_t xp_size_) + : x(x_), idx(idx_), xp(xp_), fp(fp_), left(left_), right(right_), + out(out_), xp_size(xp_size_) + { + } + // Selected over the work-group version // due to simpler execution and slightly better performance. - return q.submit([&](sycl::handler &h) { - h.depends_on(depends); - h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { - TValue left_val = left ? *left : fp[0]; - TValue right_val = right ? *right : fp[xp_size - 1]; + void operator()(sycl::id<1> id) const + { + TValue left_val = left ? *left : fp[0]; + TValue right_val = right ? *right : fp[xp_size - 1]; - TCoord x_val = x[i]; - TIdx x_idx = idx[i] - 1; + TCoord x_val = x[id]; + TIdx x_idx = idx[id] - 1; - if (IsNan::isnan(x_val)) { - out[i] = x_val; - } - else if (x_idx < 0) { - out[i] = left_val; - } - else if (x_val == xp[xp_size - 1]) { - out[i] = fp[xp_size - 1]; - } - else if (x_idx >= static_cast(xp_size - 1)) { - out[i] = right_val; - } - else { - TValue slope = - (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); - TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx]; + if (IsNan::isnan(x_val)) { + out[id] = x_val; + } + else if (x_idx < 0) { + out[id] = left_val; + } + else if (x_val == xp[xp_size - 1]) { + out[id] = fp[xp_size - 1]; + } + else if (x_idx >= static_cast(xp_size - 1)) { + out[id] = right_val; + } + else { + TValue slope = + (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); + TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx]; - if (IsNan::isnan(res)) { - res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; - if (IsNan::isnan(res) && - (fp[x_idx] == fp[x_idx + 1])) { - res = fp[x_idx]; - } + if (IsNan::isnan(res)) { + res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; + if (IsNan::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { + res = fp[x_idx]; } - out[i] = res; } - }); - }); -} - + out[id] = res; + } + } +}; } // namespace dpnp::kernels::interpolate diff --git a/dpnp/backend/kernels/indexing/choose.hpp b/dpnp/backend/kernels/indexing/choose.hpp new file mode 100644 index 000000000000..49b71d05c96b --- /dev/null +++ b/dpnp/backend/kernels/indexing/choose.hpp @@ -0,0 +1,128 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include + +#include "kernels/dpctl_tensor_types.hpp" +#include "utils/strided_iters.hpp" + +namespace dpnp::kernels::choose +{ +using dpctl::tensor::ssize_t; + +template +class ChooseFunctor +{ +private: + const IndT *ind = nullptr; + T *dst = nullptr; + char **chcs = nullptr; + ssize_t n_chcs; + const IndOutIndexerT ind_out_indexer; + const ChoicesIndexerT chcs_indexer; + +public: + ChooseFunctor(const IndT *ind_, + T *dst_, + char **chcs_, + ssize_t n_chcs_, + const IndOutIndexerT &ind_out_indexer_, + const ChoicesIndexerT &chcs_indexer_) + : ind(ind_), dst(dst_), chcs(chcs_), n_chcs(n_chcs_), + ind_out_indexer(ind_out_indexer_), chcs_indexer(chcs_indexer_) + { + } + + void operator()(sycl::id<1> id) const + { + const ProjectorT proj{}; + + ssize_t i = id[0]; + + auto ind_dst_offsets = ind_out_indexer(i); + ssize_t ind_offset = ind_dst_offsets.get_first_offset(); + ssize_t dst_offset = ind_dst_offsets.get_second_offset(); + + IndT chc_idx = ind[ind_offset]; + // proj produces an index in the range of n_chcs + ssize_t projected_idx = proj(n_chcs, chc_idx); + + ssize_t chc_offset = chcs_indexer(i, projected_idx); + + T *chc = reinterpret_cast(chcs[projected_idx]); + + dst[dst_offset] = chc[chc_offset]; + } +}; + +namespace strides +{ +using dpctl::tensor::strides::CIndexer_vector; + +struct NthStrideOffsetUnpacked +{ + NthStrideOffsetUnpacked(int common_nd, + ssize_t const *_offsets, + ssize_t const *_shape, + ssize_t const *_strides) + : _ind(common_nd), nd(common_nd), offsets(_offsets), shape(_shape), + strides(_strides) + { + } + + template + size_t operator()(ssize_t gid, nT n) const + { + ssize_t relative_offset(0); + _ind.get_displacement( + gid, shape, strides + (n * nd), relative_offset); + + return relative_offset + offsets[n]; + } + +private: + CIndexer_vector _ind; + + int nd; + ssize_t const *offsets; + ssize_t const *shape; + ssize_t const *strides; +}; + +static_assert(sycl::is_device_copyable_v); + +} // namespace strides +} // namespace dpnp::kernels::choose diff --git a/dpnp/backend/kernels/statistics/histogram.hpp b/dpnp/backend/kernels/statistics/histogram.hpp new file mode 100644 index 000000000000..6d0fedbe0bc3 --- /dev/null +++ b/dpnp/backend/kernels/statistics/histogram.hpp @@ -0,0 +1,99 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp::kernels::histogram +{ +template +class HistogramFunctor +{ +private: + const T *in = nullptr; + const std::size_t size; + const std::size_t dims; + const std::uint32_t WorkPI; + const HistImpl hist; + const Edges edges; + const Weights weights; + +public: + HistogramFunctor(const T *in_, + const std::size_t size_, + const std::size_t dims_, + const std::uint32_t WorkPI_, + const HistImpl &hist_, + const Edges &edges_, + const Weights &weights_) + : in(in_), size(size_), dims(dims_), WorkPI(WorkPI_), hist(hist_), + edges(edges_), weights(weights_) + { + } + + void operator()(sycl::nd_item<1> item) const + { + auto id = item.get_group_linear_id(); + auto lid = item.get_local_linear_id(); + auto group = item.get_group(); + auto local_size = item.get_local_range(0); + + hist.init(item); + edges.init(item); + + if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + auto bounds = edges.get_bounds(); + + for (std::uint32_t i = 0; i < WorkPI; ++i) { + auto data_idx = id * WorkPI * local_size + i * local_size + lid; + if (data_idx < size) { + auto *d = &in[data_idx * dims]; + + if (edges.in_bounds(d, bounds)) { + auto bin = edges.get_bin(item, d, bounds); + auto weight = weights.get(data_idx); + hist.add(item, bin, weight); + } + } + } + + if constexpr (HistImpl::sync_before_finalize) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + hist.finalize(item); + } +}; +} // namespace dpnp::kernels::histogram diff --git a/dpnp/backend/kernels/statistics/sliding_window1d.hpp b/dpnp/backend/kernels/statistics/sliding_window1d.hpp new file mode 100644 index 000000000000..5b3c5535afd4 --- /dev/null +++ b/dpnp/backend/kernels/statistics/sliding_window1d.hpp @@ -0,0 +1,274 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +#include "ext/common.hpp" + +namespace dpnp::kernels::sliding_window1d +{ +using ext::common::CeilDiv; + +namespace detail +{ +template +SizeT get_global_linear_id(const std::uint32_t wpi, + const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + const auto sg_loc_id = sbgroup.get_local_linear_id(); + + const SizeT sg_base_id = wpi * (item.get_global_linear_id() - sg_loc_id); + const SizeT id = sg_base_id + sg_loc_id; + + return id; +} + +template +std::uint32_t get_results_num(const std::uint32_t wpi, + const SizeT size, + const SizeT global_id, + const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + + const auto sbg_size = sbgroup.get_max_local_range()[0]; + const auto size_ = sycl::sub_sat(size, global_id); + return std::min(SizeT(wpi), CeilDiv(size_, sbg_size)); +} + +template +void process_block(Results &results, + std::uint32_t r_size, + AData &a_data, + VData &v_data, + std::uint32_t block_size, + Op op, + Red red) +{ + for (std::uint32_t i = 0; i < block_size; ++i) { + auto v_val = v_data.broadcast(i); + for (std::uint32_t r = 0; r < r_size; ++r) { + results[r] = red(results[r], op(a_data[r], v_val)); + } + a_data.advance_left(); + } +} +} // namespace detail + +template class RegistryDataT, + template class RegistryWindowT> +class SlidingWindow1dFunctor +{ +private: + const SpanT a; + const KernelT v; + const OpT op; + const RedT red; + ResultT out; + + static constexpr std::uint32_t default_reg_data_size = 1; + using SizeT = typename SpanT::size_type; + +public: + SlidingWindow1dFunctor(const SpanT &a_, + const KernelT &v_, + const OpT &op_, + const RedT &red_, + ResultT &out_) + : a(a_), v(v_), op(op_), red(red_), out(out_) + { + } + + void operator()(sycl::nd_item<1> item) const + { + auto glid = detail::get_global_linear_id(WorkPI, item); + + auto results = + RegistryDataT(item); + results.fill(0); + + auto results_num = + detail::get_results_num(WorkPI, out.size(), glid, item); + + const auto *a_begin = a.begin(); + const auto *a_end = a.end(); + + auto sbgroup = item.get_sub_group(); + + const auto chunks_count = + CeilDiv(v.size(), sbgroup.get_max_local_range()[0]); + + const auto *a_ptr = &a.padded_begin()[glid]; + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = + RegistryWindowT(item); + a_ptr = a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + for (std::uint32_t b = 0; b < chunks_count; ++b) { + auto v_data = RegistryDataT(item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + std::uint32_t chunk_size_ = + std::min(v_size, SizeT(v_data.total_size())); + detail::process_block(results, results_num, a_data, v_data, + chunk_size_, op, red); + + if (b != chunks_count - 1) { + a_ptr = a_data.load_lane(a_data.size_y() - 1, a_ptr, + _a_load_cond, 0); + v_size -= v_data.total_size(); + } + } + + auto *const out_ptr = out.begin(); + // auto *const out_end = out.end(); + + auto y_start = glid; + auto y_stop = std::min(y_start + WorkPI * results.size_x(), out.size()); + std::uint32_t i = 0; + for (std::uint32_t y = y_start; y < y_stop; y += results.size_x()) { + out_ptr[y] = results[i++]; + } + // while the code itself seems to be valid, inside correlate + // kernel it results in memory corruption. Further investigation + // is needed. SAT-7693 + // corruption results.store(&out_ptr[glid], + // [out_end](auto &&ptr) { return ptr < out_end; }); + } +}; + +template class RegistryDataT, + template class RegistryWindowT> +class SlidingWindow1dSmallFunctor +{ +private: + const SpanT a; + const KernelT v; + const OpT op; + const RedT red; + ResultT out; + + static constexpr std::uint32_t default_reg_data_size = 1; + using SizeT = typename SpanT::size_type; + +public: + SlidingWindow1dSmallFunctor(const SpanT &a_, + const KernelT &v_, + const OpT &op_, + const RedT &red_, + ResultT &out_) + : a(a_), v(v_), op(op_), red(red_), out(out_) + { + } + + void operator()(sycl::nd_item<1> item) const + { + auto glid = detail::get_global_linear_id(WorkPI, item); + + auto results = + RegistryDataT(item); + results.fill(0); + + auto sbgroup = item.get_sub_group(); + auto sg_size = sbgroup.get_max_local_range()[0]; + + const std::uint32_t to_read = WorkPI * sg_size + v.size(); + const auto *a_begin = a.begin(); + + const auto *a_ptr = &a.padded_begin()[glid]; + const auto *a_end = std::min(a_ptr + to_read, a.end()); + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = + RegistryWindowT(item); + a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + auto v_data = + RegistryDataT( + item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + auto results_num = + detail::get_results_num(WorkPI, out.size(), glid, item); + + detail::process_block(results, results_num, a_data, v_data, v_size, op, + red); + + auto *const out_ptr = out.begin(); + // auto *const out_end = out.end(); + + auto y_start = glid; + auto y_stop = std::min(y_start + WorkPI * results.size_x(), out.size()); + std::uint32_t i = 0; + for (std::uint32_t y = y_start; y < y_stop; y += results.size_x()) { + out_ptr[y] = results[i++]; + } + // while the code itself seems to be valid, inside correlate + // kernel it results in memory corruption. Further investigation + // is needed. SAT-7693 + // corruption results.store(&out_ptr[glid], + // [out_end](auto &&ptr) { return ptr < out_end; }); + } +}; +} // namespace dpnp::kernels::sliding_window1d diff --git a/dpnp/backend/extensions/window/bartlett.hpp b/dpnp/backend/kernels/window/bartlett.hpp similarity index 80% rename from dpnp/backend/extensions/window/bartlett.hpp rename to dpnp/backend/kernels/window/bartlett.hpp index 69d3be627c84..20d410150dcb 100644 --- a/dpnp/backend/extensions/window/bartlett.hpp +++ b/dpnp/backend/kernels/window/bartlett.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -19,7 +19,7 @@ // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, RES, OR PROFITS; OR BUSINESS +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF @@ -28,12 +28,12 @@ #pragma once -#include "common.hpp" +#include + #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::bartlett { - template class BartlettFunctor { @@ -52,19 +52,4 @@ class BartlettFunctor res[i] = T(1) - sycl::fabs(i - alpha) / alpha; } }; - -template -struct BartlettFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::bartlett diff --git a/dpnp/backend/extensions/window/blackman.hpp b/dpnp/backend/kernels/window/blackman.hpp similarity index 83% rename from dpnp/backend/extensions/window/blackman.hpp rename to dpnp/backend/kernels/window/blackman.hpp index 7a75d226792f..9df7cb8728e2 100644 --- a/dpnp/backend/extensions/window/blackman.hpp +++ b/dpnp/backend/kernels/window/blackman.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -28,12 +28,12 @@ #pragma once -#include "common.hpp" +#include + #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::blackman { - template class BlackmanFunctor { @@ -53,19 +53,4 @@ class BlackmanFunctor T(0.08) * sycl::cospi(T(2) * alpha); } }; - -template -struct BlackmanFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::blackman diff --git a/dpnp/backend/extensions/window/hamming.hpp b/dpnp/backend/kernels/window/hamming.hpp similarity index 83% rename from dpnp/backend/extensions/window/hamming.hpp rename to dpnp/backend/kernels/window/hamming.hpp index 521ebc10c281..895ecb0e588c 100644 --- a/dpnp/backend/extensions/window/hamming.hpp +++ b/dpnp/backend/kernels/window/hamming.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -28,12 +28,12 @@ #pragma once -#include "common.hpp" +#include + #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::hamming { - template class HammingFunctor { @@ -51,19 +51,4 @@ class HammingFunctor res[i] = T(0.54) - T(0.46) * sycl::cospi(T(2) * i / (N - 1)); } }; - -template -struct HammingFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::hamming diff --git a/dpnp/backend/extensions/window/hanning.hpp b/dpnp/backend/kernels/window/hanning.hpp similarity index 83% rename from dpnp/backend/extensions/window/hanning.hpp rename to dpnp/backend/kernels/window/hanning.hpp index 612036d6b05a..35b441f921f8 100644 --- a/dpnp/backend/extensions/window/hanning.hpp +++ b/dpnp/backend/kernels/window/hanning.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -28,12 +28,12 @@ #pragma once -#include "common.hpp" +#include + #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::hanning { - template class HanningFunctor { @@ -51,19 +51,4 @@ class HanningFunctor res[i] = T(0.5) - T(0.5) * sycl::cospi(T(2) * i / (N - 1)); } }; - -template -struct HanningFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::hanning diff --git a/dpnp/backend/kernels/window/kaiser.hpp b/dpnp/backend/kernels/window/kaiser.hpp new file mode 100644 index 000000000000..ce8c8e52fd18 --- /dev/null +++ b/dpnp/backend/kernels/window/kaiser.hpp @@ -0,0 +1,64 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include + +#include "kernels/elementwise_functions/i0.hpp" + +namespace dpnp::kernels::kaiser +{ +template +class KaiserFunctor +{ +private: + T *res = nullptr; + const std::size_t N; + const T beta; + +public: + KaiserFunctor(T *res, const std::size_t N, const T beta) + : res(res), N(N), beta(beta) + { + } + + void operator()(sycl::id<1> id) const + { + using dpnp::kernels::i0::cyl_bessel_i0; + + const auto i = id.get(0); + const T alpha = (N - 1) / T(2); + const T tmp = (i - alpha) / alpha; + res[i] = cyl_bessel_i0(beta * sycl::sqrt(1 - tmp * tmp)) / + cyl_bessel_i0(beta); + } +}; +} // namespace dpnp::kernels::kaiser diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index 588345d91b2e..545fd888c1ba 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -259,6 +259,9 @@ def find_objects(): "-format=lcov", "-ignore-filename-regex=/tmp/icpx*", r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$", + r"-ignore-filename-regex=.*/backend/kernels/indexing/.*\.hpp$", + r"-ignore-filename-regex=.*/backend/kernels/statistics/.*\.hpp$", + r"-ignore-filename-regex=.*/backend/kernels/window/.*\.hpp$", "-instr-profile=" + instr_profile_fn, ] + objects