diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/array.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/array.hpp index 6c87fb2ad86..9a5e6432a25 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/array.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/array.hpp @@ -360,6 +360,12 @@ constexpr auto make_const_array(T x, Ts... xs) return integral_const_array{}; } +template +constexpr auto generate_const_array(N n, F f) +{ + return sequence_c([=](auto... is) { return make_const_array(f(is)...); }); +} + template constexpr auto generate_array(N n, F f) { diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp index 5e5e16b1315..aa46782ce58 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -206,6 +206,14 @@ MIGRAPHX_HIP_NORETURN inline __host__ __device__ void assert_fail(const source_l #define MIGRAPHX_CHECK(cond) \ MIGRAPHX_ASSERT_FAIL(cond, #cond, __FILE__, __LINE__, __PRETTY_FUNCTION__) +#ifdef CPPCHECK +// NOLINTNEXTLINE +#define MIGRAPHX_CAPTURE_SOURCE_LOCATION(T) T +#define MIGRAPHX_ASSUME assert(cond) +#define MIGRAPHX_UNREACHABLE assert(false) +#define MIGRAPHX_ASSERT(cond) assert(cond) +#define MIGRAPHX_WARN(cond, ...) assert(cond) +#else #ifdef MIGRAPHX_DEBUG // NOLINTNEXTLINE #define MIGRAPHX_CAPTURE_SOURCE_LOCATION(T) source_location_capture @@ -221,6 +229,7 @@ MIGRAPHX_HIP_NORETURN inline __host__ __device__ void assert_fail(const source_l #define MIGRAPHX_ASSERT(cond) #define MIGRAPHX_WARN(...) #endif +#endif #define MIGRAPHX_STATIC_ASSERT_FOR(...) \ static_assert(__VA_ARGS__); \ diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp index 43ee2ca5d87..08640e9e07b 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp @@ -2,7 +2,7 @@ * * The MIT License (MIT) * - * Copyright (C) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -59,7 +59,7 @@ class numeric_limits; template struct float8 { - uint8_t data; + uint8_t data = 0; // default constructor __device__ constexpr float8() = default; // default copy constructor @@ -140,7 +140,7 @@ struct float8 migraphx::fp8::rounding_mode rm = migraphx::fp8::rounding_mode::standard, uint32_t rng = 0) { - if(__builtin_is_constant_evaluated() or !FNUZ) + if(__builtin_is_constant_evaluated() or not FNUZ) { if constexpr(T == migraphx::fp8::f8_type::fp8) { @@ -249,7 +249,7 @@ struct float8 // upcast using device specific intrinsic constexpr __device__ operator float() const { - if(__builtin_is_constant_evaluated() or !FNUZ) + if(__builtin_is_constant_evaluated() or not FNUZ) { if constexpr(T == migraphx::fp8::f8_type::fp8) { @@ -261,7 +261,7 @@ struct float8 else { float fval = 0; - uint32_t i32val = static_cast(data); + uint32_t i32val = data; // upcast if constexpr(T == migraphx::fp8::f8_type::fp8) @@ -312,7 +312,7 @@ struct float8 } else { - if(T == migraphx::fp8::f8_type::bf8) + if constexpr(T == migraphx::fp8::f8_type::bf8) { return (data == 0x7D) or (data == 0x7E) or (data == 0x7F) or (data == 0xFD) or (data == 0xFE) or (data == 0xFF); @@ -333,7 +333,7 @@ struct float8 } else { - if(T == migraphx::fp8::f8_type::bf8) + if constexpr(T == migraphx::fp8::f8_type::bf8) { return (data == 0x7C) or (data == 0xFC); } @@ -370,16 +370,12 @@ struct float8 __device__ constexpr bool operator<(const float8& rhs) const { - const auto we = static_cast(*this); - const auto them = static_cast(rhs); - return we < them; + return static_cast(*this) < static_cast(rhs); } __device__ constexpr bool operator>(const float8& rhs) const { - const auto we = static_cast(*this); - const auto them = static_cast(rhs); - return we > them; + return static_cast(*this) > static_cast(rhs); } }; diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp index 09ab146fbed..9fbe5e6f740 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp @@ -118,6 +118,7 @@ __device__ constexpr uint8_t cast_to_f8(T f_x, bool stoch = false, uint32_t rng if(x == 0) return 0; // handle negative zero + // cppcheck-suppress compareValueOutOfTypeRangeError else if((sizeof(T) == 4 and x == 0x80000000) or (sizeof(T) == 2 and x == 0x8000)) { return NegativeZeroNan ? 0 : 0x80; // For FNUZ types neg zero is just positive zero diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp index 74a4aa51cb5..e8f16b9d5e0 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -131,6 +131,9 @@ struct is_integral_constant> : true_type template using index_constant = integral_constant; +template +static constexpr auto index_c = index_constant{}; + template static constexpr auto _c = integral_constant{}; // NOLINT diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/slice.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/slice.hpp index 89f1a4a615e..f7adee4eec5 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/slice.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/slice.hpp @@ -98,13 +98,12 @@ template constexpr auto slice_group() { return slice_size_transform{[](auto input, auto s) { - auto r = return_array_c([] { + return return_array_c([] { auto lens = decltype(s){}.lens.base(); lens.back() *= N; lens -= 1; return decltype(input){}.lens.carry(lens) + index_int{1}; }); - return r; }}; } diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/spatial_tiler.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/spatial_tiler.hpp index 72ca68deac2..bc89ef88268 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/spatial_tiler.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/spatial_tiler.hpp @@ -39,7 +39,7 @@ constexpr bool has_nonzero(index_ints) return ((Ps != 0) or ...); } -template > +template > struct spatial_tiler { static constexpr auto keep_spatial() @@ -72,39 +72,50 @@ struct spatial_tiler static constexpr index_int tiles_total() { return tiles_per_dim().product(); } static constexpr auto ndim() { return out_spatial_lens().size(); } - static constexpr bool is_padded() + static constexpr auto get_padding() { - return (out_spatial_lens() != tiles_per_dim() * output_lens()); + if constexpr(Padding{}.size() < 2) + { + auto pre = transform(TileLens{}, [](auto) { return index_c<0>; }); + return join(pre, pre); + } + else + { + return Padding{}; + } } - static constexpr bool has_conv_padding() { return has_nonzero(Padding{}); } - // Left (begin) padding per dim: (0, 0, left_h, left_w) static constexpr auto left_padding() { - return return_array_c([] { - constexpr auto p = Padding{}; - constexpr auto ns = p.size() / 2; - auto result = array(index_int{0}); - for(index_int i = 0; i < ns; i++) - result[i + 2] = p[i]; - return result; + constexpr auto p = get_padding(); + constexpr auto ns = p.size() / 2; + return generate_const_array(_c, [&](auto i) { + if constexpr(i < 2) + return index_c<0>; + else + return index_c; }); } // Total (left+right) padding per dim: (0, 0, left_h+right_h, left_w+right_w) static constexpr auto total_padding() { - return return_array_c([] { - constexpr auto p = Padding{}; - constexpr auto ns = p.size() / 2; - auto result = array(index_int{0}); - for(index_int i = 0; i < ns; i++) - result[i + 2] = p[i] + p[i + ns]; - return result; + constexpr auto p = get_padding(); + constexpr auto ns = p.size() / 2; + return generate_const_array(_c, [&](auto i) { + if constexpr(i < 2) + return index_c<0>; + else + return index_c; }); } + static constexpr bool is_padded() + { + return (out_spatial_lens() != (tiles_per_dim() * output_lens() + total_padding())); + } + index idx; array tile_origin; @@ -114,19 +125,10 @@ struct spatial_tiler static constexpr auto halo_lens_for() { constexpr auto halo_extra = [] { - if constexpr(has_conv_padding()) - { - return return_array_c([] { - return make_slice(InputShape{}, keep_spatial()).lens - out_spatial_lens() + - total_padding(); - }); - } - else - { - constexpr auto input_spatial = make_slice(InputShape{}, keep_spatial()).lens; - return transform( - input_spatial, out_spatial_lens(), [](auto is, auto os) { return is - os; }); - } + return return_array_c([] { + return make_slice(InputShape{}, keep_spatial()).lens - out_spatial_lens() + + total_padding(); + }); }(); return transform(output_lens(), halo_extra, [](auto o, auto h) { return o + h; }); } @@ -167,19 +169,14 @@ struct spatial_tiler idx.local_stride(_c, [&](auto i) { auto halo_multi = halo_shape.multi(i); auto src_pos = tile_origin + halo_multi; - if constexpr(has_conv_padding()) + auto input_pos = src_pos - left_padding(); + if constexpr(is_padded()) { - constexpr auto pad = left_padding(); - auto input_pos = src_pos - pad; smem[i] = in_bounds(input_pos, input_spatial) ? type{input_ch[input_pos]} : type{0}; } - else if constexpr(is_padded()) - { - smem[i] = in_bounds(src_pos, input_spatial) ? type{input_ch[src_pos]} : type{0}; - } else { - smem[i] = input_ch[src_pos]; + smem[i] = input_ch[input_pos]; } }); @@ -203,7 +200,7 @@ struct spatial_tiler } }; -template > +template > __device__ auto make_spatial_tiler(index idx, TileLens, OutputShape, Padding = {}) { using tiler_type = spatial_tiler; diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/tile.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/tile.hpp index 1f11b214fd1..6ccbd0ba17f 100644 --- a/src/targets/gpu/kernels/include/migraphx/kernels/tile.hpp +++ b/src/targets/gpu/kernels/include/migraphx/kernels/tile.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -28,6 +28,7 @@ #include #include #include +#include #include namespace migraphx { @@ -61,8 +62,8 @@ struct tile using type = typename T::type; constexpr auto s = pad_shape(make_packed_shape(get_shape_c{})); constexpr auto size = s.element_space(); - __shared__ type buffer[size]; - auto b = make_tensor_view(buffer, s); + __shared__ uninitialized_buffer buffer; + auto b = make_tensor_view(buffer.data(), s); local_tensor_copy(idx, x, b); f(b); }; @@ -77,8 +78,8 @@ struct tile using type = typename T::type; constexpr auto s = pad_shape(make_packed_shape(get_shape_c{})); constexpr auto size = s.element_space(); - __shared__ type buffer[size]; - auto b = make_tensor_view(buffer, s); + __shared__ uninitialized_buffer buffer; + auto b = make_tensor_view(buffer.data(), s); f(b); local_tensor_copy(idx, b, x); }; diff --git a/test/gpu/kernels/spatial_tiler.cpp b/test/gpu/kernels/spatial_tiler.cpp new file mode 100644 index 00000000000..058eb8e4274 --- /dev/null +++ b/test/gpu/kernels/spatial_tiler.cpp @@ -0,0 +1,320 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +// cppcheck-suppress-file constStatement +#include +#include + +// Helper: create a standard 4D shape from lens +template +static constexpr auto make_4d_shape() +{ + constexpr auto lens = migraphx::index_ints{}; + return migraphx::make_shape(lens); +} + +// ======== output_lens ======== + +// Tile {4, 4} with NTiles=1 → output_lens = {1, 1, 4, 4} +TEST_CASE(output_lens_ntiles_1) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + constexpr auto ol = tiler::output_lens(); + EXPECT(ol.size() == 4); + EXPECT(ol[0] == 1); + EXPECT(ol[1] == 1); + EXPECT(ol[2] == 4); + EXPECT(ol[3] == 4); +} + +// Tile {4, 4} with NTiles=2 → last dim doubled: {1, 1, 4, 8} +TEST_CASE(output_lens_ntiles_2) +{ + using tiler = migraphx:: + spatial_tiler<2, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + constexpr auto ol = tiler::output_lens(); + EXPECT(ol[2] == 4); + EXPECT(ol[3] == 8); +} + +// ======== out_spatial_lens ======== + +TEST_CASE(out_spatial_lens_basic) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<2, 3, 16, 16>())>; + constexpr auto sl = tiler::out_spatial_lens(); + // keep_spatial sets dims 0,1 to 1; keeps H,W + EXPECT(sl[0] == 1); + EXPECT(sl[1] == 1); + EXPECT(sl[2] == 16); + EXPECT(sl[3] == 16); +} + +// ======== tiles_per_dim ======== + +// 8x8 output, 4x4 tile, NTiles=1 → ceil(8/4)=2 per spatial dim +TEST_CASE(tiles_per_dim_exact) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + constexpr auto tpd = tiler::tiles_per_dim(); + EXPECT(tpd[2] == 2); + EXPECT(tpd[3] == 2); +} + +// 10x10 output, 4x4 tile → ceil(10/4)=3 per spatial dim +TEST_CASE(tiles_per_dim_inexact) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 10, 10>())>; + constexpr auto tpd = tiler::tiles_per_dim(); + EXPECT(tpd[2] == 3); + EXPECT(tpd[3] == 3); +} + +// NTiles=2 scales last dim: tile output is {4, 8} → ceil(16/4)=4, ceil(16/8)=2 +TEST_CASE(tiles_per_dim_ntiles) +{ + using tiler = migraphx:: + spatial_tiler<2, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 16, 16>())>; + constexpr auto tpd = tiler::tiles_per_dim(); + EXPECT(tpd[2] == 4); + EXPECT(tpd[3] == 2); +} + +// ======== tiles_total ======== + +TEST_CASE(tiles_total_exact) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + // tiles_per_dim = {1, 1, 2, 2}, product = 4 + EXPECT(tiler::tiles_total() == 4); +} + +// ======== get_padding / left_padding / total_padding ======== + +// No Padding arg → get_padding returns zeros matching TileLens size +TEST_CASE(get_padding_default) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + constexpr auto gp = tiler::get_padding(); + EXPECT(gp.size() == 4); + EXPECT(gp[0] == 0); + EXPECT(gp[1] == 0); + EXPECT(gp[2] == 0); + EXPECT(gp[3] == 0); +} + +// No padding template arg → all zeros +TEST_CASE(padding_default_no_padding) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + constexpr auto lp = tiler::left_padding(); + constexpr auto tp = tiler::total_padding(); + EXPECT(lp[0] == 0); + EXPECT(lp[1] == 0); + EXPECT(lp[2] == 0); + EXPECT(lp[3] == 0); + EXPECT(tp[0] == 0); + EXPECT(tp[1] == 0); + EXPECT(tp[2] == 0); + EXPECT(tp[3] == 0); +} + +// Symmetric padding {1, 1, 1, 1} → left={0,0,1,1}, total={0,0,2,2} +TEST_CASE(padding_symmetric) +{ + using tiler = migraphx::spatial_tiler<1, + migraphx::index_ints<4, 4>, + decltype(make_4d_shape<1, 1, 8, 8>()), + migraphx::index_ints<1, 1, 1, 1>>; + constexpr auto lp = tiler::left_padding(); + EXPECT(lp[0] == 0); + EXPECT(lp[1] == 0); + EXPECT(lp[2] == 1); + EXPECT(lp[3] == 1); + + constexpr auto tp = tiler::total_padding(); + EXPECT(tp[0] == 0); + EXPECT(tp[1] == 0); + EXPECT(tp[2] == 2); + EXPECT(tp[3] == 2); +} + +// Asymmetric padding {1, 2, 3, 4} → left={0,0,1,2}, total={0,0,1+3,2+4}={0,0,4,6} +TEST_CASE(padding_asymmetric) +{ + using tiler = migraphx::spatial_tiler<1, + migraphx::index_ints<4, 4>, + decltype(make_4d_shape<1, 1, 8, 8>()), + migraphx::index_ints<1, 2, 3, 4>>; + constexpr auto lp = tiler::left_padding(); + EXPECT(lp[2] == 1); + EXPECT(lp[3] == 2); + + constexpr auto tp = tiler::total_padding(); + EXPECT(tp[2] == 4); + EXPECT(tp[3] == 6); +} + +// ======== is_padded ======== + +// Tiles exactly cover output, no conv padding → not padded +TEST_CASE(is_padded_exact_no_padding) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + EXPECT(not tiler::is_padded()); +} + +// Tiles don't exactly cover output (10 not divisible by 4) → padded +TEST_CASE(is_padded_overhang) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 10, 10>())>; + EXPECT(tiler::is_padded()); +} + +// Tiles exactly cover output but conv padding present → padded +TEST_CASE(is_padded_conv_padding_exact_tiles) +{ + using tiler = migraphx::spatial_tiler<1, + migraphx::index_ints<4, 4>, + decltype(make_4d_shape<1, 1, 8, 8>()), + migraphx::index_ints<1, 1, 1, 1>>; + EXPECT(tiler::is_padded()); +} + +// Both overhang and conv padding → padded +TEST_CASE(is_padded_overhang_and_conv_padding) +{ + using tiler = migraphx::spatial_tiler<1, + migraphx::index_ints<4, 4>, + decltype(make_4d_shape<1, 1, 10, 10>()), + migraphx::index_ints<1, 1, 1, 1>>; + EXPECT(tiler::is_padded()); +} + +// Edge case: tile overhang equals total padding → still padded +// out_spatial=10, tile=8, tiles_per_dim=2, tiles*tile=16, total_pad=6 +// Without the fix: 10 != 16 → padded (only by coincidence). +// With total_padding in formula: 10 != 16+6=22 → padded. +TEST_CASE(is_padded_overhang_equals_padding) +{ + // tiles_per_dim = ceil(10/8) = 2, coverage = 16, total_pad_h=3+3=6 + using tiler = migraphx::spatial_tiler<1, + migraphx::index_ints<8, 8>, + decltype(make_4d_shape<1, 1, 10, 10>()), + migraphx::index_ints<3, 3, 3, 3>>; + EXPECT(tiler::is_padded()); +} + +// Only one spatial dim has overhang +TEST_CASE(is_padded_partial_overhang) +{ + // H=8 exactly tiled by tile_h=4. W=10 not divisible by tile_w=4. + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 10>())>; + EXPECT(tiler::is_padded()); +} + +// Large padding values +TEST_CASE(is_padded_large_padding) +{ + using tiler = migraphx::spatial_tiler<1, + migraphx::index_ints<4, 4>, + decltype(make_4d_shape<1, 1, 8, 8>()), + migraphx::index_ints<3, 3, 3, 3>>; + EXPECT(tiler::is_padded()); +} + +// ======== has_nonzero ======== + +TEST_CASE(has_nonzero_all_zero) +{ + EXPECT(not migraphx::has_nonzero(migraphx::index_ints<0, 0, 0, 0>{})); +} + +TEST_CASE(has_nonzero_some_nonzero) +{ + EXPECT(migraphx::has_nonzero(migraphx::index_ints<0, 0, 1, 0>{})); +} + +TEST_CASE(has_nonzero_all_nonzero) +{ + EXPECT(migraphx::has_nonzero(migraphx::index_ints<1, 2, 3, 4>{})); +} + +// ======== halo_lens_for ======== + +// No padding: halo = output_lens + (input_spatial - out_spatial) +TEST_CASE(halo_lens_no_padding) +{ + // Output 8x8, input 10x10 (e.g. 3x3 conv), tile 4x4 + // out_spatial = {1,1,8,8}, input_spatial = {1,1,10,10} + // halo_extra = {1,1,10,10} - {1,1,8,8} + {0,0,0,0} = {0,0,2,2} + // halo_lens = output_lens + halo_extra = {1,1,4,4} + {0,0,2,2} = {1,1,6,6} + using output_shape = decltype(make_4d_shape<1, 1, 8, 8>()); + using input_shape = decltype(make_4d_shape<1, 1, 10, 10>()); + using tiler = migraphx::spatial_tiler<1, migraphx::index_ints<4, 4>, output_shape>; + + constexpr auto hl = tiler::template halo_lens_for(); + EXPECT(hl[2] == 6); + EXPECT(hl[3] == 6); +} + +// With padding: halo = output_lens + (input_spatial - out_spatial + total_padding) +TEST_CASE(halo_lens_with_padding) +{ + // Output 8x8, input 8x8 (same-padding conv), pad {1,1,1,1} → total_pad={0,0,2,2} + // halo_extra = {1,1,8,8} - {1,1,8,8} + {0,0,2,2} = {0,0,2,2} + // halo_lens = {1,1,4,4} + {0,0,2,2} = {1,1,6,6} + using output_shape = decltype(make_4d_shape<1, 1, 8, 8>()); + using input_shape = decltype(make_4d_shape<1, 1, 8, 8>()); + using tiler = migraphx::spatial_tiler<1, + migraphx::index_ints<4, 4>, + output_shape, + migraphx::index_ints<1, 1, 1, 1>>; + + constexpr auto hl = tiler::template halo_lens_for(); + EXPECT(hl[2] == 6); + EXPECT(hl[3] == 6); +} + +// ======== ndim ======== + +TEST_CASE(ndim_4d) +{ + using tiler = migraphx:: + spatial_tiler<1, migraphx::index_ints<4, 4>, decltype(make_4d_shape<1, 1, 8, 8>())>; + EXPECT(tiler::ndim() == 4); +} diff --git a/test/verify/test_channelwise_conv_padding.cpp b/test/verify/test_channelwise_conv_padding.cpp new file mode 100644 index 00000000000..fa38209e455 --- /dev/null +++ b/test/verify/test_channelwise_conv_padding.cpp @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "verify_program.hpp" +#include +#include +#include + +template +struct test_channelwise_conv_padding : verify_program> +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + auto input = mm->add_parameter("x", migraphx::shape{DType, {2, 4, 8, 8}}); + auto weights = mm->add_parameter("w", migraphx::shape{DType, {4, 1, 3, 3}}); + mm->add_instruction( + migraphx::make_op("convolution", {{"group", 4}, {"padding", {1, 1}}}), input, weights); + return p; + } + std::string section() const { return "conv"; } +}; +template struct test_channelwise_conv_padding; +template struct test_channelwise_conv_padding; diff --git a/test/verify/test_channelwise_conv_padding_1d.cpp b/test/verify/test_channelwise_conv_padding_1d.cpp new file mode 100644 index 00000000000..7e4c5f3d170 --- /dev/null +++ b/test/verify/test_channelwise_conv_padding_1d.cpp @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "verify_program.hpp" +#include +#include +#include + +template +struct test_channelwise_conv_padding_1d : verify_program> +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + auto input = mm->add_parameter("x", migraphx::shape{DType, {2, 4, 16}}); + auto weights = mm->add_parameter("w", migraphx::shape{DType, {4, 1, 3}}); + mm->add_instruction( + migraphx::make_op("convolution", + {{"padding", {1}}, {"stride", {1}}, {"dilation", {1}}, {"group", 4}}), + input, + weights); + return p; + } + std::string section() const { return "conv"; } +}; +template struct test_channelwise_conv_padding_1d; +template struct test_channelwise_conv_padding_1d; diff --git a/test/verify/test_channelwise_conv_padding_5x5.cpp b/test/verify/test_channelwise_conv_padding_5x5.cpp new file mode 100644 index 00000000000..4fcf2ce218b --- /dev/null +++ b/test/verify/test_channelwise_conv_padding_5x5.cpp @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "verify_program.hpp" +#include +#include +#include + +template +struct test_channelwise_conv_padding_5x5 : verify_program> +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + auto input = mm->add_parameter("x", migraphx::shape{DType, {1, 8, 12, 12}}); + auto weights = mm->add_parameter("w", migraphx::shape{DType, {8, 1, 5, 5}}); + mm->add_instruction( + migraphx::make_op("convolution", {{"group", 8}, {"padding", {2, 2}}}), input, weights); + return p; + } + std::string section() const { return "conv"; } +}; +template struct test_channelwise_conv_padding_5x5; +template struct test_channelwise_conv_padding_5x5; diff --git a/test/verify/test_channelwise_conv_padding_non_divisible.cpp b/test/verify/test_channelwise_conv_padding_non_divisible.cpp new file mode 100644 index 00000000000..4a1fdde33cf --- /dev/null +++ b/test/verify/test_channelwise_conv_padding_non_divisible.cpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "verify_program.hpp" +#include +#include +#include + +template +struct test_channelwise_conv_padding_non_divisible + : verify_program> +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + auto input = mm->add_parameter("x", migraphx::shape{DType, {1, 8, 30, 30}}); + auto weights = mm->add_parameter("w", migraphx::shape{DType, {8, 1, 3, 3}}); + mm->add_instruction( + migraphx::make_op("convolution", {{"group", 8}, {"padding", {1, 1}}}), input, weights); + return p; + } + std::string section() const { return "conv"; } +}; +template struct test_channelwise_conv_padding_non_divisible; +template struct test_channelwise_conv_padding_non_divisible; diff --git a/test/verify/test_channelwise_conv_padding_relu.cpp b/test/verify/test_channelwise_conv_padding_relu.cpp new file mode 100644 index 00000000000..2d934d39ac0 --- /dev/null +++ b/test/verify/test_channelwise_conv_padding_relu.cpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "verify_program.hpp" +#include +#include +#include + +template +struct test_channelwise_conv_padding_relu + : verify_program> +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + auto input = mm->add_parameter("x", migraphx::shape{DType, {1, 8, 12, 12}}); + auto weights = mm->add_parameter("w", migraphx::shape{DType, {8, 1, 3, 3}}); + auto conv = mm->add_instruction( + migraphx::make_op("convolution", {{"group", 8}, {"padding", {1, 1}}}), input, weights); + mm->add_instruction(migraphx::make_op("relu"), conv); + return p; + } + std::string section() const { return "conv"; } +}; +template struct test_channelwise_conv_padding_relu; +template struct test_channelwise_conv_padding_relu;