Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,15 @@
"CMAKE_CUDA_STANDARD": "20"
}
},
{
"name": "libcudacxx-cpp23",
"displayName": "libcu++: C++23",
"inherits": "libcudacxx",
"cacheVariables": {
"CMAKE_CXX_STANDARD": "23",
"CMAKE_CUDA_STANDARD": "23"
}
},
{
"name": "libcudacxx-nvrtc",
"inherits": "libcudacxx",
Expand Down
29 changes: 0 additions & 29 deletions c2h/include/c2h/catch2_test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,35 +33,6 @@
#include <catch2/matchers/catch_matchers_templated.hpp>
#include <catch2/matchers/catch_matchers_vector.hpp>

// workaround for error #3185-D: no '#pragma diagnostic push' was found to match this 'diagnostic pop'
#if _CCCL_COMPILER(NVHPC)
# undef CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma("diag push")
# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma("diag pop")
#endif
// The nv_diagnostic pragmas in Catch2 macros cause cicc to hang indefinitely in CTK 13.0.
// See NVBugs 5475335.
#if _CCCL_VERSION_COMPARE(_CCCL_CTK_, _CCCL_CTK, ==, 13, 0)
# undef CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
#endif
// workaround for error
// * MSVC14.39: #3185-D: no '#pragma diagnostic push' was found to match this 'diagnostic pop'
// * MSVC14.29: internal error: assertion failed: alloc_copy_of_pending_pragma: copied pragma has source sequence entry
// (pragma.c, line 526 in alloc_copy_of_pending_pragma)
// see also upstream Catch2 issue: https://github.com/catchorg/Catch2/issues/2636
#if _CCCL_COMPILER(MSVC)
# undef CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_SUPPRESS_UNUSED_VARIABLE_WARNINGS
# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_SUPPRESS_UNUSED_VARIABLE_WARNINGS
#endif

#ifndef VAR_IDX
# define VAR_IDX 0
#endif
Expand Down
29 changes: 29 additions & 0 deletions c2h/include/c2h/catch2_test_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,35 @@
// Only a subset of the Catch2's macro are provided. If needed, feel free to extend the support. Host-only macros can
// be determined by missing NV_IF_ELSE_TARGET wrapper and immediate dispatch to CATCH_-prefixed variant.

// workaround for error #3185-D: no '#pragma diagnostic push' was found to match this 'diagnostic pop'
#if _CCCL_COMPILER(NVHPC)
# undef CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma("diag push")
# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma("diag pop")
#endif
// The nv_diagnostic pragmas in Catch2 macros cause cicc to hang indefinitely in CTK 13.0.
// See NVBugs 5475335.
#if _CCCL_VERSION_COMPARE(_CCCL_CTK_, _CCCL_CTK, ==, 13, 0)
# undef CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
#endif
// workaround for error
// * MSVC14.39: #3185-D: no '#pragma diagnostic push' was found to match this 'diagnostic pop'
// * MSVC14.29: internal error: assertion failed: alloc_copy_of_pending_pragma: copied pragma has source sequence entry
// (pragma.c, line 526 in alloc_copy_of_pending_pragma)
// see also upstream Catch2 issue: https://github.com/catchorg/Catch2/issues/2636
#if _CCCL_COMPILER(MSVC)
# undef CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# undef CATCH_INTERNAL_SUPPRESS_UNUSED_VARIABLE_WARNINGS
# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
# define CATCH_INTERNAL_SUPPRESS_UNUSED_VARIABLE_WARNINGS
#endif

// We must pass the COND as a cstring parameter, because it might contain the '%' character that would break the printf
// formatting.
#define C2H_INTERNAL_DEVICE_TEST_PRINT(KIND, COND) \
Expand Down
3 changes: 3 additions & 0 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ workflows:
- {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
# clang-cuda
- {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 'all', sm: '75;80;90;100;120'}
- {jobs: ['build'], project: 'libcudacxx', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 23, sm: '75;80;90;100;120'}
# libc++
# - arm64 for now as it's closest to android.
# - {jobs: ['build'], cpu: 'arm64', project: 'libcudacxx', std: 'all', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', cmake_options: '-DCCCL_USE_LIBCXX=ON', sm: '75;80;90;100;120'}
Expand Down Expand Up @@ -229,6 +230,7 @@ workflows:
- {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
# clang-cuda
- {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 'all', sm: '75;80;90;100;120'}
- {jobs: ['build'], project: 'libcudacxx', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 23, sm: '75;80;90;100;120'}
# clang-tidy
- { jobs: ['build'], project: 'tidy', std: 'min', cxx: ['clang'], cudacxx: ['clang'], ctk: 'clang-cuda', sm: '75' }
# arch-specific and family-specific arch builds
Expand Down Expand Up @@ -321,6 +323,7 @@ workflows:
- {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
# clang-cuda
- {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 'all', sm: '75;80;90;100;120'}
- {jobs: ['build'], project: 'libcudacxx', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 23, sm: '75;80;90;100;120'}
# compute-sanitizer
- {jobs: ['compute_sanitizer'], project: 'cub', std: 'max', gpu: 'rtxa6000', sm: 'gpu', cmake_options: '-DCMAKE_CUDA_FLAGS=-lineinfo'}
# clang-tidy
Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__cmath/fpclassify.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD
template <class _Tp>
[[nodiscard]] _CCCL_API constexpr int __fpclassify_impl(_Tp __x) noexcept
{
static_assert(numeric_limits<_Tp>::has_denorm, "The type must have denorm support");
static_assert(__fp_has_denorm_v<__fp_format_of_v<_Tp>>, "The type must have denorm support");

if constexpr (numeric_limits<_Tp>::has_quiet_NaN || numeric_limits<_Tp>::has_signaling_NaN)
{
Expand Down Expand Up @@ -186,7 +186,7 @@ template <class _Tp>
#if _CCCL_HAS_NVFP8_E8M0()
[[nodiscard]] _CCCL_API constexpr int fpclassify(__nv_fp8_e8m0 __x) noexcept
{
return ((__x.__x & __fp_exp_mask_of_v<__nv_fp8_e8m0>) == __fp_exp_mask_of_v<__nv_fp8_e8m0>) ? FP_NAN : FP_NORMAL;
return (__x.__x == __fp_exp_mask_of_v<__nv_fp8_e8m0>) ? FP_NAN : FP_NORMAL;
}
#endif // _CCCL_HAS_NVFP8_E8M0()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: pre-sm-90
// UNSUPPORTED: clang && !nvcc

// UNSUPPORTED: no_execute

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ int main(int, char**)
if (threadIdx.x == 0) { init(bar_ptr, blockDim.x); } __syncthreads();

// Should fail because the barrier is in device memory.
auto token = cuda::device::barrier_arrive_tx(*bar_ptr, 1, 0);));
[[maybe_unused]] auto token = cuda::device::barrier_arrive_tx(*bar_ptr, 1, 0);));
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: pre-sm-90
// ADDITIONAL_COMPILE_DEFINITIONS: CCCL_IGNORE_DEPRECATED_API
// UNSUPPORTED: clang && !nvcc

// UNSUPPORTED: nvrtc
// NVRTC_SKIP_KERNEL_RUN // This will have effect once PR 433 is merged (line above should be removed.)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
// Suppress warning about barrier in shared memory
TEST_NV_DIAG_SUPPRESS(static_var_with_dynamic_init)

TEST_GLOBAL_VARIABLE uint64_t bar_storage;
[[maybe_unused]] TEST_GLOBAL_VARIABLE uint64_t bar_storage;

int main(int, char**)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,19 @@ TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0)
}

#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
template <
class MDS,
class... Indices,
class = cuda::std::enable_if_t<
cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Indices>()...]), typename MDS::reference>,
int> = 0>
template <class MDS, class... Indices>
requires requires(MDS mds, Indices... indices) { mds[indices...]; }
TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs)
{
unused(m[idxs...]);
return true;
}
#else // ^^^ _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() ^^^ / vvv !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() vvv
template <
class MDS,
class Index,
class = cuda::std::enable_if_t<cuda::std::is_same<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]),
typename MDS::reference>::value>>
template <class MDS,
class Index,
cuda::std::enable_if_t<cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]),
typename MDS::reference>,
int> = 0>
TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx)
{
unused(m[idx]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,19 @@ constexpr auto& access(MDS mds, int64_t i0)
}

#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
template <
class MDS,
class... Indices,
class = cuda::std::enable_if_t<
cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Indices>()...]), typename MDS::reference>,
int> = 0>
template <class MDS, class... Indices>
requires requires(MDS mds, Indices... indices) { mds[indices...]; }
constexpr bool check_operator_constraints(MDS m, Indices... idxs)
{
unused(m[idxs...]);
return true;
}
#else // ^^^ _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() ^^^ / vvv !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() vvv
template <
class MDS,
class Index,
class = cuda::std::enable_if_t<cuda::std::is_same<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]),
typename MDS::reference>::value>>
template <class MDS,
class Index,
cuda::std::enable_if_t<cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]),
typename MDS::reference>,
int> = 0>
constexpr bool check_operator_constraints(MDS m, Index idx)
{
unused(m[idx]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,8 @@ TEST_FUNC constexpr auto& access(MDS mds, int64_t i0)
}

#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
template <
class MDS,
class... Indices,
class = cuda::std::enable_if_t<
cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Indices>()...]), typename MDS::reference>,
int> = 0>
template <class MDS, class... Indices>
requires requires(MDS mds, Indices... indices) { mds[indices...]; }
TEST_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs)
{
unused(m[idxs...]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,8 @@ TEST_FUNC constexpr auto& access(MDS mds, int64_t i0)
}

#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
template <
class MDS,
class... Indices,
class = cuda::std::enable_if_t<
cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Indices>()...]), typename MDS::reference>,
int> = 0>
template <class MDS, class... Indices>
requires requires(MDS mds, Indices... indices) { mds[indices...]; }
TEST_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs)
{
unused(m[idxs...]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,19 @@ TEST_DEVICE_FUNC constexpr auto& access(MDS mds, int64_t i0)
}

#if _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS()
template <
class MDS,
class... Indices,
class = cuda::std::enable_if_t<
cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Indices>()...]), typename MDS::reference>>,
int>
= 0 > TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs)
template <class MDS, class... Indices>
requires requires(MDS mds, Indices... indices) { mds[indices...]; }
TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Indices... idxs)
{
unused(m[idxs...]);
return true;
}
#else // ^^^ _CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() ^^^ / vvv !_CCCL_HAS_MULTIARG_OPERATOR_BRACKETS() vvv
template <
class MDS,
class Index,
class = cuda::std::enable_if_t<
cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]), typename MDS::reference>>>
template <class MDS,
class Index,
cuda::std::enable_if_t<cuda::std::is_same_v<decltype(cuda::std::declval<MDS>()[cuda::std::declval<Index>()]),
typename MDS::reference>,
int> = 0>
TEST_DEVICE_FUNC constexpr bool check_operator_constraints(MDS m, Index idx)
{
unused(m[idx]);
Expand Down Expand Up @@ -166,52 +162,52 @@ TEST_DEVICE_FUNC void test_layout()
test_iteration(construct_mapping(Layout(), cuda::std::extents<int>()));
__shared__ int data[16];
// Check operator constraint for number of arguments
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))), 0));
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))), 0, 0));

// Check operator constraint for convertibility of arguments to index_type
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))), IntType(0)));
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<unsigned, D>(1))), IntType(0)));

// Check operator constraint for no-throw-constructibility of index_type from arguments
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<unsigned char, D>(1))),
IntType(0)));

// Check that mixed integrals work: note the second one tests that mdspan casts: layout_wrapping_integral does not
// accept IntType
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<unsigned char, D, D>(1, 1))),
int(0),
size_t(0)));
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D, D>(1, 1))),
unsigned(0),
IntType(0)));

constexpr bool t = true;
constexpr bool o = false;
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D, D>(1, 1))),
unsigned(0),
IntConfig<o, o, t, t>(0)));
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D, D>(1, 1))),
unsigned(0),
IntConfig<o, t, t, t>(0)));
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D, D>(1, 1))),
unsigned(0),
IntConfig<o, t, o, t>(0)));
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D, D>(1, 1))),
unsigned(0),
IntConfig<t, o, o, t>(0)));
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D, D>(1, 1))),
unsigned(0),
IntConfig<t, o, t, o>(0)));
Expand All @@ -221,22 +217,22 @@ TEST_DEVICE_FUNC void test_layout()
// const&, no-throw-ctor from non-const
if constexpr (cuda::std::is_same_v<Layout, cuda::std::layout_left>)
{
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))),
cuda::std::array{IntConfig<o, o, t, t>(0)}));
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))),
cuda::std::array{IntConfig<o, t, t, t>(0)}));
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))),
cuda::std::array{IntConfig<t, o, o, t>(0)}));
static_assert(!check_operator_constraints(
assert(!check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))),
cuda::std::array{IntConfig<t, t, o, t>(0)}));
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))),
cuda::std::array{IntConfig<t, o, t, o>(0)}));
static_assert(check_operator_constraints(
assert(check_operator_constraints(
cuda::shared_memory_mdspan(data, construct_mapping(Layout(), cuda::std::extents<int, D>(1))),
cuda::std::array{IntConfig<t, t, t, t>(0)}));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ TEST_NV_DIAG_SUPPRESS(static_var_with_dynamic_init)
static_assert(false, "Insufficient CUDA Compute Capability: cuda::device::memcpy_async_tx is not available.");
#endif // __CUDA_MINIMUM_ARCH__

alignas(16) TEST_GLOBAL_VARIABLE int gmem_x[2048];
[[maybe_unused]] alignas(16) TEST_GLOBAL_VARIABLE int gmem_x[2048];

int main(int, char**)
{
Expand Down
Loading
Loading