Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
.idea/
build*/
.cache
# Shared caches for the cu12/cu13 Python wheel builds (ccache + CPM source).
.ccache/
.cpm-cache/
.aws
.config
_deps/catch2-src/
Expand Down
11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ option(CCCL_ENABLE_THRUST "Enable the Thrust developer build." OFF)
option(CCCL_ENABLE_TESTING "Enable CUDA C++ Core Library tests." OFF)
option(CCCL_ENABLE_EXAMPLES "Enable CUDA C++ Core Library examples." OFF)
option(CCCL_ENABLE_C_PARALLEL "Enable CUDA C Parallel Library." OFF)
# Opt-in switch for the HostJIT-based v2 C Parallel library (default: OFF).
option(
  CCCL_ENABLE_C_PARALLEL_V2
  "Enable CUDA C Parallel Library v2 (HostJIT-based)."
  OFF
)
option(CCCL_ENABLE_C_EXPERIMENTAL_STF "Enable CUDA C CUDASTF Library." OFF)
option(CCCL_ENABLE_NVBENCH_HELPER "Enable the NVBench Helper Dev Build." OFF)

Expand Down Expand Up @@ -122,7 +127,11 @@ if (CCCL_ENABLE_UNSTABLE)
add_subdirectory(cudax)
endif()

if (CCCL_ENABLE_C_PARALLEL OR CCCL_ENABLE_C_EXPERIMENTAL_STF)
# The c/ tree is only entered when at least one of the C libraries
# (parallel, parallel v2, or the experimental STF bindings) is enabled.
if (
  CCCL_ENABLE_C_EXPERIMENTAL_STF
  OR CCCL_ENABLE_C_PARALLEL
  OR CCCL_ENABLE_C_PARALLEL_V2
)
  add_subdirectory(c)
endif()

Expand Down
17 changes: 9 additions & 8 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,12 @@
}
},
{
"name": "cccl-c-parallel-hostjit",
"displayName": "CCCL C Parallel Library (HostJIT)",
"inherits": "cccl-c-parallel",
"name": "cccl-c-parallel-v2",
"displayName": "CCCL C Parallel Library v2 (HostJIT)",
"inherits": "base",
"cacheVariables": {
"CCCL_C_Parallel_ENABLE_HOSTJIT": true
"CCCL_ENABLE_C_PARALLEL_V2": true,
"CCCL_C_Parallel_V2_ENABLE_TESTING": true
}
},
{
Expand Down Expand Up @@ -647,8 +648,8 @@
"configurePreset": "cccl-c-parallel"
},
{
"name": "cccl-c-parallel-hostjit",
"configurePreset": "cccl-c-parallel-hostjit"
"name": "cccl-c-parallel-v2",
"configurePreset": "cccl-c-parallel-v2"
},
{
"name": "cccl-c-stf",
Expand Down Expand Up @@ -930,8 +931,8 @@
"inherits": "base"
},
{
"name": "cccl-c-parallel-hostjit",
"configurePreset": "cccl-c-parallel-hostjit",
"name": "cccl-c-parallel-v2",
"configurePreset": "cccl-c-parallel-v2",
"inherits": "base"
},
{
Expand Down
12 changes: 12 additions & 0 deletions c/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
# Configuring both generations of the C Parallel library in one build tree is
# rejected up front with a hard error.
if (CCCL_ENABLE_C_PARALLEL_V2 AND CCCL_ENABLE_C_PARALLEL)
  message(
    FATAL_ERROR
    "CCCL_ENABLE_C_PARALLEL and CCCL_ENABLE_C_PARALLEL_V2 are mutually exclusive. "
    "v2 is the HostJIT-based successor of v1; pick one."
  )
endif()

# Each C library lives in its own subdirectory and is added only when its
# corresponding CCCL_ENABLE_* switch is on.

# v1 C Parallel library.
if (CCCL_ENABLE_C_PARALLEL)
  add_subdirectory(parallel)
endif()

# v2 (HostJIT-based) C Parallel library.
if (CCCL_ENABLE_C_PARALLEL_V2)
  add_subdirectory(parallel.v2)
endif()

# Experimental CUDASTF C library.
if (CCCL_ENABLE_C_EXPERIMENTAL_STF)
  add_subdirectory(experimental/stf)
endif()
110 changes: 110 additions & 0 deletions c/parallel.v2/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
cmake_minimum_required(VERSION 3.21)

project(CCCL_C_Parallel_V2 LANGUAGES CUDA CXX C)

# Standalone bootstrap.
# When c/parallel.v2 is configured on its own (not as a subdirectory of the
# CCCL super-project) the CCCL CMake helpers have not been loaded yet, which is
# detected by the absence of the cccl_configure_target command.
if (NOT COMMAND cccl_configure_target)
  # The repository root is two directories up: c/parallel.v2 -> c -> cccl.
  get_filename_component(
    _cccl_root
    "${CMAKE_CURRENT_SOURCE_DIR}/../.."
    ABSOLUTE
  )

  # Publish the source/binary roots as cache entries before the helper modules
  # are included.
  set(
    CCCL_BINARY_DIR
    "${CMAKE_CURRENT_BINARY_DIR}"
    CACHE PATH
    "CCCL binary root"
    FORCE
  )
  set(CCCL_SOURCE_DIR "${_cccl_root}" CACHE PATH "CCCL repo root" FORCE)

  # Load the helper modules in the same order as before.
  foreach (
    _cccl_bootstrap_module
    IN ITEMS CCCLUtilities CCCLConfigureTarget CCCLGetDependencies
  )
    include("${_cccl_root}/cmake/${_cccl_bootstrap_module}.cmake")
  endforeach()

  # Provide an empty stand-in for the compiler interface target if nothing has
  # defined it, so the link step further down still resolves.
  if (NOT TARGET cccl.compiler_interface)
    add_library(cccl.compiler_interface INTERFACE)
  endif()
endif()

# Optional override for where the built library is placed; empty means "use the
# default location". Hidden from the basic cache view.
set(
  CCCL_C_PARALLEL_V2_LIBRARY_OUTPUT_DIRECTORY
  ""
  CACHE PATH
  "Override output directory for the cccl.c.parallel.v2 library"
)
mark_as_advanced(CCCL_C_PARALLEL_V2_LIBRARY_OUTPUT_DIRECTORY)

# Tests are opt-in.
option(CCCL_C_Parallel_V2_ENABLE_TESTING "Build cccl.c.parallel.v2 tests." OFF)

# Gather every .cu / .cpp file below src/, as paths relative to this directory.
# CONFIGURE_DEPENDS makes the build re-check the glob so added or removed files
# are picked up.
file(
  GLOB_RECURSE srcs
  RELATIVE "${CMAKE_CURRENT_LIST_DIR}"
  CONFIGURE_DEPENDS
  "src/*.cu"
  "src/*.cpp"
)

# Drop editor lock/temp files (names starting with ".#").
list(FILTER srcs EXCLUDE REGEX "/\\.#")
# Drop the hostjit sources: they are compiled into their own library.
list(FILTER srcs EXCLUDE REGEX "^src/hostjit/")

# The v2 C Parallel library is a shared library built from the globbed sources.
add_library(cccl.c.parallel.v2 SHARED ${srcs})
set_target_properties(
  cccl.c.parallel.v2
  PROPERTIES POSITION_INDEPENDENT_CODE ON
)
cccl_configure_target(cccl.c.parallel.v2 DIALECT 20)

# When an override directory is configured, route every artifact kind
# (library, archive, runtime) to it.
if (CCCL_C_PARALLEL_V2_LIBRARY_OUTPUT_DIRECTORY)
  foreach (_artifact_kind IN ITEMS LIBRARY ARCHIVE RUNTIME)
    set_property(
      TARGET cccl.c.parallel.v2
      PROPERTY
        "${_artifact_kind}_OUTPUT_DIRECTORY"
        "${CCCL_C_PARALLEL_V2_LIBRARY_OUTPUT_DIRECTORY}"
    )
  endforeach()
endif()

# Resolve the CCCL / CUDA Toolkit dependencies through the CCCL helper commands.
cccl_get_cub()
cccl_get_cudatoolkit()
cccl_get_thrust()

# hostjit is built as its own library in a nested directory.
add_subdirectory(src/hostjit)

# Use the static CUDA runtime and keep every dependency private to the library.
set_property(TARGET cccl.c.parallel.v2 PROPERTY CUDA_RUNTIME_LIBRARY STATIC)
target_link_libraries(
  cccl.c.parallel.v2
  PRIVATE
    cccl.compiler_interface
    CUDA::cudart_static
    # NVRTC is needed for nvrtcGetTypeName, used in src/util/types.h.
    CUDA::nvrtc
    CUDA::cuda_driver
    CUB::CUB
    Thrust::Thrust
    # Transitively brings in nvJitLink, nvfatbin and nvptxcompiler.
    cccl.c.parallel.v2.hostjit_lib
)

# Windows builds additionally link against Dbghelp.
if (WIN32)
  target_link_libraries(cccl.c.parallel.v2 PRIVATE Dbghelp)
endif()

# Header search paths: "include" is exported to consumers, while the source
# tree and the hostjit headers are only visible when building the library.
target_include_directories(
  cccl.c.parallel.v2
  PUBLIC "include"
  PRIVATE "src" "src/hostjit/include"
)

# Extended lambdas are requested only for CUDA sources compiled by NVIDIA's
# compiler.
target_compile_options(
  cccl.c.parallel.v2
  PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--extended-lambda>
)

# CCCL_C_EXPERIMENTAL is propagated to consumers (PUBLIC).
target_compile_definitions(
  cccl.c.parallel.v2
  PUBLIC CCCL_C_EXPERIMENTAL=1
)
# These definitions only affect the library's own translation units.
target_compile_definitions(
  cccl.c.parallel.v2
  PRIVATE
    NVRTC_GET_TYPE_NAME=1
    CUB_DISABLE_CDP=1
    CUB_DEFINE_RUNTIME_POLICIES
)

# The test suite is configured only when CCCL_C_Parallel_V2_ENABLE_TESTING is on.
if (CCCL_C_Parallel_V2_ENABLE_TESTING)
  add_subdirectory(test)
endif()
76 changes: 76 additions & 0 deletions c/parallel.v2/include/cccl/c/binary_search.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#pragma once

#ifndef CCCL_C_EXPERIMENTAL
# error "C exposure is experimental and subject to change. Define CCCL_C_EXPERIMENTAL to acknowledge this notice."
#endif // !CCCL_C_EXPERIMENTAL

#include <cuda.h>
#include <stdint.h>

#include <cccl/c/extern_c.h>
#include <cccl/c/types.h>

CCCL_C_EXTERN_C_BEGIN

// Build-result record for the device binary-search algorithm.
//
// cccl_device_binary_search_build(), cccl_device_binary_search_build_ex() and
// cccl_device_binary_search_cleanup() take a pointer to this struct, while
// cccl_device_binary_search() takes it by value.
//
// NOTE(review): size_t is used here but this header does not include
// <stddef.h> directly; it presumably arrives through <cuda.h> or
// <cccl/c/types.h> — confirm.
typedef struct cccl_device_binary_search_build_result_t
{
int cc; // presumably the target compute capability — TODO confirm encoding
void* cubin; // presumably the compiled device binary — TODO confirm ownership
size_t cubin_size; // presumably the size of `cubin` in bytes — TODO confirm
void* jit_compiler; // hostjit::JITCompiler*
void* binary_search_fn; // int(*)(void*, ull, void*, ull, void*, void*)
} cccl_device_binary_search_build_result_t;

// Build entry point for the binary-search algorithm.
// Takes the result struct by pointer, the search mode, the data / values /
// output iterators, the comparison operation, the compute-capability
// major/minor pair, and four path strings (CUB, Thrust, libcudacxx, CTK).
// Returns a CUresult.
// NOTE(review): presumably fills `*build` on success — confirm against the
// implementation.
CCCL_C_API CUresult cccl_device_binary_search_build(
cccl_device_binary_search_build_result_t* build,
cccl_binary_search_mode_t mode,
cccl_iterator_t d_data,
cccl_iterator_t d_values,
cccl_iterator_t d_out,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path);

// Extended version with build configuration: same parameters as
// cccl_device_binary_search_build() plus a trailing `config` pointer.
// NOTE(review): whether `config` may be NULL is not visible here — confirm.
CCCL_C_API CUresult cccl_device_binary_search_build_ex(
cccl_device_binary_search_build_result_t* build,
cccl_binary_search_mode_t mode,
cccl_iterator_t d_data,
cccl_iterator_t d_values,
cccl_iterator_t d_out,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Run entry point: receives the build result by value, the data iterator with
// its item count, the values iterator with its value count, the output
// iterator, the operation, and the CUDA stream. Returns a CUresult.
CCCL_C_API CUresult cccl_device_binary_search(
cccl_device_binary_search_build_result_t build,
cccl_iterator_t d_data,
uint64_t num_items,
cccl_iterator_t d_values,
uint64_t num_values,
cccl_iterator_t d_out,
cccl_op_t op,
CUstream stream);

// Cleanup entry point: takes a pointer to a build result and returns a CUresult.
// NOTE(review): presumably releases the resources referenced by the struct —
// confirm.
CCCL_C_API CUresult cccl_device_binary_search_cleanup(cccl_device_binary_search_build_result_t* bld_ptr);

CCCL_C_EXTERN_C_END
23 changes: 23 additions & 0 deletions c/parallel.v2/include/cccl/c/extern_c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA Core Compute Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#pragma once

// Linkage helpers for headers shared between C and C++ translation units.
// Wrap declarations between CCCL_C_EXTERN_C_BEGIN and CCCL_C_EXTERN_C_END.
#ifdef __cplusplus

// C++: open / close an `extern "C"` block.
# define CCCL_C_EXTERN_C_BEGIN extern "C" {
# define CCCL_C_EXTERN_C_END }

#else

// C: both macros expand to nothing.
# define CCCL_C_EXTERN_C_BEGIN
# define CCCL_C_EXTERN_C_END

#endif
63 changes: 63 additions & 0 deletions c/parallel.v2/include/cccl/c/for.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#pragma once

#ifndef CCCL_C_EXPERIMENTAL
# error "C exposure is experimental and subject to change. Define CCCL_C_EXPERIMENTAL to acknowledge this notice."
#endif // !CCCL_C_EXPERIMENTAL

#include <cuda.h>
#include <stdint.h>

#include <cccl/c/extern_c.h>
#include <cccl/c/types.h>

CCCL_C_EXTERN_C_BEGIN

// Build-result record for the device for-each algorithm.
//
// cccl_device_for_build(), cccl_device_for_build_ex() and
// cccl_device_for_cleanup() take a pointer to this struct, while
// cccl_device_for() takes it by value.
//
// NOTE(review): size_t is used here but this header does not include
// <stddef.h> directly; it presumably arrives through <cuda.h> or
// <cccl/c/types.h> — confirm.
typedef struct cccl_device_for_build_result_t
{
int cc; // presumably the target compute capability — TODO confirm encoding
void* cubin; // presumably the compiled device binary — TODO confirm ownership
size_t cubin_size; // presumably the size of `cubin` in bytes — TODO confirm
void* jit_compiler; // hostjit::JITCompiler*
void* for_fn; // int(*)(void*, unsigned long long, void*)
} cccl_device_for_build_result_t;

// Build entry point for the for-each algorithm.
// Takes the result struct by pointer, the data iterator, the operation, the
// compute-capability major/minor pair, and four path strings (CUB, Thrust,
// libcudacxx, CTK). Returns a CUresult.
// NOTE(review): presumably fills `*build` on success — confirm against the
// implementation.
CCCL_C_API CUresult cccl_device_for_build(
cccl_device_for_build_result_t* build,
cccl_iterator_t d_data,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path);

// Extended version with build configuration: same parameters as
// cccl_device_for_build() plus a trailing `config` pointer.
// NOTE(review): whether `config` may be NULL is not visible here — confirm.
CCCL_C_API CUresult cccl_device_for_build_ex(
cccl_device_for_build_result_t* build,
cccl_iterator_t d_data,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Run entry point: receives the build result by value, the data iterator with
// its item count, the operation, and the CUDA stream. Returns a CUresult.
CCCL_C_API CUresult cccl_device_for(
cccl_device_for_build_result_t build, cccl_iterator_t d_data, uint64_t num_items, cccl_op_t op, CUstream stream);

// Cleanup entry point: takes a pointer to a build result and returns a CUresult.
// NOTE(review): presumably releases the resources referenced by the struct —
// confirm.
CCCL_C_API CUresult cccl_device_for_cleanup(cccl_device_for_build_result_t* bld_ptr);

CCCL_C_EXTERN_C_END
Loading
Loading