From 64dfbd5386bd472fa2d71d2f8768e3665cd8012f Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 5 May 2026 17:23:11 +0200 Subject: [PATCH 1/7] update headers for OpenCL 3.1 --- intercept/CL/cl.h | 168 ++-- intercept/CL/cl_function_types.h | 1201 ++++++++++++++++++++++++ intercept/CL/cl_gl.h | 472 +++++++--- intercept/CL/cl_icd.h | 1487 +++++------------------------- intercept/CL/cl_platform.h | 186 ++-- intercept/CL/cl_version.h | 16 +- 6 files changed, 2041 insertions(+), 1489 deletions(-) create mode 100644 intercept/CL/cl_function_types.h diff --git a/intercept/CL/cl.h b/intercept/CL/cl.h index 754d8b1d..a9779d5d 100644 --- a/intercept/CL/cl.h +++ b/intercept/CL/cl.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. + * Copyright (c) 2008-2026 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,6 +26,12 @@ extern "C" { #endif +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) +#endif + /******************************************************************************/ typedef struct _cl_platform_id * cl_platform_id; @@ -114,8 +120,11 @@ typedef cl_uint cl_kernel_exec_info; typedef cl_bitfield cl_device_atomic_capabilities; typedef cl_bitfield cl_device_device_enqueue_capabilities; typedef cl_uint cl_khronos_vendor_id; -typedef cl_properties cl_mem_properties; -typedef cl_uint cl_version; +typedef cl_properties cl_mem_properties; +#endif +typedef cl_uint cl_version; +#ifdef CL_VERSION_3_1 +typedef cl_bitfield cl_device_integer_dot_product_capabilities; #endif typedef struct _cl_image_format { @@ -135,23 +144,13 @@ typedef struct _cl_image_desc { size_t image_slice_pitch; cl_uint num_mip_levels; cl_uint num_samples; -#ifdef CL_VERSION_2_0 -#ifdef __GNUC__ - __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ -#endif -#ifdef _MSC_VER -#pragma warning( push ) -#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 /Za builds */ -#endif - union { +#if defined(CL_VERSION_2_0) && __CL_HAS_ANON_UNION__ + __CL_ANON_UNION__ union { #endif cl_mem buffer; -#ifdef CL_VERSION_2_0 +#if defined(CL_VERSION_2_0) && __CL_HAS_ANON_UNION__ cl_mem mem_object; }; -#ifdef _MSC_VER -#pragma warning( pop ) -#endif #endif } cl_image_desc; @@ -177,6 +176,19 @@ typedef struct _cl_name_version { #endif +#ifdef CL_VERSION_3_1 + +typedef struct _cl_device_integer_dot_product_acceleration_properties { + cl_bool signed_accelerated; + cl_bool unsigned_accelerated; + cl_bool mixed_signedness_accelerated; + cl_bool accumulating_saturating_signed_accelerated; + cl_bool accumulating_saturating_unsigned_accelerated; + cl_bool 
accumulating_saturating_mixed_signedness_accelerated; +} cl_device_integer_dot_product_acceleration_properties; + +#endif + /******************************************************************************/ /* Error Codes */ @@ -296,7 +308,10 @@ typedef struct _cl_name_version { #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 -#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 /* deprecated */ +#ifdef CL_VERSION_3_1 +#define CL_DEVICE_MAX_WORK_GROUP_SIZES 0x1005 +#endif #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 @@ -350,7 +365,7 @@ typedef struct _cl_name_version { /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ #ifdef CL_VERSION_1_1 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 -#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 @@ -410,12 +425,28 @@ typedef struct _cl_name_version { #define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x1067 #define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 #define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 -/* 0x106A to 0x106E - Reserved for upcoming KHR extension */ +#endif +#ifdef CL_VERSION_3_1 +#define CL_DEVICE_UUID 0x106A +#define CL_DRIVER_UUID 0x106B +#define CL_DEVICE_LUID_VALID 0x106C +#define CL_DEVICE_LUID 0x106D +#define CL_DEVICE_NODE_MASK 0x106E +#endif +#ifdef CL_VERSION_3_0 #define CL_DEVICE_OPENCL_C_FEATURES 0x106F #define CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES 0x1070 #define CL_DEVICE_PIPE_SUPPORT 0x1071 #define CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED 0x1072 #endif +#ifdef CL_VERSION_3_1 +#define 
CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES 0x1073 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT 0x1074 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED 0x1075 +#define CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS 0x12B9 +#define CL_DEVICE_SPIRV_EXTENSIONS 0x12BA +#define CL_DEVICE_SPIRV_CAPABILITIES 0x12BB +#endif /* cl_device_fp_config - bitfield */ #define CL_FP_DENORM (1 << 0) @@ -556,11 +587,8 @@ typedef struct _cl_name_version { #define CL_RGx 0x10BB #define CL_RGBx 0x10BC #endif -#ifdef CL_VERSION_1_2 -#define CL_DEPTH 0x10BD -#define CL_DEPTH_STENCIL 0x10BE -#endif #ifdef CL_VERSION_2_0 +#define CL_DEPTH 0x10BD #define CL_sRGB 0x10BF #define CL_sRGBx 0x10C0 #define CL_sRGBA 0x10C1 @@ -584,9 +612,6 @@ typedef struct _cl_name_version { #define CL_UNSIGNED_INT32 0x10DC #define CL_HALF_FLOAT 0x10DD #define CL_FLOAT 0x10DE -#ifdef CL_VERSION_1_2 -#define CL_UNORM_INT24 0x10DF -#endif #ifdef CL_VERSION_2_1 #define CL_UNORM_INT_101010_2 0x10E0 #endif @@ -907,8 +932,6 @@ typedef struct _cl_name_version { /* cl_khronos_vendor_id */ #define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 -#ifdef CL_VERSION_3_0 - /* cl_version */ #define CL_VERSION_MAJOR_BITS (10) #define CL_VERSION_MINOR_BITS (10) @@ -932,10 +955,27 @@ typedef struct _cl_name_version { (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \ ((patch) & CL_VERSION_PATCH_MASK)) +#ifdef CL_VERSION_3_1 + +/* Size Constants */ +#define CL_UUID_SIZE 16 +#define CL_LUID_SIZE 8 + +/* cl_device_integer_dot_product_capabilities - bitfield */ +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED (1 << 0) +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT (1 << 1) + #endif /********************************************************************************************************/ +/* CL_NO_PROTOTYPES implies CL_NO_CORE_PROTOTYPES: */ +#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_CORE_PROTOTYPES) +#define CL_NO_CORE_PROTOTYPES +#endif + +#if 
!defined(CL_NO_CORE_PROTOTYPES) + /* Platform API */ extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(cl_uint num_entries, @@ -1305,11 +1345,11 @@ clLinkProgram(cl_context context, #ifdef CL_VERSION_2_2 -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL clSetProgramReleaseCallback(cl_program program, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), - void * user_data) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED; + void * user_data) CL_API_SUFFIX__VERSION_2_2_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, @@ -1504,8 +1544,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, - const size_t * buffer_offset, - const size_t * host_offset, + const size_t * buffer_origin, + const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, @@ -1535,8 +1575,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, - const size_t * buffer_offset, - const size_t * host_offset, + const size_t * buffer_origin, + const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, @@ -1821,6 +1861,18 @@ clEnqueueSVMMigrateMem(cl_command_queue command_queue, #endif +#ifdef CL_VERSION_3_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSuggestedLocalWorkSize(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + size_t * suggested_local_work_size) CL_API_SUFFIX__VERSION_3_1; + +#endif + #ifdef CL_VERSION_1_2 /* Extension function access @@ -1851,11 +1903,11 @@ clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, clSetCommandQueueProperty(cl_command_queue 
command_queue, cl_command_queue_properties properties, cl_bool enable, - cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ /* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage2D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, @@ -1863,9 +1915,9 @@ clCreateImage2D(cl_context context, size_t image_height, size_t image_row_pitch, void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, @@ -1875,49 +1927,55 @@ clCreateImage3D(cl_context context, size_t image_row_pitch, size_t image_slice_pitch, void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueMarker(cl_command_queue command_queue, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_event * event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, - const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + const cl_event * event_list) 
CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL -clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; /* Deprecated OpenCL 2.0 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL clCreateSampler(cl_context context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int 
CL_API_CALL clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_event * event) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +#endif /* !defined(CL_NO_CORE_PROTOTYPES) */ #ifdef __cplusplus } #endif +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + #pragma warning( pop ) +#endif + #endif /* __OPENCL_CL_H */ diff --git a/intercept/CL/cl_function_types.h b/intercept/CL/cl_function_types.h new file mode 100644 index 00000000..a7963373 --- /dev/null +++ b/intercept/CL/cl_function_types.h @@ -0,0 +1,1201 @@ +/* + * Copyright (c) 2023-2026 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * OpenCL is a trademark of Apple Inc. used under license by Khronos. + */ + +#ifndef OPENCL_CL_FUNCTION_TYPES_H_ +#define OPENCL_CL_FUNCTION_TYPES_H_ + +// Unlike the Khronos header file, we want to unconditionally include the +// CLIntercept cl.h and not the system cl.h. 
+#include "CL/cl.h" + +typedef cl_int CL_API_CALL clGetPlatformIDs_t( + cl_uint num_entries, + cl_platform_id* platforms, + cl_uint* num_platforms); + +typedef clGetPlatformIDs_t * +clGetPlatformIDs_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetPlatformInfo_t( + cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetPlatformInfo_t * +clGetPlatformInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetDeviceIDs_t( + cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices); + +typedef clGetDeviceIDs_t * +clGetDeviceIDs_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetDeviceInfo_t( + cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetDeviceInfo_t * +clGetDeviceInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_context CL_API_CALL clCreateContext_t( + const cl_context_properties* properties, + cl_uint num_devices, + const cl_device_id* devices, + void (CL_CALLBACK* pfn_notify)(const char* errinfo, const void* private_info, size_t cb, void* user_data), + void* user_data, + cl_int* errcode_ret); + +typedef clCreateContext_t * +clCreateContext_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_context CL_API_CALL clCreateContextFromType_t( + const cl_context_properties* properties, + cl_device_type device_type, + void (CL_CALLBACK* pfn_notify)(const char* errinfo, const void* private_info, size_t cb, void* user_data), + void* user_data, + cl_int* errcode_ret); + +typedef clCreateContextFromType_t * +clCreateContextFromType_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clRetainContext_t( + cl_context context); + +typedef clRetainContext_t * +clRetainContext_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clReleaseContext_t( + cl_context 
context); + +typedef clReleaseContext_t * +clReleaseContext_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetContextInfo_t( + cl_context context, + cl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetContextInfo_t * +clGetContextInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clRetainCommandQueue_t( + cl_command_queue command_queue); + +typedef clRetainCommandQueue_t * +clRetainCommandQueue_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clReleaseCommandQueue_t( + cl_command_queue command_queue); + +typedef clReleaseCommandQueue_t * +clReleaseCommandQueue_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetCommandQueueInfo_t( + cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetCommandQueueInfo_t * +clGetCommandQueueInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem CL_API_CALL clCreateBuffer_t( + cl_context context, + cl_mem_flags flags, + size_t size, + void* host_ptr, + cl_int* errcode_ret); + +typedef clCreateBuffer_t * +clCreateBuffer_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clRetainMemObject_t( + cl_mem memobj); + +typedef clRetainMemObject_t * +clRetainMemObject_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clReleaseMemObject_t( + cl_mem memobj); + +typedef clReleaseMemObject_t * +clReleaseMemObject_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetSupportedImageFormats_t( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format* image_formats, + cl_uint* num_image_formats); + +typedef clGetSupportedImageFormats_t * +clGetSupportedImageFormats_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetMemObjectInfo_t( + cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void* 
param_value, + size_t* param_value_size_ret); + +typedef clGetMemObjectInfo_t * +clGetMemObjectInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetImageInfo_t( + cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetImageInfo_t * +clGetImageInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clRetainSampler_t( + cl_sampler sampler); + +typedef clRetainSampler_t * +clRetainSampler_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clReleaseSampler_t( + cl_sampler sampler); + +typedef clReleaseSampler_t * +clReleaseSampler_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetSamplerInfo_t( + cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetSamplerInfo_t * +clGetSamplerInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_program CL_API_CALL clCreateProgramWithSource_t( + cl_context context, + cl_uint count, + const char** strings, + const size_t* lengths, + cl_int* errcode_ret); + +typedef clCreateProgramWithSource_t * +clCreateProgramWithSource_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_program CL_API_CALL clCreateProgramWithBinary_t( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const size_t* lengths, + const unsigned char** binaries, + cl_int* binary_status, + cl_int* errcode_ret); + +typedef clCreateProgramWithBinary_t * +clCreateProgramWithBinary_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clRetainProgram_t( + cl_program program); + +typedef clRetainProgram_t * +clRetainProgram_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clReleaseProgram_t( + cl_program program); + +typedef clReleaseProgram_t * +clReleaseProgram_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clBuildProgram_t( + cl_program program, + cl_uint num_devices, + const cl_device_id* 
device_list, + const char* options, + void (CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* user_data); + +typedef clBuildProgram_t * +clBuildProgram_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetProgramInfo_t( + cl_program program, + cl_program_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetProgramInfo_t * +clGetProgramInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetProgramBuildInfo_t( + cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetProgramBuildInfo_t * +clGetProgramBuildInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_kernel CL_API_CALL clCreateKernel_t( + cl_program program, + const char* kernel_name, + cl_int* errcode_ret); + +typedef clCreateKernel_t * +clCreateKernel_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clCreateKernelsInProgram_t( + cl_program program, + cl_uint num_kernels, + cl_kernel* kernels, + cl_uint* num_kernels_ret); + +typedef clCreateKernelsInProgram_t * +clCreateKernelsInProgram_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clRetainKernel_t( + cl_kernel kernel); + +typedef clRetainKernel_t * +clRetainKernel_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clReleaseKernel_t( + cl_kernel kernel); + +typedef clReleaseKernel_t * +clReleaseKernel_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clSetKernelArg_t( + cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void* arg_value); + +typedef clSetKernelArg_t * +clSetKernelArg_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetKernelInfo_t( + cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetKernelInfo_t * +clGetKernelInfo_fn CL_API_SUFFIX__VERSION_1_0; + 
+typedef cl_int CL_API_CALL clGetKernelWorkGroupInfo_t( + cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetKernelWorkGroupInfo_t * +clGetKernelWorkGroupInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clWaitForEvents_t( + cl_uint num_events, + const cl_event* event_list); + +typedef clWaitForEvents_t * +clWaitForEvents_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetEventInfo_t( + cl_event event, + cl_event_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetEventInfo_t * +clGetEventInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clRetainEvent_t( + cl_event event); + +typedef clRetainEvent_t * +clRetainEvent_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clReleaseEvent_t( + cl_event event); + +typedef clReleaseEvent_t * +clReleaseEvent_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clGetEventProfilingInfo_t( + cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetEventProfilingInfo_t * +clGetEventProfilingInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clFlush_t( + cl_command_queue command_queue); + +typedef clFlush_t * +clFlush_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clFinish_t( + cl_command_queue command_queue); + +typedef clFinish_t * +clFinish_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueReadBuffer_t( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueReadBuffer_t * +clEnqueueReadBuffer_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL 
clEnqueueWriteBuffer_t( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueWriteBuffer_t * +clEnqueueWriteBuffer_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueCopyBuffer_t( + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueCopyBuffer_t * +clEnqueueCopyBuffer_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueReadImage_t( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t* origin, + const size_t* region, + size_t row_pitch, + size_t slice_pitch, + void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueReadImage_t * +clEnqueueReadImage_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueWriteImage_t( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t* origin, + const size_t* region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueWriteImage_t * +clEnqueueWriteImage_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueCopyImage_t( + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueCopyImage_t * +clEnqueueCopyImage_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueCopyImageToBuffer_t( + cl_command_queue 
command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueCopyImageToBuffer_t * +clEnqueueCopyImageToBuffer_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueCopyBufferToImage_t( + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueCopyBufferToImage_t * +clEnqueueCopyBufferToImage_fn CL_API_SUFFIX__VERSION_1_0; + +typedef void* CL_API_CALL clEnqueueMapBuffer_t( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event, + cl_int* errcode_ret); + +typedef clEnqueueMapBuffer_t * +clEnqueueMapBuffer_fn CL_API_SUFFIX__VERSION_1_0; + +typedef void* CL_API_CALL clEnqueueMapImage_t( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t* origin, + const size_t* region, + size_t* image_row_pitch, + size_t* image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event, + cl_int* errcode_ret); + +typedef clEnqueueMapImage_t * +clEnqueueMapImage_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueUnmapMemObject_t( + cl_command_queue command_queue, + cl_mem memobj, + void* mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueUnmapMemObject_t * +clEnqueueUnmapMemObject_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueNDRangeKernel_t( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + 
const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueNDRangeKernel_t * +clEnqueueNDRangeKernel_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clEnqueueNativeKernel_t( + cl_command_queue command_queue, + void (CL_CALLBACK* user_func)(void*), + void* args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem* mem_list, + const void** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueNativeKernel_t * +clEnqueueNativeKernel_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL clSetCommandQueueProperty_t( + cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties* old_properties); + +typedef clSetCommandQueueProperty_t * +clSetCommandQueueProperty_fn CL_API_SUFFIX__VERSION_1_0_DEPRECATED; + +typedef cl_mem CL_API_CALL clCreateImage2D_t( + cl_context context, + cl_mem_flags flags, + const cl_image_format* image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void* host_ptr, + cl_int* errcode_ret); + +typedef clCreateImage2D_t * +clCreateImage2D_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_mem CL_API_CALL clCreateImage3D_t( + cl_context context, + cl_mem_flags flags, + const cl_image_format* image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void* host_ptr, + cl_int* errcode_ret); + +typedef clCreateImage3D_t * +clCreateImage3D_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int CL_API_CALL clEnqueueMarker_t( + cl_command_queue command_queue, + cl_event* event); + +typedef clEnqueueMarker_t * +clEnqueueMarker_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int CL_API_CALL clEnqueueWaitForEvents_t( + 
cl_command_queue command_queue, + cl_uint num_events, + const cl_event* event_list); + +typedef clEnqueueWaitForEvents_t * +clEnqueueWaitForEvents_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int CL_API_CALL clEnqueueBarrier_t( + cl_command_queue command_queue); + +typedef clEnqueueBarrier_t * +clEnqueueBarrier_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int CL_API_CALL clUnloadCompiler_t( + void ); + +typedef clUnloadCompiler_t * +clUnloadCompiler_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef void* CL_API_CALL clGetExtensionFunctionAddress_t( + const char* func_name); + +typedef clGetExtensionFunctionAddress_t * +clGetExtensionFunctionAddress_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_command_queue CL_API_CALL clCreateCommandQueue_t( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int* errcode_ret); + +typedef clCreateCommandQueue_t * +clCreateCommandQueue_fn CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +typedef cl_sampler CL_API_CALL clCreateSampler_t( + cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* errcode_ret); + +typedef clCreateSampler_t * +clCreateSampler_fn CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +typedef cl_int CL_API_CALL clEnqueueTask_t( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueTask_t * +clEnqueueTask_fn CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef CL_VERSION_1_1 + +typedef cl_mem CL_API_CALL clCreateSubBuffer_t( + cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void* buffer_create_info, + cl_int* errcode_ret); + +typedef clCreateSubBuffer_t * +clCreateSubBuffer_fn CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int CL_API_CALL clSetMemObjectDestructorCallback_t( + cl_mem memobj, + void (CL_CALLBACK* pfn_notify)(cl_mem memobj, void* 
user_data), + void* user_data); + +typedef clSetMemObjectDestructorCallback_t * +clSetMemObjectDestructorCallback_fn CL_API_SUFFIX__VERSION_1_1; + +typedef cl_event CL_API_CALL clCreateUserEvent_t( + cl_context context, + cl_int* errcode_ret); + +typedef clCreateUserEvent_t * +clCreateUserEvent_fn CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int CL_API_CALL clSetUserEventStatus_t( + cl_event event, + cl_int execution_status); + +typedef clSetUserEventStatus_t * +clSetUserEventStatus_fn CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int CL_API_CALL clSetEventCallback_t( + cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK* pfn_notify)(cl_event event, cl_int event_command_status, void *user_data), + void* user_data); + +typedef clSetEventCallback_t * +clSetEventCallback_fn CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int CL_API_CALL clEnqueueReadBufferRect_t( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t* buffer_origin, + const size_t* host_origin, + const size_t* region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueReadBufferRect_t * +clEnqueueReadBufferRect_fn CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int CL_API_CALL clEnqueueWriteBufferRect_t( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t* buffer_origin, + const size_t* host_origin, + const size_t* region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueWriteBufferRect_t * +clEnqueueWriteBufferRect_fn CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int CL_API_CALL clEnqueueCopyBufferRect_t( + cl_command_queue command_queue, + cl_mem src_buffer, + 
cl_mem dst_buffer, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueCopyBufferRect_t * +clEnqueueCopyBufferRect_fn CL_API_SUFFIX__VERSION_1_1; + +#endif /* CL_VERSION_1_1 */ + +#ifdef CL_VERSION_1_2 + +typedef cl_int CL_API_CALL clCreateSubDevices_t( + cl_device_id in_device, + const cl_device_partition_property* properties, + cl_uint num_devices, + cl_device_id* out_devices, + cl_uint* num_devices_ret); + +typedef clCreateSubDevices_t * +clCreateSubDevices_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clRetainDevice_t( + cl_device_id device); + +typedef clRetainDevice_t * +clRetainDevice_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clReleaseDevice_t( + cl_device_id device); + +typedef clReleaseDevice_t * +clReleaseDevice_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem CL_API_CALL clCreateImage_t( + cl_context context, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + void* host_ptr, + cl_int* errcode_ret); + +typedef clCreateImage_t * +clCreateImage_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program CL_API_CALL clCreateProgramWithBuiltInKernels_t( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const char* kernel_names, + cl_int* errcode_ret); + +typedef clCreateProgramWithBuiltInKernels_t * +clCreateProgramWithBuiltInKernels_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clCompileProgram_t( + cl_program program, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + cl_uint num_input_headers, + const cl_program* input_headers, + const char** header_include_names, + void (CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* user_data); + +typedef 
clCompileProgram_t * +clCompileProgram_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program CL_API_CALL clLinkProgram_t( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + cl_uint num_input_programs, + const cl_program* input_programs, + void (CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* user_data, + cl_int* errcode_ret); + +typedef clLinkProgram_t * +clLinkProgram_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clUnloadPlatformCompiler_t( + cl_platform_id platform); + +typedef clUnloadPlatformCompiler_t * +clUnloadPlatformCompiler_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clGetKernelArgInfo_t( + cl_kernel kernel, + cl_uint arg_index, + cl_kernel_arg_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetKernelArgInfo_t * +clGetKernelArgInfo_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clEnqueueFillBuffer_t( + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueFillBuffer_t * +clEnqueueFillBuffer_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clEnqueueFillImage_t( + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueFillImage_t * +clEnqueueFillImage_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clEnqueueMigrateMemObjects_t( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueMigrateMemObjects_t * 
+clEnqueueMigrateMemObjects_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clEnqueueMarkerWithWaitList_t( + cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueMarkerWithWaitList_t * +clEnqueueMarkerWithWaitList_fn CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int CL_API_CALL clEnqueueBarrierWithWaitList_t( + cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueBarrierWithWaitList_t * +clEnqueueBarrierWithWaitList_fn CL_API_SUFFIX__VERSION_1_2; + +typedef void* CL_API_CALL clGetExtensionFunctionAddressForPlatform_t( + cl_platform_id platform, + const char* func_name); + +typedef clGetExtensionFunctionAddressForPlatform_t * +clGetExtensionFunctionAddressForPlatform_fn CL_API_SUFFIX__VERSION_1_2; + +#endif /* CL_VERSION_1_2 */ + +#ifdef CL_VERSION_2_0 + +typedef cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties_t( + cl_context context, + cl_device_id device, + const cl_queue_properties* properties, + cl_int* errcode_ret); + +typedef clCreateCommandQueueWithProperties_t * +clCreateCommandQueueWithProperties_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_mem CL_API_CALL clCreatePipe_t( + cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties* properties, + cl_int* errcode_ret); + +typedef clCreatePipe_t * +clCreatePipe_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clGetPipeInfo_t( + cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetPipeInfo_t * +clGetPipeInfo_fn CL_API_SUFFIX__VERSION_2_0; + +typedef void* CL_API_CALL clSVMAlloc_t( + cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment); + +typedef clSVMAlloc_t * +clSVMAlloc_fn CL_API_SUFFIX__VERSION_2_0; + +typedef void 
CL_API_CALL clSVMFree_t( + cl_context context, + void* svm_pointer); + +typedef clSVMFree_t * +clSVMFree_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_sampler CL_API_CALL clCreateSamplerWithProperties_t( + cl_context context, + const cl_sampler_properties* sampler_properties, + cl_int* errcode_ret); + +typedef clCreateSamplerWithProperties_t * +clCreateSamplerWithProperties_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clSetKernelArgSVMPointer_t( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value); + +typedef clSetKernelArgSVMPointer_t * +clSetKernelArgSVMPointer_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clSetKernelExecInfo_t( + cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void* param_value); + +typedef clSetKernelExecInfo_t * +clSetKernelExecInfo_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clEnqueueSVMFree_t( + cl_command_queue command_queue, + cl_uint num_svm_pointers, + void* svm_pointers[], + void (CL_CALLBACK* pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), + void* user_data, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueSVMFree_t * +clEnqueueSVMFree_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clEnqueueSVMMemcpy_t( + cl_command_queue command_queue, + cl_bool blocking_copy, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueSVMMemcpy_t * +clEnqueueSVMMemcpy_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clEnqueueSVMMemFill_t( + cl_command_queue command_queue, + void* svm_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueSVMMemFill_t * +clEnqueueSVMMemFill_fn 
CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clEnqueueSVMMap_t( + cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void* svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueSVMMap_t * +clEnqueueSVMMap_fn CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int CL_API_CALL clEnqueueSVMUnmap_t( + cl_command_queue command_queue, + void* svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueSVMUnmap_t * +clEnqueueSVMUnmap_fn CL_API_SUFFIX__VERSION_2_0; + +#endif /* CL_VERSION_2_0 */ + +#ifdef CL_VERSION_2_1 + +typedef cl_int CL_API_CALL clSetDefaultDeviceCommandQueue_t( + cl_context context, + cl_device_id device, + cl_command_queue command_queue); + +typedef clSetDefaultDeviceCommandQueue_t * +clSetDefaultDeviceCommandQueue_fn CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int CL_API_CALL clGetDeviceAndHostTimer_t( + cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp); + +typedef clGetDeviceAndHostTimer_t * +clGetDeviceAndHostTimer_fn CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int CL_API_CALL clGetHostTimer_t( + cl_device_id device, + cl_ulong* host_timestamp); + +typedef clGetHostTimer_t * +clGetHostTimer_fn CL_API_SUFFIX__VERSION_2_1; + +typedef cl_program CL_API_CALL clCreateProgramWithIL_t( + cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret); + +typedef clCreateProgramWithIL_t * +clCreateProgramWithIL_fn CL_API_SUFFIX__VERSION_2_1; + +typedef cl_kernel CL_API_CALL clCloneKernel_t( + cl_kernel source_kernel, + cl_int* errcode_ret); + +typedef clCloneKernel_t * +clCloneKernel_fn CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int CL_API_CALL clGetKernelSubGroupInfo_t( + cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* 
param_value, + size_t* param_value_size_ret); + +typedef clGetKernelSubGroupInfo_t * +clGetKernelSubGroupInfo_fn CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int CL_API_CALL clEnqueueSVMMigrateMem_t( + cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void** svm_pointers, + const size_t* sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueSVMMigrateMem_t * +clEnqueueSVMMigrateMem_fn CL_API_SUFFIX__VERSION_2_1; + +#endif /* CL_VERSION_2_1 */ + +#ifdef CL_VERSION_2_2 + +typedef cl_int CL_API_CALL clSetProgramSpecializationConstant_t( + cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value); + +typedef clSetProgramSpecializationConstant_t * +clSetProgramSpecializationConstant_fn CL_API_SUFFIX__VERSION_2_2; + +typedef cl_int CL_API_CALL clSetProgramReleaseCallback_t( + cl_program program, + void (CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* user_data); + +typedef clSetProgramReleaseCallback_t * +clSetProgramReleaseCallback_fn CL_API_SUFFIX__VERSION_2_2_DEPRECATED; + +#endif /* CL_VERSION_2_2 */ + +#ifdef CL_VERSION_3_0 + +typedef cl_int CL_API_CALL clSetContextDestructorCallback_t( + cl_context context, + void (CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), + void* user_data); + +typedef clSetContextDestructorCallback_t * +clSetContextDestructorCallback_fn CL_API_SUFFIX__VERSION_3_0; + +typedef cl_mem CL_API_CALL clCreateBufferWithProperties_t( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + size_t size, + void* host_ptr, + cl_int* errcode_ret); + +typedef clCreateBufferWithProperties_t * +clCreateBufferWithProperties_fn CL_API_SUFFIX__VERSION_3_0; + +typedef cl_mem CL_API_CALL clCreateImageWithProperties_t( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* 
image_desc, + void* host_ptr, + cl_int* errcode_ret); + +typedef clCreateImageWithProperties_t * +clCreateImageWithProperties_fn CL_API_SUFFIX__VERSION_3_0; + +#endif /* CL_VERSION_3_0 */ + +#ifdef CL_VERSION_3_1 + +typedef cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSize_t( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_1; + +typedef clGetKernelSuggestedLocalWorkSize_t * +clGetKernelSuggestedLocalWorkSize_fn CL_API_SUFFIX__VERSION_3_1; + +#endif /* CL_VERSION_3_1 */ + +#endif /* OPENCL_CL_FUNCTION_TYPES_H_ */ diff --git a/intercept/CL/cl_gl.h b/intercept/CL/cl_gl.h index b704ffe4..5769054e 100644 --- a/intercept/CL/cl_gl.h +++ b/intercept/CL/cl_gl.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. + * Copyright (c) 2008-2026 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,148 +14,406 @@ * limitations under the License. ******************************************************************************/ -#ifndef __OPENCL_CL_GL_H -#define __OPENCL_CL_GL_H +#ifndef OPENCL_CL_GL_H_ +#define OPENCL_CL_GL_H_ // Unlike the Khronos header file, we want to unconditonally include the // CLIntercept cl.h, and not the system cl.h. 
#include "CL/cl.h" -#ifdef __cplusplus -extern "C" { +/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */ +#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES) +#define CL_NO_EXTENSION_PROTOTYPES #endif -typedef cl_uint cl_gl_object_type; -typedef cl_uint cl_gl_texture_info; -typedef cl_uint cl_gl_platform_info; -typedef struct __GLsync *cl_GLsync; - -/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ -#define CL_GL_OBJECT_BUFFER 0x2000 -#define CL_GL_OBJECT_TEXTURE2D 0x2001 -#define CL_GL_OBJECT_TEXTURE3D 0x2002 -#define CL_GL_OBJECT_RENDERBUFFER 0x2003 -#ifdef CL_VERSION_1_2 -#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E -#define CL_GL_OBJECT_TEXTURE1D 0x200F -#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 -#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 +/* CL_NO_EXTENSION_PROTOTYPES implies + CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and + CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */ +#if defined(CL_NO_EXTENSION_PROTOTYPES) && \ + !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) +#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES +#endif +#if defined(CL_NO_EXTENSION_PROTOTYPES) && \ + !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) +#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES #endif -/* cl_gl_texture_info */ -#define CL_GL_TEXTURE_TARGET 0x2004 -#define CL_GL_MIPMAP_LEVEL 0x2005 -#ifdef CL_VERSION_1_2 -#define CL_GL_NUM_SAMPLES 0x2012 +#ifdef __cplusplus +extern "C" { #endif +/*************************************************************** +* cl_khr_gl_sharing +***************************************************************/ +#define cl_khr_gl_sharing 1 +#define CL_KHR_GL_SHARING_EXTENSION_NAME \ + "cl_khr_gl_sharing" + + +#define CL_KHR_GL_SHARING_EXTENSION_VERSION CL_MAKE_VERSION(1, 0, 0) + +typedef int cl_GLint; +typedef unsigned int cl_GLenum; +typedef unsigned int cl_GLuint; + +typedef cl_uint cl_gl_context_info; + +/* Error codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* 
cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; + +/* cl_gl_object_type */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 + +#if defined(CL_VERSION_1_2) +/* cl_gl_object_type */ +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 + +#endif /* defined(CL_VERSION_1_2) */ + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 + + +typedef cl_int CL_API_CALL +clGetGLContextInfoKHR_t( + const cl_context_properties* properties, + cl_gl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetGLContextInfoKHR_t * +clGetGLContextInfoKHR_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem CL_API_CALL +clCreateFromGLBuffer_t( + cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int* errcode_ret); + +typedef clCreateFromGLBuffer_t * +clCreateFromGLBuffer_fn CL_API_SUFFIX__VERSION_1_0; + +#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR( + const cl_context_properties* properties, + cl_gl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLBuffer(cl_context context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * errcode_ret) 
CL_API_SUFFIX__VERSION_1_0; +clCreateFromGLBuffer( + cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */ + +#if defined(CL_VERSION_1_2) + +typedef cl_mem CL_API_CALL +clCreateFromGLTexture_t( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret); + +typedef clCreateFromGLTexture_t * +clCreateFromGLTexture_fn CL_API_SUFFIX__VERSION_1_2; -#ifdef CL_VERSION_1_2 +#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLTexture(cl_context context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texture, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; +clCreateFromGLTexture( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; -#endif +#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */ + +#endif /* defined(CL_VERSION_1_2) */ + + +typedef cl_mem CL_API_CALL +clCreateFromGLRenderbuffer_t( + cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int* errcode_ret); + +typedef clCreateFromGLRenderbuffer_t * +clCreateFromGLRenderbuffer_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL +clGetGLObjectInfo_t( + cl_mem memobj, + cl_gl_object_type* gl_object_type, + cl_GLuint* gl_object_name); + +typedef clGetGLObjectInfo_t * +clGetGLObjectInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL +clGetGLTextureInfo_t( + cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef clGetGLTextureInfo_t * +clGetGLTextureInfo_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL +clEnqueueAcquireGLObjects_t( + cl_command_queue command_queue, + cl_uint num_objects, + const 
cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueAcquireGLObjects_t * +clEnqueueAcquireGLObjects_fn CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int CL_API_CALL +clEnqueueReleaseGLObjects_t( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef clEnqueueReleaseGLObjects_t * +clEnqueueReleaseGLObjects_fn CL_API_SUFFIX__VERSION_1_0; + +#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLRenderbuffer(cl_context context, - cl_mem_flags flags, - cl_GLuint renderbuffer, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; +clCreateFromGLRenderbuffer( + cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL -clGetGLObjectInfo(cl_mem memobj, - cl_gl_object_type * gl_object_type, - cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; +clGetGLObjectInfo( + cl_mem memobj, + cl_gl_object_type* gl_object_type, + cl_GLuint* gl_object_name) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL -clGetGLTextureInfo(cl_mem memobj, - cl_gl_texture_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; +clGetGLTextureInfo( + cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; +clEnqueueAcquireGLObjects( + cl_command_queue command_queue, + cl_uint num_objects, + const 
cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - - -/* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL -clCreateFromGLTexture2D(cl_context context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL -clCreateFromGLTexture3D(cl_context context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -/* cl_khr_gl_sharing extension */ +clEnqueueReleaseGLObjects( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_0; -#define cl_khr_gl_sharing 1 +#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */ + +/* OpenCL 1.0 APIs that were deprecated in OpenCL 1.2 */ + +typedef cl_mem CL_API_CALL +clCreateFromGLTexture2D_t( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret); + +typedef clCreateFromGLTexture2D_t * +clCreateFromGLTexture2D_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_mem CL_API_CALL +clCreateFromGLTexture3D_t( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret); + +typedef clCreateFromGLTexture3D_t * +clCreateFromGLTexture3D_fn CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + 
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture2D( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture3D( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */ + +/*************************************************************** +* cl_khr_gl_event +***************************************************************/ +#define cl_khr_gl_event 1 +#define CL_KHR_GL_EVENT_EXTENSION_NAME \ + "cl_khr_gl_event" + + +#define CL_KHR_GL_EVENT_EXTENSION_VERSION CL_MAKE_VERSION(1, 0, 0) + +typedef struct __GLsync * cl_GLsync; -typedef cl_uint cl_gl_context_info; +/* cl_command_type */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D -/* Additional Error Codes */ -#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 -/* cl_gl_context_info */ -#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 -#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 +typedef cl_event CL_API_CALL +clCreateEventFromGLsyncKHR_t( + cl_context context, + cl_GLsync sync, + cl_int* errcode_ret); -/* Additional cl_context_properties */ -#define CL_GL_CONTEXT_KHR 0x2008 -#define CL_EGL_DISPLAY_KHR 0x2009 -#define CL_GLX_DISPLAY_KHR 0x200A -#define CL_WGL_HDC_KHR 0x200B -#define CL_CGL_SHAREGROUP_KHR 0x200C +typedef clCreateEventFromGLsyncKHR_t * +clCreateEventFromGLsyncKHR_fn CL_API_SUFFIX__VERSION_1_1; + +#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR( + cl_context context, + cl_GLsync sync, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */ + 
+/*************************************************************** +* cl_khr_gl_depth_images +***************************************************************/ +#define cl_khr_gl_depth_images 1 +#define CL_KHR_GL_DEPTH_IMAGES_EXTENSION_NAME \ + "cl_khr_gl_depth_images" + + +#define CL_KHR_GL_DEPTH_IMAGES_EXTENSION_VERSION CL_MAKE_VERSION(1, 0, 0) + +/* cl_channel_order */ +#define CL_DEPTH_STENCIL 0x10BE + +/* cl_channel_type */ +#define CL_UNORM_INT24 0x10DF + +/*************************************************************** +* cl_khr_gl_msaa_sharing +***************************************************************/ +#define cl_khr_gl_msaa_sharing 1 +#define CL_KHR_GL_MSAA_SHARING_EXTENSION_NAME \ + "cl_khr_gl_msaa_sharing" + + +#define CL_KHR_GL_MSAA_SHARING_EXTENSION_VERSION CL_MAKE_VERSION(1, 0, 0) + +/* cl_gl_texture_info */ +#define CL_GL_NUM_SAMPLES 0x2012 + +/*************************************************************** +* cl_intel_sharing_format_query_gl +***************************************************************/ +#define cl_intel_sharing_format_query_gl 1 +#define CL_INTEL_SHARING_FORMAT_QUERY_GL_EXTENSION_NAME \ + "cl_intel_sharing_format_query_gl" + + +#define CL_INTEL_SHARING_FORMAT_QUERY_GL_EXTENSION_VERSION CL_MAKE_VERSION(0, 0, 0) + +/* when cl_khr_gl_sharing is supported */ + +typedef cl_int CL_API_CALL +clGetSupportedGLTextureFormatsINTEL_t( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats); + +typedef clGetSupportedGLTextureFormatsINTEL_t * +clGetSupportedGLTextureFormatsINTEL_fn ; + +#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) extern CL_API_ENTRY cl_int CL_API_CALL -clGetGLContextInfoKHR(const cl_context_properties * properties, - cl_gl_context_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clGetGLContextInfoKHR_fn)( - const cl_context_properties * properties, - cl_gl_context_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); +clGetSupportedGLTextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats) ; + +#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */ #ifdef __cplusplus } #endif -#endif /* __OPENCL_CL_GL_H */ +#endif /* OPENCL_CL_GL_H_ */ diff --git a/intercept/CL/cl_icd.h b/intercept/CL/cl_icd.h index bf740dd8..89a927da 100644 --- a/intercept/CL/cl_icd.h +++ b/intercept/CL/cl_icd.h @@ -19,1296 +19,299 @@ #include "CL/cl.h" #include "CL/cl_gl.h" +#include "CL/cl_function_types.h" #include "cli_ext.h" -#ifdef __cplusplus -extern "C" { -#endif - -/* - * This file contains pointer type definitions for each of the CL API calls as - * well as a type definition for the dispatch table used by the Khronos ICD - * loader (see cl_khr_icd extension specification for background). 
- */ - -/* API function pointer definitions */ - -// Platform APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( - cl_uint num_entries, cl_platform_id *platforms, - cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( - cl_platform_id platform, cl_platform_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Device APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( - cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, - cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( - cl_device_id device, cl_device_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( - cl_device_id in_device, - const cl_device_partition_property *partition_properties, - cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( - cl_device_id device) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( - cl_device_id device) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCreateSubDevices; -typedef void *cl_api_clRetainDevice; -typedef void *cl_api_clReleaseDevice; - -#endif - -// Context APIs -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( - const cl_context_properties *properties, cl_uint num_devices, - const cl_device_id *devices, - void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), - void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( - const 
cl_context_properties *properties, cl_device_type device_type, - void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), - void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( - cl_context context) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( - cl_context context) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( - cl_context context, cl_context_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Command Queue APIs -typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( - cl_context context, cl_device_id device, - cl_command_queue_properties properties, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY -cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( - cl_context /* context */, cl_device_id /* device */, - const cl_queue_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clCreateCommandQueueWithProperties; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( - cl_command_queue command_queue, cl_command_queue_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Memory Object APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( - cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, - cl_int *errcode_ret) 
CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( - cl_context context, cl_mem_flags flags, const cl_image_format *image_format, - const cl_image_desc *image_desc, void *host_ptr, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCreateImage; - -#endif - -#ifdef CL_VERSION_3_0 - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( - cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, - size_t size, void *host_ptr, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( - cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL* cl_api_clSetContextDestructorCallback)( - cl_context context, - void(CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), - void* user_data) CL_API_SUFFIX__VERSION_3_0; - -#else - -typedef void *cl_api_clCreateBufferWithProperties; -typedef void *cl_api_clCreateImageWithProperties; -typedef void *cl_api_clSetContextDestructorCallback; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainMemObject)( - cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( - cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( - cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, - cl_uint num_entries, cl_image_format *image_formats, - cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( - cl_mem memobj, cl_mem_info param_name, size_t param_value_size, - void *param_value, size_t 
*param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( - cl_mem image, cl_image_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( - cl_context /* context */, cl_mem_flags /* flags */, - cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, - const cl_pipe_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( - cl_mem /* pipe */, cl_pipe_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( - cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, - unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( - cl_context /* context */, - void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clCreatePipe; -typedef void *cl_api_clGetPipeInfo; -typedef void *cl_api_clSVMAlloc; -typedef void *cl_api_clSVMFree; - -#endif - -// Sampler APIs -typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( - cl_context context, cl_bool normalized_coords, - cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( - cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( - cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( - cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, - void *param_value, size_t 
*param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY -cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( - cl_context /* context */, - const cl_sampler_properties * /* sampler_properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clCreateSamplerWithProperties; - -#endif - -// Program Object APIs -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( - cl_context context, cl_uint count, const char **strings, - const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( - cl_context context, cl_uint num_devices, const cl_device_id *device_list, - const size_t *lengths, const unsigned char **binaries, - cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY -cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( - cl_context context, cl_uint num_devices, const cl_device_id *device_list, - const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCreateProgramWithBuiltInKernels; - +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( - cl_program program) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( - cl_program program) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( - cl_program program, cl_uint num_devices, const cl_device_id *device_list, - const char *options, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data) CL_API_SUFFIX__VERSION_1_0; - 
-#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( - cl_program program, cl_uint num_devices, const cl_device_id *device_list, - const char *options, cl_uint num_input_headers, - const cl_program *input_headers, const char **header_include_names, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( - cl_context context, cl_uint num_devices, const cl_device_id *device_list, - const char *options, cl_uint num_input_programs, - const cl_program *input_programs, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCompileProgram; -typedef void *cl_api_clLinkProgram; - -#endif - -#ifdef CL_VERSION_2_2 - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( - cl_program program, cl_uint spec_id, size_t spec_size, - const void *spec_value) CL_API_SUFFIX__VERSION_2_2; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( - cl_program program, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data) CL_API_SUFFIX__VERSION_2_2; - -#else - -typedef void *cl_api_clSetProgramSpecializationConstant; -typedef void *cl_api_clSetProgramReleaseCallback; - -#endif - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( - cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clUnloadPlatformCompiler; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( - cl_program program, cl_program_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( - cl_program program, 
cl_device_id device, cl_program_build_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Kernel Object APIs -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( - cl_program program, const char *kernel_name, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( - cl_program program, cl_uint num_kernels, cl_kernel *kernels, - cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( - cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( - cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( - cl_kernel kernel, cl_uint arg_index, size_t arg_size, - const void *arg_value) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( - cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( - cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clGetKernelArgInfo; - +#ifdef __cplusplus +extern "C" { #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( - cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( - cl_kernel /* kernel */, cl_uint /* arg_index */, - const void * /* arg_value */) 
CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( - cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, - size_t /* param_value_size */, - const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( - cl_kernel /* in_kernel */, cl_device_id /*in_device*/, - cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, - const void * /*input_value*/, size_t /*param_value_size*/, - void * /*param_value*/, - size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clSetKernelArgSVMPointer; -typedef void *cl_api_clSetKernelExecInfo; -typedef void *cl_api_clGetKernelSubGroupInfoKHR; +/* Vendor dispatch table structure */ +typedef struct _cl_icd_dispatch { + /* OpenCL 1.0 */ +#if __CL_HAS_ANON_UNION__ + __CL_ANON_UNION__ union { #endif - -// Event Object APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( - cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( - cl_event event, cl_event_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) - CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) - CL_API_SUFFIX__VERSION_1_0; - -// Profiling APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( - cl_event event, cl_profiling_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Flush and Finish APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( 
- cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -// Enqueued Commands APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, - size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, - const size_t *buffer_origin, const size_t *host_origin, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, void *ptr, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_1; - -#else - -typedef void *cl_api_clEnqueueReadBufferRect; - + clGetPlatformIDs_t *clGetPlatformIDs; +#if __CL_HAS_ANON_UNION__ + /* Set to CL_ICD2_TAG_KHR for cl_khr_icd 2.0.0 */ + intptr_t clGetPlatformIDs_icd2_tag; + }; #endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, - size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, - const size_t *buffer_origin, const size_t *host_origin, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_1; - -#else - -typedef void *cl_api_clEnqueueWriteBufferRect; - + clGetPlatformInfo_t 
*clGetPlatformInfo; + clGetDeviceIDs_t *clGetDeviceIDs; + clGetDeviceInfo_t *clGetDeviceInfo; + clCreateContext_t *clCreateContext; + clCreateContextFromType_t *clCreateContextFromType; + clRetainContext_t *clRetainContext; + clReleaseContext_t *clReleaseContext; + clGetContextInfo_t *clGetContextInfo; + clCreateCommandQueue_t *clCreateCommandQueue; + clRetainCommandQueue_t *clRetainCommandQueue; + clReleaseCommandQueue_t *clReleaseCommandQueue; + clGetCommandQueueInfo_t *clGetCommandQueueInfo; + clSetCommandQueueProperty_t *clSetCommandQueueProperty; + clCreateBuffer_t *clCreateBuffer; + clCreateImage2D_t *clCreateImage2D; + clCreateImage3D_t *clCreateImage3D; + clRetainMemObject_t *clRetainMemObject; + clReleaseMemObject_t *clReleaseMemObject; + clGetSupportedImageFormats_t *clGetSupportedImageFormats; + clGetMemObjectInfo_t *clGetMemObjectInfo; + clGetImageInfo_t *clGetImageInfo; + clCreateSampler_t *clCreateSampler; + clRetainSampler_t *clRetainSampler; + clReleaseSampler_t *clReleaseSampler; + clGetSamplerInfo_t *clGetSamplerInfo; + clCreateProgramWithSource_t *clCreateProgramWithSource; + clCreateProgramWithBinary_t *clCreateProgramWithBinary; + clRetainProgram_t *clRetainProgram; + clReleaseProgram_t *clReleaseProgram; + clBuildProgram_t *clBuildProgram; +#if __CL_HAS_ANON_UNION__ + __CL_ANON_UNION__ union { #endif - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( - cl_command_queue command_queue, cl_mem buffer, const void *pattern, - size_t pattern_size, size_t offset, size_t cb, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clEnqueueFillBuffer; - + clUnloadCompiler_t *clUnloadCompiler; +#if __CL_HAS_ANON_UNION__ + /* Set to CL_ICD2_TAG_KHR for cl_khr_icd 2.0.0 */ + intptr_t clUnloadCompiler_icd2_tag; + }; #endif + clGetProgramInfo_t *clGetProgramInfo; + clGetProgramBuildInfo_t *clGetProgramBuildInfo; + 
clCreateKernel_t *clCreateKernel; + clCreateKernelsInProgram_t *clCreateKernelsInProgram; + clRetainKernel_t *clRetainKernel; + clReleaseKernel_t *clReleaseKernel; + clSetKernelArg_t *clSetKernelArg; + clGetKernelInfo_t *clGetKernelInfo; + clGetKernelWorkGroupInfo_t *clGetKernelWorkGroupInfo; + clWaitForEvents_t *clWaitForEvents; + clGetEventInfo_t *clGetEventInfo; + clRetainEvent_t *clRetainEvent; + clReleaseEvent_t *clReleaseEvent; + clGetEventProfilingInfo_t *clGetEventProfilingInfo; + clFlush_t *clFlush; + clFinish_t *clFinish; + clEnqueueReadBuffer_t *clEnqueueReadBuffer; + clEnqueueWriteBuffer_t *clEnqueueWriteBuffer; + clEnqueueCopyBuffer_t *clEnqueueCopyBuffer; + clEnqueueReadImage_t *clEnqueueReadImage; + clEnqueueWriteImage_t *clEnqueueWriteImage; + clEnqueueCopyImage_t *clEnqueueCopyImage; + clEnqueueCopyImageToBuffer_t *clEnqueueCopyImageToBuffer; + clEnqueueCopyBufferToImage_t *clEnqueueCopyBufferToImage; + clEnqueueMapBuffer_t *clEnqueueMapBuffer; + clEnqueueMapImage_t *clEnqueueMapImage; + clEnqueueUnmapMemObject_t *clEnqueueUnmapMemObject; + clEnqueueNDRangeKernel_t *clEnqueueNDRangeKernel; + clEnqueueTask_t *clEnqueueTask; + clEnqueueNativeKernel_t *clEnqueueNativeKernel; + clEnqueueMarker_t *clEnqueueMarker; + clEnqueueWaitForEvents_t *clEnqueueWaitForEvents; + clEnqueueBarrier_t *clEnqueueBarrier; + clGetExtensionFunctionAddress_t *clGetExtensionFunctionAddress; + clCreateFromGLBuffer_t *clCreateFromGLBuffer; + clCreateFromGLTexture2D_t *clCreateFromGLTexture2D; + clCreateFromGLTexture3D_t *clCreateFromGLTexture3D; + clCreateFromGLRenderbuffer_t *clCreateFromGLRenderbuffer; + clGetGLObjectInfo_t *clGetGLObjectInfo; + clGetGLTextureInfo_t *clGetGLTextureInfo; + clEnqueueAcquireGLObjects_t *clEnqueueAcquireGLObjects; + clEnqueueReleaseGLObjects_t *clEnqueueReleaseGLObjects; + clGetGLContextInfoKHR_t *clGetGLContextInfoKHR; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( - cl_command_queue command_queue, cl_mem src_buffer, 
cl_mem dst_buffer, - size_t src_offset, size_t dst_offset, size_t cb, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; + /* cl_khr_d3d10_sharing */ + void *clGetDeviceIDsFromD3D10KHR; + void *clCreateFromD3D10BufferKHR; + void *clCreateFromD3D10Texture2DKHR; + void *clCreateFromD3D10Texture3DKHR; + void *clEnqueueAcquireD3D10ObjectsKHR; + void *clEnqueueReleaseD3D10ObjectsKHR; + /* OpenCL 1.1 */ #ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( - cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, - const size_t *src_origin, const size_t *dst_origin, const size_t *region, - size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, - size_t dst_slice_pitch, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_1; - -#else - -typedef void *cl_api_clEnqueueCopyBufferRect; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( - cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, - const size_t *origin, const size_t *region, size_t row_pitch, - size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( - cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, - const size_t *origin, const size_t *region, size_t input_row_pitch, - size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( - cl_command_queue command_queue, cl_mem image, const void *fill_color, - const size_t origin[3], const size_t region[3], - cl_uint num_events_in_wait_list, const cl_event 
*event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - + clSetEventCallback_t *clSetEventCallback; + clCreateSubBuffer_t *clCreateSubBuffer; + clSetMemObjectDestructorCallback_t *clSetMemObjectDestructorCallback; + clCreateUserEvent_t *clCreateUserEvent; + clSetUserEventStatus_t *clSetUserEventStatus; + clEnqueueReadBufferRect_t *clEnqueueReadBufferRect; + clEnqueueWriteBufferRect_t *clEnqueueWriteBufferRect; + clEnqueueCopyBufferRect_t *clEnqueueCopyBufferRect; #else - -typedef void *cl_api_clEnqueueFillImage; - + void *clSetEventCallback; + void *clCreateSubBuffer; + void *clSetMemObjectDestructorCallback; + void *clCreateUserEvent; + void *clSetUserEventStatus; + void *clEnqueueReadBufferRect; + void *clEnqueueWriteBufferRect; + void *clEnqueueCopyBufferRect; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( - cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, - const size_t *src_origin, const size_t *dst_origin, const size_t *region, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( - cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, - const size_t *src_origin, const size_t *region, size_t dst_offset, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( - cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, - size_t src_offset, const size_t *dst_origin, const size_t *region, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, - cl_map_flags map_flags, size_t offset, size_t cb, - cl_uint 
num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( - cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, - cl_map_flags map_flags, const size_t *origin, const size_t *region, - size_t *image_row_pitch, size_t *image_slice_pitch, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + /* cl_ext_device_fission */ + void *clCreateSubDevicesEXT; + void *clRetainDeviceEXT; + void *clReleaseDeviceEXT; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( - cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; + /* cl_khr_gl_event */ + void *clCreateEventFromGLsyncKHR; + /* OpenCL 1.2 */ #ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( - cl_command_queue command_queue, cl_uint num_mem_objects, - const cl_mem *mem_objects, cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - + clCreateSubDevices_t *clCreateSubDevices; + clRetainDevice_t *clRetainDevice; + clReleaseDevice_t *clReleaseDevice; + clCreateImage_t *clCreateImage; + clCreateProgramWithBuiltInKernels_t *clCreateProgramWithBuiltInKernels; + clCompileProgram_t *clCompileProgram; + clLinkProgram_t *clLinkProgram; + clUnloadPlatformCompiler_t *clUnloadPlatformCompiler; + clGetKernelArgInfo_t *clGetKernelArgInfo; + clEnqueueFillBuffer_t *clEnqueueFillBuffer; + clEnqueueFillImage_t *clEnqueueFillImage; + clEnqueueMigrateMemObjects_t *clEnqueueMigrateMemObjects; + clEnqueueMarkerWithWaitList_t *clEnqueueMarkerWithWaitList; + clEnqueueBarrierWithWaitList_t *clEnqueueBarrierWithWaitList; + 
clGetExtensionFunctionAddressForPlatform_t * + clGetExtensionFunctionAddressForPlatform; + clCreateFromGLTexture_t *clCreateFromGLTexture; #else - -typedef void *cl_api_clEnqueueMigrateMemObjects; - + void *clCreateSubDevices; + void *clRetainDevice; + void *clReleaseDevice; + void *clCreateImage; + void *clCreateProgramWithBuiltInKernels; + void *clCompileProgram; + void *clLinkProgram; + void *clUnloadPlatformCompiler; + void *clGetKernelArgInfo; + void *clEnqueueFillBuffer; + void *clEnqueueFillImage; + void *clEnqueueMigrateMemObjects; + void *clEnqueueMarkerWithWaitList; + void *clEnqueueBarrierWithWaitList; + void *clGetExtensionFunctionAddressForPlatform; + void *clCreateFromGLTexture; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( - cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( - cl_command_queue command_queue, cl_kernel kernel, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; + /* cl_khr_d3d11_sharing and cl_khr_dx9_media_sharing */ + void *clGetDeviceIDsFromD3D11KHR; + void *clCreateFromD3D11BufferKHR; + void *clCreateFromD3D11Texture2DKHR; + void *clCreateFromD3D11Texture3DKHR; + void *clCreateFromDX9MediaSurfaceKHR; + void *clEnqueueAcquireD3D11ObjectsKHR; + void *clEnqueueReleaseD3D11ObjectsKHR; + void *clGetDeviceIDsFromDX9MediaAdapterKHR; + void *clEnqueueAcquireDX9MediaSurfacesKHR; + void *clEnqueueReleaseDX9MediaSurfacesKHR; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( - cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), - void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, - 
const void **args_mem_loc, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( - cl_command_queue command_queue, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( - cl_command_queue command_queue, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY void *( - CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( - cl_platform_id platform, - const char *function_name)CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clEnqueueMarkerWithWaitList; -typedef void *cl_api_clEnqueueBarrierWithWaitList; -typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; - -#endif + /* cl_khr_egl_image */ + void *clCreateFromEGLImageKHR; + void *clEnqueueAcquireEGLObjectsKHR; + void *clEnqueueReleaseEGLObjectsKHR; -// Shared Virtual Memory APIs + /* cl_khr_egl_event */ + void *clCreateEventFromEGLSyncKHR; + /* OpenCL 2.0 */ #ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( - cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, - void ** /* svm_pointers */, - void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, - cl_uint /* num_svm_pointers */, - void ** /* svm_pointers[] */, - void * /* user_data */), - void * /* user_data */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( - cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, - void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, - cl_uint /* 
num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, - const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( - cl_command_queue /* command_queue */, cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - + clCreateCommandQueueWithProperties_t *clCreateCommandQueueWithProperties; + clCreatePipe_t *clCreatePipe; + clGetPipeInfo_t *clGetPipeInfo; + clSVMAlloc_t *clSVMAlloc; + clSVMFree_t *clSVMFree; + clEnqueueSVMFree_t *clEnqueueSVMFree; + clEnqueueSVMMemcpy_t *clEnqueueSVMMemcpy; + clEnqueueSVMMemFill_t *clEnqueueSVMMemFill; + clEnqueueSVMMap_t *clEnqueueSVMMap; + clEnqueueSVMUnmap_t *clEnqueueSVMUnmap; + clCreateSamplerWithProperties_t *clCreateSamplerWithProperties; + clSetKernelArgSVMPointer_t *clSetKernelArgSVMPointer; + clSetKernelExecInfo_t *clSetKernelExecInfo; #else - -typedef void *cl_api_clEnqueueSVMFree; -typedef void *cl_api_clEnqueueSVMMemcpy; -typedef void *cl_api_clEnqueueSVMMemFill; -typedef void *cl_api_clEnqueueSVMMap; -typedef void *cl_api_clEnqueueSVMUnmap; - + void *clCreateCommandQueueWithProperties; + void *clCreatePipe; + void *clGetPipeInfo; + void *clSVMAlloc; + void 
*clSVMFree; + void *clEnqueueSVMFree; + void *clEnqueueSVMMemcpy; + void *clEnqueueSVMMemFill; + void *clEnqueueSVMMap; + void *clEnqueueSVMUnmap; + void *clCreateSamplerWithProperties; + void *clSetKernelArgSVMPointer; + void *clSetKernelExecInfo; #endif -// Deprecated APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( - cl_command_queue command_queue, cl_command_queue_properties properties, - cl_bool enable, cl_command_queue_properties *old_properties) - CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( - cl_context context, cl_mem_flags flags, const cl_image_format *image_format, - size_t image_width, size_t image_height, size_t image_row_pitch, - void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( - cl_context context, cl_mem_flags flags, const cl_image_format *image_format, - size_t image_width, size_t image_height, size_t image_depth, - size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, - cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) - CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( - cl_command_queue command_queue, - cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( - cl_command_queue command_queue, cl_uint num_events, - const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( - cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( - const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -// GL and other APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL 
*cl_api_clCreateFromGLBuffer)( - cl_context context, cl_mem_flags flags, cl_GLuint bufobj, - int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( - cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, - cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( - cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, - cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( - cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, - cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( - cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( - cl_mem memobj, cl_gl_object_type *gl_object_type, - cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( - cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -/* cl_khr_gl_sharing */ 
-typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( - const cl_context_properties *properties, cl_gl_context_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret); - -/* cl_khr_gl_event */ -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( - cl_context context, cl_GLsync sync, cl_int *errcode_ret); - -#if defined(_WIN32) - -/* cl_khr_d3d10_sharing */ - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, - cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( - cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, - UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( - cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, - UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int 
CL_API_CALL clGetDeviceIDsFromD3D10KHR( - cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, - ID3D10Buffer *resource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( - cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( - cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -/* cl_khr_d3d11_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( - cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, - void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, - cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( - cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, - UINT subresource, cl_int *errcode_ret) 
CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( - cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, - UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -/* cl_khr_dx9_media_sharing */ -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( - cl_platform_id platform, cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, - cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( - cl_context context, cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, - cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event 
*event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -/* cl_khr_d3d11_sharing */ -extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( - cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, - void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, - ID3D11Buffer *resource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( - cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( - cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -/* cl_khr_dx9_media_sharing */ -extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( - cl_platform_id platform, cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapter_type, void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, - cl_device_id *devices, cl_uint *num_devices); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( - cl_context context, cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, - cl_uint plane, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -#else - -/* cl_khr_d3d10_sharing */ -typedef void *cl_api_clGetDeviceIDsFromD3D10KHR; -typedef void *cl_api_clCreateFromD3D10BufferKHR; -typedef void *cl_api_clCreateFromD3D10Texture2DKHR; -typedef void *cl_api_clCreateFromD3D10Texture3DKHR; -typedef void *cl_api_clEnqueueAcquireD3D10ObjectsKHR; -typedef void *cl_api_clEnqueueReleaseD3D10ObjectsKHR; - -/* cl_khr_d3d11_sharing */ -typedef void *cl_api_clGetDeviceIDsFromD3D11KHR; -typedef void *cl_api_clCreateFromD3D11BufferKHR; -typedef void *cl_api_clCreateFromD3D11Texture2DKHR; -typedef void *cl_api_clCreateFromD3D11Texture3DKHR; -typedef void *cl_api_clEnqueueAcquireD3D11ObjectsKHR; -typedef void *cl_api_clEnqueueReleaseD3D11ObjectsKHR; - -/* cl_khr_dx9_media_sharing */ -typedef void *cl_api_clCreateFromDX9MediaSurfaceKHR; -typedef void *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR; -typedef void *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR; -typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; - -#endif - -/* OpenCL 1.1 */ - -#ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( - cl_event /* event */, cl_int /* command_exec_callback_type */, - void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( - cl_mem /* buffer */, cl_mem_flags /* flags */, - cl_buffer_create_type /* buffer_create_type */, - const void * /* buffer_create_info */, - cl_int * /* errcode_ret */) 
CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( - cl_mem /* memobj */, - void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, - void * /*user_data*/), - void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( - cl_context /* context */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( - cl_event /* event */, - cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + /* cl_khr_sub_groups */ + void *clGetKernelSubGroupInfoKHR; + /* OpenCL 2.1 */ +#ifdef CL_VERSION_2_1 + clCloneKernel_t *clCloneKernel; + clCreateProgramWithIL_t *clCreateProgramWithIL; + clEnqueueSVMMigrateMem_t *clEnqueueSVMMigrateMem; + clGetDeviceAndHostTimer_t *clGetDeviceAndHostTimer; + clGetHostTimer_t *clGetHostTimer; + clGetKernelSubGroupInfo_t *clGetKernelSubGroupInfo; + clSetDefaultDeviceCommandQueue_t *clSetDefaultDeviceCommandQueue; #else - -typedef void *cl_api_clSetEventCallback; -typedef void *cl_api_clCreateSubBuffer; -typedef void *cl_api_clSetMemObjectDestructorCallback; -typedef void *cl_api_clCreateUserEvent; -typedef void *cl_api_clSetUserEventStatus; - + void *clCloneKernel; + void *clCreateProgramWithIL; + void *clEnqueueSVMMigrateMem; + void *clGetDeviceAndHostTimer; + void *clGetHostTimer; + void *clGetKernelSubGroupInfo; + void *clSetDefaultDeviceCommandQueue; #endif -#ifdef cl_ext_device_fission - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( - cl_device_id in_device, - const cl_device_partition_property_ext *partition_properties, - cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( - cl_device_id device) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( - cl_device_id device) 
CL_API_SUFFIX__VERSION_1_0; - + /* OpenCL 2.2 */ +#ifdef CL_VERSION_2_2 + clSetProgramReleaseCallback_t *clSetProgramReleaseCallback; + clSetProgramSpecializationConstant_t *clSetProgramSpecializationConstant; #else - -typedef void* cl_api_clCreateSubDevicesEXT; -typedef void* cl_api_clRetainDeviceEXT; -typedef void* cl_api_clReleaseDeviceEXT; - + void *clSetProgramReleaseCallback; + void *clSetProgramSpecializationConstant; #endif -#ifdef cl_khr_egl_image - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( - cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, - cl_mem_flags flags, const cl_egl_image_properties_khr *properties, - cl_int *errcode_ret); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - + /* OpenCL 3.0 */ +#ifdef CL_VERSION_3_0 + clCreateBufferWithProperties_t *clCreateBufferWithProperties; + clCreateImageWithProperties_t *clCreateImageWithProperties; + clSetContextDestructorCallback_t *clSetContextDestructorCallback; #else - -typedef void* cl_api_clCreateFromEGLImageKHR; -typedef void* cl_api_clEnqueueAcquireEGLObjectsKHR; -typedef void* cl_api_clEnqueueReleaseEGLObjectsKHR; - + void *clCreateBufferWithProperties; + void *clCreateImageWithProperties; + void *clSetContextDestructorCallback; #endif -#ifdef cl_khr_egl_event - -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( - cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, - cl_int *errcode_ret); - + /* OpenCL 3.1 */ +#ifdef CL_VERSION_3_1 + 
clGetKernelSuggestedLocalWorkSize_t *clGetKernelSuggestedLocalWorkSize; #else - -typedef void* cl_api_clCreateEventFromEGLSyncKHR; - + void *clGetKernelSuggestedLocalWorkSize; #endif -#ifdef CL_VERSION_2_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( - cl_context context, cl_device_id device, - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( - cl_context context, const void *il, size_t length, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( - cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, - size_t input_value_size, const void *input_value, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( - cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( - cl_command_queue command_queue, cl_uint num_svm_pointers, - const void **svm_pointers, const size_t *sizes, - cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( - cl_device_id device, cl_ulong *device_timestamp, - cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( - cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; - -#else - -typedef void *cl_api_clSetDefaultDeviceCommandQueue; -typedef void *cl_api_clCreateProgramWithIL; -typedef void *cl_api_clGetKernelSubGroupInfo; -typedef void *cl_api_clCloneKernel; -typedef void *cl_api_clEnqueueSVMMigrateMem; -typedef void *cl_api_clGetDeviceAndHostTimer; -typedef void 
*cl_api_clGetHostTimer; - -#endif - -/* Vendor dispatch table struture */ - -typedef struct _cl_icd_dispatch { - /* OpenCL 1.0 */ - cl_api_clGetPlatformIDs clGetPlatformIDs; - cl_api_clGetPlatformInfo clGetPlatformInfo; - cl_api_clGetDeviceIDs clGetDeviceIDs; - cl_api_clGetDeviceInfo clGetDeviceInfo; - cl_api_clCreateContext clCreateContext; - cl_api_clCreateContextFromType clCreateContextFromType; - cl_api_clRetainContext clRetainContext; - cl_api_clReleaseContext clReleaseContext; - cl_api_clGetContextInfo clGetContextInfo; - cl_api_clCreateCommandQueue clCreateCommandQueue; - cl_api_clRetainCommandQueue clRetainCommandQueue; - cl_api_clReleaseCommandQueue clReleaseCommandQueue; - cl_api_clGetCommandQueueInfo clGetCommandQueueInfo; - cl_api_clSetCommandQueueProperty clSetCommandQueueProperty; - cl_api_clCreateBuffer clCreateBuffer; - cl_api_clCreateImage2D clCreateImage2D; - cl_api_clCreateImage3D clCreateImage3D; - cl_api_clRetainMemObject clRetainMemObject; - cl_api_clReleaseMemObject clReleaseMemObject; - cl_api_clGetSupportedImageFormats clGetSupportedImageFormats; - cl_api_clGetMemObjectInfo clGetMemObjectInfo; - cl_api_clGetImageInfo clGetImageInfo; - cl_api_clCreateSampler clCreateSampler; - cl_api_clRetainSampler clRetainSampler; - cl_api_clReleaseSampler clReleaseSampler; - cl_api_clGetSamplerInfo clGetSamplerInfo; - cl_api_clCreateProgramWithSource clCreateProgramWithSource; - cl_api_clCreateProgramWithBinary clCreateProgramWithBinary; - cl_api_clRetainProgram clRetainProgram; - cl_api_clReleaseProgram clReleaseProgram; - cl_api_clBuildProgram clBuildProgram; - cl_api_clUnloadCompiler clUnloadCompiler; - cl_api_clGetProgramInfo clGetProgramInfo; - cl_api_clGetProgramBuildInfo clGetProgramBuildInfo; - cl_api_clCreateKernel clCreateKernel; - cl_api_clCreateKernelsInProgram clCreateKernelsInProgram; - cl_api_clRetainKernel clRetainKernel; - cl_api_clReleaseKernel clReleaseKernel; - cl_api_clSetKernelArg clSetKernelArg; - cl_api_clGetKernelInfo 
clGetKernelInfo; - cl_api_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; - cl_api_clWaitForEvents clWaitForEvents; - cl_api_clGetEventInfo clGetEventInfo; - cl_api_clRetainEvent clRetainEvent; - cl_api_clReleaseEvent clReleaseEvent; - cl_api_clGetEventProfilingInfo clGetEventProfilingInfo; - cl_api_clFlush clFlush; - cl_api_clFinish clFinish; - cl_api_clEnqueueReadBuffer clEnqueueReadBuffer; - cl_api_clEnqueueWriteBuffer clEnqueueWriteBuffer; - cl_api_clEnqueueCopyBuffer clEnqueueCopyBuffer; - cl_api_clEnqueueReadImage clEnqueueReadImage; - cl_api_clEnqueueWriteImage clEnqueueWriteImage; - cl_api_clEnqueueCopyImage clEnqueueCopyImage; - cl_api_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; - cl_api_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; - cl_api_clEnqueueMapBuffer clEnqueueMapBuffer; - cl_api_clEnqueueMapImage clEnqueueMapImage; - cl_api_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; - cl_api_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; - cl_api_clEnqueueTask clEnqueueTask; - cl_api_clEnqueueNativeKernel clEnqueueNativeKernel; - cl_api_clEnqueueMarker clEnqueueMarker; - cl_api_clEnqueueWaitForEvents clEnqueueWaitForEvents; - cl_api_clEnqueueBarrier clEnqueueBarrier; - cl_api_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; - cl_api_clCreateFromGLBuffer clCreateFromGLBuffer; - cl_api_clCreateFromGLTexture2D clCreateFromGLTexture2D; - cl_api_clCreateFromGLTexture3D clCreateFromGLTexture3D; - cl_api_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; - cl_api_clGetGLObjectInfo clGetGLObjectInfo; - cl_api_clGetGLTextureInfo clGetGLTextureInfo; - cl_api_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; - cl_api_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; - cl_api_clGetGLContextInfoKHR clGetGLContextInfoKHR; - - /* cl_khr_d3d10_sharing */ - cl_api_clGetDeviceIDsFromD3D10KHR clGetDeviceIDsFromD3D10KHR; - cl_api_clCreateFromD3D10BufferKHR clCreateFromD3D10BufferKHR; - cl_api_clCreateFromD3D10Texture2DKHR 
clCreateFromD3D10Texture2DKHR; - cl_api_clCreateFromD3D10Texture3DKHR clCreateFromD3D10Texture3DKHR; - cl_api_clEnqueueAcquireD3D10ObjectsKHR clEnqueueAcquireD3D10ObjectsKHR; - cl_api_clEnqueueReleaseD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR; - - /* OpenCL 1.1 */ - cl_api_clSetEventCallback clSetEventCallback; - cl_api_clCreateSubBuffer clCreateSubBuffer; - cl_api_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; - cl_api_clCreateUserEvent clCreateUserEvent; - cl_api_clSetUserEventStatus clSetUserEventStatus; - cl_api_clEnqueueReadBufferRect clEnqueueReadBufferRect; - cl_api_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; - cl_api_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; - - /* cl_ext_device_fission */ - cl_api_clCreateSubDevicesEXT clCreateSubDevicesEXT; - cl_api_clRetainDeviceEXT clRetainDeviceEXT; - cl_api_clReleaseDeviceEXT clReleaseDeviceEXT; - - /* cl_khr_gl_event */ - cl_api_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; - - /* OpenCL 1.2 */ - cl_api_clCreateSubDevices clCreateSubDevices; - cl_api_clRetainDevice clRetainDevice; - cl_api_clReleaseDevice clReleaseDevice; - cl_api_clCreateImage clCreateImage; - cl_api_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; - cl_api_clCompileProgram clCompileProgram; - cl_api_clLinkProgram clLinkProgram; - cl_api_clUnloadPlatformCompiler clUnloadPlatformCompiler; - cl_api_clGetKernelArgInfo clGetKernelArgInfo; - cl_api_clEnqueueFillBuffer clEnqueueFillBuffer; - cl_api_clEnqueueFillImage clEnqueueFillImage; - cl_api_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; - cl_api_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; - cl_api_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; - cl_api_clGetExtensionFunctionAddressForPlatform - clGetExtensionFunctionAddressForPlatform; - cl_api_clCreateFromGLTexture clCreateFromGLTexture; - - /* cl_khr_d3d11_sharing */ - cl_api_clGetDeviceIDsFromD3D11KHR clGetDeviceIDsFromD3D11KHR; - 
cl_api_clCreateFromD3D11BufferKHR clCreateFromD3D11BufferKHR; - cl_api_clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture2DKHR; - cl_api_clCreateFromD3D11Texture3DKHR clCreateFromD3D11Texture3DKHR; - cl_api_clCreateFromDX9MediaSurfaceKHR clCreateFromDX9MediaSurfaceKHR; - cl_api_clEnqueueAcquireD3D11ObjectsKHR clEnqueueAcquireD3D11ObjectsKHR; - cl_api_clEnqueueReleaseD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR; - - /* cl_khr_dx9_media_sharing */ - cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR - clGetDeviceIDsFromDX9MediaAdapterKHR; - cl_api_clEnqueueAcquireDX9MediaSurfacesKHR - clEnqueueAcquireDX9MediaSurfacesKHR; - cl_api_clEnqueueReleaseDX9MediaSurfacesKHR - clEnqueueReleaseDX9MediaSurfacesKHR; - - /* cl_khr_egl_image */ - cl_api_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; - cl_api_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; - cl_api_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; - - /* cl_khr_egl_event */ - cl_api_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; - - /* OpenCL 2.0 */ - cl_api_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; - cl_api_clCreatePipe clCreatePipe; - cl_api_clGetPipeInfo clGetPipeInfo; - cl_api_clSVMAlloc clSVMAlloc; - cl_api_clSVMFree clSVMFree; - cl_api_clEnqueueSVMFree clEnqueueSVMFree; - cl_api_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; - cl_api_clEnqueueSVMMemFill clEnqueueSVMMemFill; - cl_api_clEnqueueSVMMap clEnqueueSVMMap; - cl_api_clEnqueueSVMUnmap clEnqueueSVMUnmap; - cl_api_clCreateSamplerWithProperties clCreateSamplerWithProperties; - cl_api_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; - cl_api_clSetKernelExecInfo clSetKernelExecInfo; - - /* cl_khr_sub_groups */ - cl_api_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; - - /* OpenCL 2.1 */ - cl_api_clCloneKernel clCloneKernel; - cl_api_clCreateProgramWithIL clCreateProgramWithIL; - cl_api_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; - cl_api_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; - 
cl_api_clGetHostTimer clGetHostTimer; - cl_api_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; - cl_api_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; - - /* OpenCL 2.2 */ - cl_api_clSetProgramReleaseCallback clSetProgramReleaseCallback; - cl_api_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; - - /* OpenCL 3.0 */ - cl_api_clCreateBufferWithProperties clCreateBufferWithProperties; - cl_api_clCreateImageWithProperties clCreateImageWithProperties; - cl_api_clSetContextDestructorCallback clSetContextDestructorCallback; - } cl_icd_dispatch; #ifdef __cplusplus } #endif +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + #pragma warning( pop ) +#endif + #endif /* #ifndef OPENCL_CL_ICD_H */ diff --git a/intercept/CL/cl_platform.h b/intercept/CL/cl_platform.h index ed174ef9..a1c20775 100644 --- a/intercept/CL/cl_platform.h +++ b/intercept/CL/cl_platform.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. + * Copyright (c) 2008-2026 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,14 +25,22 @@ extern "C" { #endif -#if defined(_WIN32) +#if !defined(CL_API_ENTRY) #define CL_API_ENTRY - #define CL_API_CALL __stdcall - #define CL_CALLBACK __stdcall -#else - #define CL_API_ENTRY - #define CL_API_CALL - #define CL_CALLBACK +#endif +#if !defined(CL_API_CALL) + #if defined(_WIN32) && !defined(__aarch64__) && !defined(__arm64__) + #define CL_API_CALL __stdcall + #else + #define CL_API_CALL + #endif +#endif +#if !defined(CL_CALLBACK) + #if defined(_WIN32) && !defined(__aarch64__) && !defined(__arm64__) + #define CL_CALLBACK __stdcall + #else + #define CL_CALLBACK + #endif #endif /* @@ -43,86 +51,100 @@ extern "C" { * deprecation but is deprecated in versions later than 1.1. 
*/ -#define CL_EXTENSION_WEAK_LINK -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXT_SUFFIX__VERSION_1_0 -#define CL_API_SUFFIX__VERSION_1_1 -#define CL_EXT_SUFFIX__VERSION_1_1 -#define CL_API_SUFFIX__VERSION_1_2 -#define CL_EXT_SUFFIX__VERSION_1_2 -#define CL_API_SUFFIX__VERSION_2_0 -#define CL_EXT_SUFFIX__VERSION_2_0 -#define CL_API_SUFFIX__VERSION_2_1 -#define CL_EXT_SUFFIX__VERSION_2_1 -#define CL_API_SUFFIX__VERSION_2_2 -#define CL_EXT_SUFFIX__VERSION_2_2 -#define CL_API_SUFFIX__VERSION_3_0 -#define CL_EXT_SUFFIX__VERSION_3_0 -#define CL_API_SUFFIX__EXPERIMENTAL -#define CL_EXT_SUFFIX__EXPERIMENTAL +#ifndef CL_API_SUFFIX_USER +#define CL_API_SUFFIX_USER +#endif + +#ifndef CL_API_PREFIX_USER +#define CL_API_PREFIX_USER +#endif + +#define CL_API_SUFFIX_COMMON CL_API_SUFFIX_USER +#define CL_API_PREFIX_COMMON CL_API_PREFIX_USER + +#define CL_API_SUFFIX__VERSION_1_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_3_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_3_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__EXPERIMENTAL CL_API_SUFFIX_COMMON #ifdef __GNUC__ - #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) - #define CL_EXT_PREFIX_DEPRECATED -#elif defined(_WIN32) - #define CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) + #define CL_API_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_API_PREFIX_DEPRECATED +#elif defined(_MSC_VER) && !defined(__clang__) + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED __declspec(deprecated) #else - #define CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED #endif #ifdef 
CL_USE_DEPRECATED_OPENCL_1_0_APIS - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS - #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + 
#define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #if (defined (_WIN32) && defined(_MSC_VER)) +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlanguage-extension-token" +#endif + +/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */ +/* 
stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */ +#if defined(__clang__) || _MSC_VER >= 1600 + #include <stdint.h> +#endif + /* scalar types */ typedef signed __int8 cl_char; typedef unsigned __int8 cl_uchar; @@ -137,6 +159,10 @@ typedef unsigned __int16 cl_half; typedef float cl_float; typedef double cl_double; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 @@ -334,11 +360,6 @@ typedef double cl_double; #include <stddef.h> -/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ -typedef unsigned int cl_GLuint; -typedef int cl_GLint; -typedef unsigned int cl_GLenum; - /* * Vector types * @@ -483,27 +504,38 @@ typedef unsigned int cl_GLenum; #if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ -#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#elif defined(_WIN32) && defined(_MSC_VER) && !defined(__STDC__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined(__GNUC__) && ! defined(__STRICT_ANSI__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined(__clang__) #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ __extension__ -#elif defined( _WIN32) && defined(_MSC_VER) - #if _MSC_VER >= 1500 - /* Microsoft Developer Studio 2008 supports anonymous structs, but - * complains by default. */ - #define __CL_HAS_ANON_STRUCT__ 1 - #define __CL_ANON_STRUCT__ - /* Disable warning C4201: nonstandard extension used : nameless - * struct/union */ - #pragma warning( push ) - #pragma warning( disable : 4201 ) - #endif #else #define __CL_HAS_ANON_STRUCT__ 0 #define __CL_ANON_STRUCT__ #endif +/* Define capabilities for anonymous union members.
*/ +#if defined(__cplusplus) && __cplusplus >= 201103L +#define __CL_HAS_ANON_UNION__ 1 +#define __CL_ANON_UNION__ +#else + /* Follow anonymous struct logic */ +#define __CL_HAS_ANON_UNION__ __CL_HAS_ANON_STRUCT__ +#define __CL_ANON_UNION__ __CL_ANON_STRUCT__ +#endif + +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) +#endif + /* Define alignment keys */ -#if defined( __GNUC__ ) +#if defined( __GNUC__ ) || defined(__INTEGRITY) #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) #elif defined( _WIN32) && (_MSC_VER) /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ @@ -1377,12 +1409,8 @@ typedef union } #endif -#undef __CL_HAS_ANON_STRUCT__ -#undef __CL_ANON_STRUCT__ -#if defined( _WIN32) && defined(_MSC_VER) - #if _MSC_VER >=1500 +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ #pragma warning( pop ) - #endif #endif #endif /* __CL_PLATFORM_H */ diff --git a/intercept/CL/cl_version.h b/intercept/CL/cl_version.h index f38280a8..5d18e7f2 100644 --- a/intercept/CL/cl_version.h +++ b/intercept/CL/cl_version.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2018-2020 The Khronos Group Inc. + * Copyright (c) 2018-2026 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,8 @@ /* Detect which version to target */ #if !defined(CL_TARGET_OPENCL_VERSION) -#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)") -#define CL_TARGET_OPENCL_VERSION 220 +#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. 
Defaulting to 310 (OpenCL 3.1)") +#define CL_TARGET_OPENCL_VERSION 310 #endif #if CL_TARGET_OPENCL_VERSION != 100 && \ CL_TARGET_OPENCL_VERSION != 110 && \ @@ -28,14 +28,18 @@ CL_TARGET_OPENCL_VERSION != 200 && \ CL_TARGET_OPENCL_VERSION != 210 && \ CL_TARGET_OPENCL_VERSION != 220 && \ - CL_TARGET_OPENCL_VERSION != 300 -#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 220 (OpenCL 2.2)") + CL_TARGET_OPENCL_VERSION != 300 && \ + CL_TARGET_OPENCL_VERSION != 310 +#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300, 310). Defaulting to 310 (OpenCL 3.1)") #undef CL_TARGET_OPENCL_VERSION -#define CL_TARGET_OPENCL_VERSION 220 +#define CL_TARGET_OPENCL_VERSION 310 #endif /* OpenCL Version */ +#if CL_TARGET_OPENCL_VERSION >= 310 && !defined(CL_VERSION_3_1) +#define CL_VERSION_3_1 1 +#endif #if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0) #define CL_VERSION_3_0 1 #endif From a4aab2ba976432a34e2ab01ae6d7bcc6a15f1fa7 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 6 May 2026 16:43:25 +0200 Subject: [PATCH 2/7] add enums and tracing for clGetKernelSuggestedLocalWorkSize --- intercept/src/common.h | 2 +- intercept/src/dispatch.cpp | 41 +++++++++++++++++++++++ intercept/src/enummap.cpp | 65 ++++++++++++++++++++++--------------- intercept/src/intercept.cpp | 3 ++ 4 files changed, 83 insertions(+), 28 deletions(-) diff --git a/intercept/src/common.h b/intercept/src/common.h index 0deeb75c..3fe8d240 100644 --- a/intercept/src/common.h +++ b/intercept/src/common.h @@ -12,7 +12,7 @@ #define CL_USE_DEPRECATED_OPENCL_2_0_APIS #define CL_USE_DEPRECATED_OPENCL_2_1_APIS #define CL_USE_DEPRECATED_OPENCL_2_2_APIS -#define CL_TARGET_OPENCL_VERSION 300 +#define CL_TARGET_OPENCL_VERSION 310 #if defined(__ANDROID__) #include diff --git a/intercept/src/dispatch.cpp b/intercept/src/dispatch.cpp index 61a36170..a4b820e0 100644 --- 
a/intercept/src/dispatch.cpp +++ b/intercept/src/dispatch.cpp @@ -7557,6 +7557,47 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMigrateMem( NULL_FUNCTION_POINTER_RETURN_ERROR(CL_INVALID_COMMAND_QUEUE); } +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 3.1 +CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSize( + cl_command_queue commandQueue, + cl_kernel kernel, + cl_uint workDim, + const size_t *globalWorkOffset, + const size_t *globalWorkSize, + size_t *suggestedLocalWorkSize) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && pIntercept->dispatch().clGetKernelSuggestedLocalWorkSize ) + { + GET_ENQUEUE_COUNTER(); + CALL_LOGGING_ENTER_KERNEL( + kernel, + "queue = %p, kernel = %p", + commandQueue, + kernel ); + HOST_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetKernelSuggestedLocalWorkSize( + commandQueue, + kernel, + workDim, + globalWorkOffset, + globalWorkSize, + suggestedLocalWorkSize ); + + HOST_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT( retVal ); + + return retVal; + } + + NULL_FUNCTION_POINTER_RETURN_ERROR(CL_INVALID_COMMAND_QUEUE); +} + /////////////////////////////////////////////////////////////////////////////// // // cl_khr_external_memory diff --git a/intercept/src/enummap.cpp b/intercept/src/enummap.cpp index d902f622..ba896a2d 100644 --- a/intercept/src/enummap.cpp +++ b/intercept/src/enummap.cpp @@ -122,7 +122,8 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_COMPUTE_UNITS ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WORK_GROUP_SIZE ); - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WORK_ITEM_SIZES ); + // CL_DEVICE_MAX_WORK_ITEM_SIZES was deprecated and replaced by: + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WORK_GROUP_SIZES ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR ); ADD_ENUM_NAME( m_cl_int, 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT ); @@ -228,6 +229,17 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PIPE_SUPPORT ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_UUID ); + ADD_ENUM_NAME( m_cl_int, CL_DRIVER_UUID ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LUID_VALID ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LUID ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NODE_MASK ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_EXTENSIONS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_CAPABILITIES ); /* cl_device_fp_config - bitfield */ ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_DENORM ); @@ -655,12 +667,12 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR ); #endif - // cl_khr_device_uuid - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_UUID_KHR ); - ADD_ENUM_NAME( m_cl_int, CL_DRIVER_UUID_KHR ); - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LUID_VALID_KHR ); - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LUID_KHR ); - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NODE_MASK_KHR ); + // cl_khr_device_uuid - to OpenCL 3.1 + //CL_DEVICE_UUID_KHR + //CL_DRIVER_UUID_KHR + //CL_DEVICE_LUID_VALID_KHR + //CL_DEVICE_LUID_KHR + //CL_DEVICE_NODE_MASK_KHR #if defined(_WIN32) // cl_khr_dx9_media_sharing @@ -764,13 +776,13 @@ CEnumNameMap::CEnumNameMap() // cl_khr_extended_versioning extension // Most enums for this extension were added to OpenCL 3.0. 
- //CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906 - //CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907 - //CL_DEVICE_NUMERIC_VERSION_KHR 0x105E + //CL_PLATFORM_NUMERIC_VERSION_KHR + //CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR + //CL_DEVICE_NUMERIC_VERSION_KHR ADD_ENUM_NAME( m_cl_int, CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR ); - //CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060 - //CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061 - //CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062 + //CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR + //CL_DEVICE_ILS_WITH_VERSION_KHR + //CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR // cl_khr_external_memory ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR ); @@ -827,16 +839,16 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_ICD_SUFFIX_KHR ); ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_NOT_FOUND_KHR ); - // cl_khr_il_program - // These enums are core in OpenCL 2.1. - //CL_DEVICE_IL_VERSION_KHR 0x105B - //CL_PROGRAM_IL_KHR 0x1169 + // cl_khr_il_program - to OpenCL 2.1 + //CL_DEVICE_IL_VERSION_KHR + //CL_PROGRAM_IL_KHR // cl_khr_initalize_memory ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_MEMORY_INITIALIZE_KHR ); - // cl_khr_integer_dot_product - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR ); + // cl_khr_integer_dot_product - to OpenCL 3.1 + //CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR + //CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR // cl_khr_kernel_clock ADD_ENUM_NAME( m_cl_int, CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR ); @@ -866,18 +878,17 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIR_VERSIONS ); ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BINARY_TYPE_INTERMEDIATE ); - // cl_khr_spirv_queries - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR ); - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_EXTENSIONS_KHR ); - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_CAPABILITIES_KHR ); + // cl_khr_spirv_queries - to OpenCL 3.1 + 
//CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR + //CL_DEVICE_SPIRV_EXTENSIONS_KHR + //CL_DEVICE_SPIRV_CAPABILITIES_KHR // cl_khr_subgroup_named_barrier ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR ); - // cl_khr_subgroups - // These enums were promoted to core in OpenCL 2.1. - //CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 - //CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 + // cl_khr_subgroups - to OpenCL 2.1 + //CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR + //CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR // cl_khr_terminate_context ADD_ENUM_NAME( m_cl_int, CL_DEVICE_TERMINATE_CAPABILITY_KHR ); diff --git a/intercept/src/intercept.cpp b/intercept/src/intercept.cpp index e6ce96f5..541c4680 100644 --- a/intercept/src/intercept.cpp +++ b/intercept/src/intercept.cpp @@ -13795,6 +13795,9 @@ bool CLIntercept::initDispatch( const std::string& libName ) INIT_EXPORTED_FUNC(clCreateImageWithProperties); INIT_EXPORTED_FUNC(clSetContextDestructorCallback); + // OpenCL 3.1 Entry Points (optional) + INIT_EXPORTED_FUNC(clGetKernelSuggestedLocalWorkSize); + success = savedSuccess; } From 62e961877c9798a0c8758124010c33bbbf7b34c9 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 6 May 2026 16:56:00 +0200 Subject: [PATCH 3/7] add device performance timing support for clGetKernelSuggestedLocalWorkSize --- intercept/src/intercept.cpp | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/intercept/src/intercept.cpp b/intercept/src/intercept.cpp index 541c4680..6fa96dc0 100644 --- a/intercept/src/intercept.cpp +++ b/intercept/src/intercept.cpp @@ -5876,13 +5876,30 @@ void CLIntercept::getTimingTagsKernel( size_t suggestedLWS[3] = { 0, 0, 0 }; size_t emptyGWO[3] = { 0, 0, 0 }; - if( lws == NULL && + if( config().DevicePerformanceTimeSuggestedLWSTracking && + lws == NULL && workDim <= 3 && - config().DevicePerformanceTimeSuggestedLWSTracking ) + device ) { + // Try the OpenCL 3.1 core API first. 
+ const SDeviceInfo& deviceInfo = m_DeviceInfoMap[device]; + if( useSuggestedLWS == false && + deviceInfo.NumericVersion >= CL_MAKE_VERSION_KHR(3, 1, 0) && + dispatch().clGetKernelSuggestedLocalWorkSize ) + { + cl_int testErrorCode = dispatch().clGetKernelSuggestedLocalWorkSize( + queue, + kernel, + workDim, + gwo, + gws, + suggestedLWS ); + useSuggestedLWS = ( testErrorCode == CL_SUCCESS ); + } + cl_platform_id platform = getPlatform(device); - // Try the cl_khr_suggested_local_work_size version first. + // Try the cl_khr_suggested_local_work_size version next. if( useSuggestedLWS == false ) { if( dispatchX(platform).clGetKernelSuggestedLocalWorkSizeKHR == NULL ) From 0dc032c5cd603f7d95a446b629e6305d3db57ea0 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 1 Jun 2026 08:51:36 -0700 Subject: [PATCH 4/7] tidy up --- intercept/src/dispatch.cpp | 1 + intercept/src/enummap.cpp | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/intercept/src/dispatch.cpp b/intercept/src/dispatch.cpp index a4b820e0..b2204740 100644 --- a/intercept/src/dispatch.cpp +++ b/intercept/src/dispatch.cpp @@ -8985,6 +8985,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetPerformanceConfigurationINTEL( /////////////////////////////////////////////////////////////////////////////// // // cl_khr_suggested_local_work_size +// This function should stay in sync with clGetKernelSuggestedLocalWorkSize, above. 
CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeKHR( cl_command_queue commandQueue, cl_kernel kernel, diff --git a/intercept/src/enummap.cpp b/intercept/src/enummap.cpp index ba896a2d..2af2203b 100644 --- a/intercept/src/enummap.cpp +++ b/intercept/src/enummap.cpp @@ -236,7 +236,6 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NODE_MASK ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT ); - ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_EXTENSIONS ); ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIRV_CAPABILITIES ); @@ -667,7 +666,7 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR ); #endif - // cl_khr_device_uuid - to OpenCL 3.1 + // cl_khr_device_uuid - promoted to OpenCL 3.1 //CL_DEVICE_UUID_KHR //CL_DRIVER_UUID_KHR //CL_DEVICE_LUID_VALID_KHR @@ -775,7 +774,7 @@ CEnumNameMap::CEnumNameMap() //CL_COMMAND_BUFFER_STATE_FINALIZED_KHR 2 // cl_khr_extended_versioning extension - // Most enums for this extension were added to OpenCL 3.0. + // Most enums for this extension were promoted to OpenCL 3.0. 
//CL_PLATFORM_NUMERIC_VERSION_KHR //CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR //CL_DEVICE_NUMERIC_VERSION_KHR @@ -839,14 +838,14 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_ICD_SUFFIX_KHR ); ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_NOT_FOUND_KHR ); - // cl_khr_il_program - to OpenCL 2.1 + // cl_khr_il_program - promoted to OpenCL 2.1 //CL_DEVICE_IL_VERSION_KHR //CL_PROGRAM_IL_KHR // cl_khr_initalize_memory ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_MEMORY_INITIALIZE_KHR ); - // cl_khr_integer_dot_product - to OpenCL 3.1 + // cl_khr_integer_dot_product - promoted to OpenCL 3.1 //CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR //CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR @@ -878,7 +877,7 @@ CEnumNameMap::CEnumNameMap() ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIR_VERSIONS ); ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BINARY_TYPE_INTERMEDIATE ); - // cl_khr_spirv_queries - to OpenCL 3.1 + // cl_khr_spirv_queries - promoted to OpenCL 3.1 //CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR //CL_DEVICE_SPIRV_EXTENSIONS_KHR //CL_DEVICE_SPIRV_CAPABILITIES_KHR @@ -886,7 +885,7 @@ CEnumNameMap::CEnumNameMap() // cl_khr_subgroup_named_barrier ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR ); - // cl_khr_subgroups - to OpenCL 2.1 + // cl_khr_subgroups - promoted to OpenCL 2.1 //CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR //CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR From d96fa0ba014af82a26211fb926a8e8d8ba422cae Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 1 Jun 2026 09:04:38 -0700 Subject: [PATCH 5/7] a bit more tidy up --- intercept/src/enummap.h | 2 +- intercept/src/intercept.cpp | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/intercept/src/enummap.h b/intercept/src/enummap.h index a2301f40..400df7cf 100644 --- a/intercept/src/enummap.h +++ b/intercept/src/enummap.h @@ -68,7 +68,7 @@ class CEnumNameMap } \ else \ { \ - ret += ""; \ + ret += ""; \ } \ e &= ~check; \ } \ diff --git 
a/intercept/src/intercept.cpp b/intercept/src/intercept.cpp index 6fa96dc0..535ef34e 100644 --- a/intercept/src/intercept.cpp +++ b/intercept/src/intercept.cpp @@ -1908,16 +1908,17 @@ void CLIntercept::getPlatformInfoString( if( platform && m_PlatformInfoMap.find(platform) != m_PlatformInfoMap.end() ) { str += m_PlatformInfoMap.at(platform).Name; - { - char s[256]; - CLI_SPRINTF( s, 256, " (%p)", - platform ); - str += s; - } } else { - str += "ERROR"; + str += "Unknown"; + } + + { + char s[256]; + CLI_SPRINTF( s, 256, " (%p)", + platform ); + str += s; } } From 53ced00f1e529c87be264cad6672a7f4c1e88946 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 1 Jun 2026 09:58:31 -0700 Subject: [PATCH 6/7] add CodeQL build mode --- .github/workflows/codeql.yml | 1 + intercept/src/controls.h | 1370 ++++++++++++++++++++++++++++------ 2 files changed, 1153 insertions(+), 218 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index b5ee305a..f9975c6a 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -35,6 +35,7 @@ jobs: uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3 with: languages: cpp + build-mode: manual - name: Create Build Directory run: cmake -E make_directory ${{runner.workspace}}/build diff --git a/intercept/src/controls.h b/intercept/src/controls.h index 0a656821..00f0c81f 100644 --- a/intercept/src/controls.h +++ b/intercept/src/controls.h @@ -9,233 +9,1167 @@ #endif #ifndef CLI_CONTROL_SEPARATOR -#define CLI_CONTROL_SEPARATOR( _name ) +#define CLI_CONTROL_SEPARATOR(_name) #endif -CLI_CONTROL_SEPARATOR( Tracing Controls: ) -CLI_CONTROL( bool, BetaExtensionIntercepting, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will intercept extension APIs for beta extensions that are subject to change. 
If an application uses beta extensions and does not function correctly with the Intercept Layer for OpenCL Applications, setting this control to zero may allow the application to function correctly, albeit without the ability to debug and analyze the beta extension APIs." ) - -CLI_CONTROL_SEPARATOR( Logging Controls: ) -CLI_CONTROL( bool, SuppressLogging, false, "If set to a nonzero value, suppresses all logging output from the Intercept Layer for OpenCL Applications. This is particularly useful for tools that only want report data." ) -CLI_CONTROL( bool, AppendFiles, false, "By default, the Intercept Layer for OpenCL Applications log files will be created from scratch when the intercept DLL is loaded, and any Intercept Layer for OpenCL Applications report files will be created from scratch when the intercept DLL is unloaded. If AppendFiles is set to a nonzero value, the Intercept Layer for OpenCL Applications will append to an existing file instead of recreating it. This can be useful if an application loads and unloads the intercept DLL multiple times, or to simply preserve log or report data from run-to-run." ) -CLI_CONTROL( bool, LogToFile, false, "If set to a nonzero value, sends log information to the file \"clintercept_log.txt\" instead of to stderr." ) -CLI_CONTROL( bool, LogToDebugger, false, "If set to a nonzero value, sends log information to the debugger instead of to stderr. If both LogToFile and LogToDebugger are nonzero then log information will be sent both to a file and to the debugger." ) -CLI_CONTROL( int, LogIndent, 0, "Indents each log entry by this many spaces." ) -CLI_CONTROL( bool, BuildLogging, false, "If set to a nonzero value, logs the program build log after each call to clBuildProgram(). This will likely only function correctly for synchronous builds. Note that the build log is logged regardless of whether the program built successfully, which allows compiler warnings to be logged for successful compiles." 
) -CLI_CONTROL( bool, PreferredWorkGroupSizeMultipleLogging, false, "If set to a nonzero value, logs the preferred work group size multiple for each kernel after each call to clCreateKernel(). On some devices this is the equivalent of the SIMD size for this kernel." ) -CLI_CONTROL( bool, KernelInfoLogging, false, "If set to a nonzero value, logs information about the kernel after each call to clCreateKernel()." ) -CLI_CONTROL( bool, CallLogging, false, "If set to a nonzero value, logs function entry and exit information for every OpenCL call. This can be used to easily determine which OpenCL call is causing an application to crash or fail or if a crash occurs outside of an OpenCL call. This setting is best used with LogToFile or LogToDebugger as it can generate a lot of log data." ) -CLI_CONTROL( bool, CallLoggingEnqueueCounter, false, "If set to a nonzero value, logs the enqueue counter in addition to function entry and exit information for every OpenCL call. This can be used to determine appropriate limits for DumpBuffersMinEnqueue, DumpBuffersMaxEnqueue, DumpImagesMinEnqueue, or DumpBuffersMaxEnqueue. If CallLogging is disabled then this control will have no effect." ) -CLI_CONTROL( bool, CallLoggingThreadId, false, "If set to a nonzero value, logs the ID of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues." ) -CLI_CONTROL( bool, CallLoggingThreadNumber, false, "If set to a nonzero value, logs the symbolic number of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues." ) -CLI_CONTROL( bool, CallLoggingElapsedTime, false, "If set to a nonzero value, logs the elapsed time in microseconds in addition to function entry and exit information for every OpenCL call, starting from the time the intercept DLL is loaded." 
) -CLI_CONTROL( bool, ITTCallLogging, false, "If set to a nonzero value, logs function entry and exit information for every OpenCL call using the ITT APIs. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) -CLI_CONTROL( cl_uint, ChromeTraceBufferSize, 16384, "If set to a nonzero value, buffers JSON records for Chrome Tracing in memory before writing to a file. The buffer will be flushed when it fills, upon application termination, and optionally on blocking OpenCL calls.") -CLI_CONTROL( bool, ChromeTraceBufferingBlockingCallFlush, true, "If set to a nonzero value, flushes buffered JSON records for Chrome Tracing after blocking OpenCL calls.") -CLI_CONTROL( bool, ChromeCallLogging, false, "If set to a nonzero value, logs function entry and exit information and host performance timing for every OpenCL call to a JSON file that may be used for Chrome Tracing." ) -CLI_CONTROL( bool, ChromeFlowEvents, false, "If set to a nonzero value, adds flow events between OpenCL calls and OpenCL commands in a JSON file that may be used for Chrome Tracing. Requires both ChromeCallLogging and ChromePerformanceTiming." ) -CLI_CONTROL( bool, ErrorLogging, false, "If set to a nonzero value, logs all OpenCL errors and the function name that caused the error." ) -CLI_CONTROL( bool, ErrorAssert, false, "If set to a nonzero value, breaks into the debugger when an OpenCL error occurs." ) -CLI_CONTROL( bool, ContextCallbackLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will install a callback for every context and log any calls to the context callback. The application's context callback, if any, will be invoked after the Intercept Layer for OpenCL Applications' context callback." 
) -CLI_CONTROL( cl_uint, ContextHintLevel, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to create contexts with the CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL property set to the specified value. If this property is specified by the application, the Intercept Layer for OpenCL Applications will overwrite it with the specified value, otherwise the property and the specified value will be added to the list of context creation properties. This functionality is only available for OpenCL implementations that support the cl_intel_driver_diagnostics extension. If this functionality is not available in the underlying OpenCL implementation, the unmodified list of context properties will be used to create the context instead. More information about this feature, including valid values and their meaning, can be found in the cl_intel_driver_diagnostics extension specification." ) -CLI_CONTROL( bool, EventCallbackLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will install its own callback for every event callback and log the call to the event callback. The application's event callback will be invoked after the Intercept Layer for OpenCL Applications' event callback." ) -CLI_CONTROL( bool, QueueInfoLogging, false, "If set to a nonzero value, logs information about a queue when it is created." ) -CLI_CONTROL( bool, EventChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check and log any events in an event wait list that are invalid or in an error state. This can help to debug complex event dependency issues." ) -CLI_CONTROL( bool, LeakChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for leaks of various OpenCL objects, such as memory objects and events." 
) -CLI_CONTROL( bool, USMChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for incorrect usage of Unified Shared Memory (USM) pointers." ) -CLI_CONTROL( bool, CLInfoLogging, false, "If set to a nonzero value, logs information about the platforms and devices in the system on the first call to clGetPlatformIDs()." ) -CLI_CONTROL( bool, FlushFiles, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will flush files after ever write. This slows down performance but can help to avoid truncated files if the Intercept Layer for OpenCL Applications does not exit cleanly." ) -CLI_CONTROL( std::string, DumpDir, "", "If set, the Intercept Layer for OpenCL Applications will emit logs and dumps to this directory instead of the default directory. The default log and dump directory is \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\" on Windows and \"~/CLIntercept_Dump/\" on other operating systems. The log and dump directory must be writeable, otherwise the Intercept Layer for OpenCL Applications will not be able to create or modify log or dump files." ) -CLI_CONTROL( bool, AppendPid, false, "If set, the Intercept Layer for OpenCL Applications will append process ID to the log directory name." ) -CLI_CONTROL( bool, UniqueFiles, false, "If set, the Intercept Layer for OpenCL Applications will find a unique file name for logs and reports by appending a number to the file names, if needed." ) -CLI_CONTROL( bool, KernelNameHashTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will append the program and build option hashes to the kernel name in logs and reports." ) -CLI_CONTROL( cl_uint, LongKernelNameCutoff, UINT_MAX, "If an OpenCL application uses kernels with very long names, the Intercept Layer for OpenCL Applications can substitute a \"short\" kernel identifier for a \"long\" kernel name in logs and reports. 
This control defines how long a kernel name must be (in characters) before it is replaced by a \"short\" kernel identifier." ) -CLI_CONTROL( bool, DemangleKernelNames, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will track kernel names that are demangled according to C++ ABI rules. This setting requires compiler support for demangling and may not be available in all configurations." ) - -CLI_CONTROL_SEPARATOR( Reporting Controls: ) -CLI_CONTROL( bool, ReportToStderr, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emit reports to stderr." ) -CLI_CONTROL( bool, ReportToFile, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will write results to the file \"clintercept_report.txt\"." ) -CLI_CONTROL( cl_uint, ReportInterval, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate a report at regular intervals (based on the enqueue counter). This can be useful to generate report data while a long-running application is executing, or if an application does not exit cleanly." ) - -CLI_CONTROL_SEPARATOR( Performance Timing Controls: ) -CLI_CONTROL( bool, HostPerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will track the minimum, maximum, and average host CPU time for each OpenCL entry point. When the process exits, this information will be included in the file \"clIntercept_report.txt\"." ) -CLI_CONTROL( bool, ToolOverheadTiming, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will include some types of tool overhead in timing reports and some types of logging." ) -CLI_CONTROL( bool, DevicePerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will add event profiling to track the minimum, maximum, and average device time for each OpenCL command. 
This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. When the process exits, this information will be included in the file \"clIntercept_report.txt\"." ) -CLI_CONTROL( bool, DevicePerformanceTimingHistogram, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will report a histogram of device times in addition to the table of device times for each OpenCL command." ) -CLI_CONTROL( bool, DevicePerformanceTimeKernelInfoTracking,false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels using information such as the kernel's Preferred Work Group Size Multiple (AKA SIMD size)." ) -CLI_CONTROL( bool, DevicePerformanceTimeGWOTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different global work offsets for the purpose of device performance timing." ) -CLI_CONTROL( bool, DevicePerformanceTimeGWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different global work sizes for the purpose of device performance timing." ) -CLI_CONTROL( bool, DevicePerformanceTimeLWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different local work sizes for the purpose of device performance timing." ) -CLI_CONTROL( bool, DevicePerformanceTimeSuggestedLWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to query and track the suggested local work size when the passed-in local work size is NULL." 
) -CLI_CONTROL( bool, DevicePerformanceTimeTransferTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between transfer operations of different sizes for the purpose of device performance timing." ) -CLI_CONTROL( bool, DevicePerformanceTimingKernelsOnly, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will collect device performance timing for kernel commands only" ) -CLI_CONTROL( bool, DevicePerformanceTimingSkipUnmap, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will skip device performance timing for unmap operations. This is a workaround for a bug in some OpenCL implementations, where querying events created from unmap operations results in driver crashes." ) -CLI_CONTROL( cl_uint, HostPerformanceTimingMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only collect host performance timing metrics when the enqueue counter is greater than this value, inclusive." ) -CLI_CONTROL( cl_uint, HostPerformanceTimingMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only collect host performance timing metrics when the enqueue counter is less than this value, inclusive." ) -CLI_CONTROL( cl_uint, DevicePerformanceTimingMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only collect device performance timing metrics when the enqueue counter is greater than this value, inclusive." ) -CLI_CONTROL( cl_uint, DevicePerformanceTimingMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only collect device performance timing metrics when the enqueue counter is less than this value, inclusive." ) -CLI_CONTROL( bool, HostPerformanceTimeLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the host elapsed time for each OpenCL entry point. This can be useful to identify OpenCL entry points that execute significantly slower or faster than average on the host." 
) -CLI_CONTROL( bool, DevicePerformanceTimeLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution time deltas for each OpenCL command. This can be useful to identify specific OpenCL commands that execute significantly slower or faster than average on the device. If DevicePerformanceTiming is disabled then this control will have no effect." ) -CLI_CONTROL( bool, DevicePerformanceTimelineLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution times for each OpenCL command. This can be useful to visualize the execution timeline of OpenCL commands that execute on the device. If DevicePerformanceTiming is disabled then this control will have no effect." ) -CLI_CONTROL( std::string, DevicePerfCounterLibName, "", "Full path to MDAPI shared library. If not set, the default MDAPI library will be used.") -CLI_CONTROL( bool, DevicePerfCounterEventBasedSampling, false, "If set to a nonzero value and DevicePerfCounterCustom is set, the Intercept Layer for OpenCL Applications will enable Intel GPU Performance Counters to track the minimum, maximum, and average performance counter deltas for each OpenCL command. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) -CLI_CONTROL( bool, DevicePerfCounterTimeBasedSampling, false, "If set to a nonzero value and DevicePerfCounterCustom is set, the Intercept Layer for OpenCL Applications will enable Intel GPU Performance Counters to track performance counter deltas at regular time intervals. This operation may be fairly intrusive and may have side effects. 
This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) -CLI_CONTROL( uint32_t, DevicePerfCounterAdapterIndex, 0, "Select which MDAPI device to report performance counters." ) -CLI_CONTROL( std::string, DevicePerfCounterCustom, "", "If set, the Intercept Layer for OpenCL Applications will collect MDAPI metrics for the Metric Set corresponding to this value for each OpenCL command. Frequently used Metric Sets include: ComputeBasic, ComputeExtended, L3_1, Sampler. The output file has the potential to be very big depending on the work load. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. When the process exits, this information will be included in the file \"clintercept_perfcounter_dump_.txt\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) -CLI_CONTROL( std::string, DevicePerfCounterFile, "", "Full path to a custom MDAPI file. This can be used to add custom Metric Sets." ) -CLI_CONTROL( bool, DevicePerfCounterTiming, false, "If set to a nonzero value and DevicePerfCounterEventBasedSampling is set, the Intercept Layer for OpenCL Applications will report the average Intel GPU Performance Counters for each OpenCL command. When the process exits, this information will be included in the file \"clIntercept_report.txt\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) -CLI_CONTROL( bool, DevicePerfCounterReportMax, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will collect also max values of target platform to .csv with MDAPI counters as a column next to each metric." 
) -CLI_CONTROL( uint32_t, DevicePerfCounterTimeBasedSamplingPeriod, 1000, "The sampling period for Intel GPU Performance Counter Time-based Sampling, in microseconds. A smaller sampling period increases overhead and the likelihood dropped samples but can be more precise. Note that some devices do not support very small sampling periods." ) -CLI_CONTROL( uint32_t, DevicePerfCounterTimeBasedBufferSize, 0, "The buffer size for Intel GPU Performance Counter Time-based Sampling, in bytes. When set to zero, automatically chooses the device maximum buffer size. A larger buffer size will decrease the likelihood of dropped samples." ) -CLI_CONTROL( bool, ITTPerformanceTiming, false, "[Note: This control makes ITT calls, but they appear to do nothing!] If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate ITT-compatible performance timing data. Similar to DevicePerformanceTiming, this operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. ITTPerformanceTiming will also silently create OpenCL command queues that support advanced performance counters if this functionality is available. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) -CLI_CONTROL( bool, ITTShowOnlyExecutingEvents, false, "[Note: This control makes ITT calls, but they appear to do nothing!] By default, when ITTPerformanceTiming is enabled, the Intercept Layer for OpenCL Applications will generate ITT-compatible information for all states of an OpenCL event: when the command was queued, when it was submitted, when it started executing, and when it finished executing. 
If ITTShowOnlyExecutingEvents is set to a nonzero value, the Intercept Layer for OpenCL Applications will only generate ITT-compatible instrumentation when an event begins executing and when an event ends executing. Since no information will be displayed about when a command is queued or submitted, this can sometimes make it easier to identify times when the device is idle. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) -CLI_CONTROL( bool, ChromePerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate device performance timing information in a JSON file that may be used for Chrome Tracing." ) -CLI_CONTROL( bool, ChromePerformanceTimingInStages, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will separate the performance information placed in the JSON file into Queued, Submitted, and Execution stages. It will also reorder the threads/queues by starting runtime. This flag is only functional when ChromePerformanceTiming is also set." ) -CLI_CONTROL( bool, ChromePerformanceTimingPerKernel, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will organize the performance information placed in the JSON file on a per kernel name basis. It is only functional when ChromePerformanceTiming is also set. When ChromePerformanceTimingInStages is also set, information about event stages will be retained." ) -CLI_CONTROL( bool, ChromePerformanceTimingEstimateQueuedTime, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will unconditionally estimate the queued time for Chrome Tracing rather than computing it using device and host timers and event profiling data. The estimated time is less accurate than the computed time, but may be more reliable if the device and host timers or event profiling data is incorrect or imprecise." 
) -CLI_CONTROL( bool, PerformanceTimingConditional, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will only collect host performance timing, device performance timing, and chrome performance timing conditionally, when the \"CLI_ENABLE_PERFORMANCE_TIMING\" environment variable is set to a non-zero value." ) +CLI_CONTROL_SEPARATOR(Tracing Controls:) +CLI_CONTROL( + bool, BetaExtensionIntercepting, true, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will intercept extension APIs for beta extensions that are subject to " + "change. If an application uses beta extensions and does not function " + "correctly with the Intercept Layer for OpenCL Applications, setting this " + "control to zero may allow the application to function correctly, albeit " + "without the ability to debug and analyze the beta extension APIs.") + +CLI_CONTROL_SEPARATOR(Logging Controls:) +CLI_CONTROL(bool, SuppressLogging, false, + "If set to a nonzero value, suppresses all logging output from the " + "Intercept Layer for OpenCL Applications. This is particularly " + "useful for tools that only want report data.") +CLI_CONTROL( + bool, AppendFiles, false, + "By default, the Intercept Layer for OpenCL Applications log files will be " + "created from scratch when the intercept DLL is loaded, and any Intercept " + "Layer for OpenCL Applications report files will be created from scratch " + "when the intercept DLL is unloaded. If AppendFiles is set to a nonzero " + "value, the Intercept Layer for OpenCL Applications will append to an " + "existing file instead of recreating it. 
This can be useful if an " + "application loads and unloads the intercept DLL multiple times, or to " + "simply preserve log or report data from run-to-run.") +CLI_CONTROL(bool, LogToFile, false, + "If set to a nonzero value, sends log information to the file " + "\"clintercept_log.txt\" instead of to stderr.") +CLI_CONTROL( + bool, LogToDebugger, false, + "If set to a nonzero value, sends log information to the debugger instead " + "of to stderr. If both LogToFile and LogToDebugger are nonzero then log " + "information will be sent both to a file and to the debugger.") +CLI_CONTROL(int, LogIndent, 0, "Indents each log entry by this many spaces.") +CLI_CONTROL( + bool, BuildLogging, false, + "If set to a nonzero value, logs the program build log after each call to " + "clBuildProgram(). This will likely only function correctly for " + "synchronous builds. Note that the build log is logged regardless of " + "whether the program built successfully, which allows compiler warnings to " + "be logged for successful compiles.") +CLI_CONTROL( + bool, PreferredWorkGroupSizeMultipleLogging, false, + "If set to a nonzero value, logs the preferred work group size multiple " + "for each kernel after each call to clCreateKernel(). On some devices " + "this is the equivalent of the SIMD size for this kernel.") +CLI_CONTROL(bool, KernelInfoLogging, false, + "If set to a nonzero value, logs information about the kernel " + "after each call to clCreateKernel().") +CLI_CONTROL( + bool, CallLogging, false, + "If set to a nonzero value, logs function entry and exit information for " + "every OpenCL call. This can be used to easily determine which OpenCL " + "call is causing an application to crash or fail or if a crash occurs " + "outside of an OpenCL call. 
This setting is best used with LogToFile or " + "LogToDebugger as it can generate a lot of log data.") +CLI_CONTROL( + bool, CallLoggingEnqueueCounter, false, + "If set to a nonzero value, logs the enqueue counter in addition to " + "function entry and exit information for every OpenCL call. This can be " + "used to determine appropriate limits for DumpBuffersMinEnqueue, " + "DumpBuffersMaxEnqueue, DumpImagesMinEnqueue, or DumpImagesMaxEnqueue. " + "If CallLogging is disabled then this control will have no effect.") +CLI_CONTROL(bool, CallLoggingThreadId, false, + "If set to a nonzero value, logs the ID of the calling thread in " + "addition to function entry and exit information for every OpenCL " + "call. This can be helpful when debugging multi-threading issues.") +CLI_CONTROL( + bool, CallLoggingThreadNumber, false, + "If set to a nonzero value, logs the symbolic number of the calling thread " + "in addition to function entry and exit information for every OpenCL call. " + " This can be helpful when debugging multi-threading issues.") +CLI_CONTROL(bool, CallLoggingElapsedTime, false, + "If set to a nonzero value, logs the elapsed time in microseconds " + "in addition to function entry and exit information for every " + "OpenCL call, starting from the time the intercept DLL is loaded.") +CLI_CONTROL( + bool, ITTCallLogging, false, + "If set to a nonzero value, logs function entry and exit information for " + "every OpenCL call using the ITT APIs. This feature will only function if " + "the Intercept Layer for OpenCL Applications is built with ITT support.") +CLI_CONTROL(cl_uint, ChromeTraceBufferSize, 16384, + "If set to a nonzero value, buffers JSON records for Chrome " + "Tracing in memory before writing to a file. 
The buffer will be " + "flushed when it fills, upon application termination, and " + "optionally on blocking OpenCL calls.") +CLI_CONTROL(bool, ChromeTraceBufferingBlockingCallFlush, true, + "If set to a nonzero value, flushes buffered JSON records for " + "Chrome Tracing after blocking OpenCL calls.") +CLI_CONTROL(bool, ChromeCallLogging, false, + "If set to a nonzero value, logs function entry and exit " + "information and host performance timing for every OpenCL call to " + "a JSON file that may be used for Chrome Tracing.") +CLI_CONTROL( + bool, ChromeFlowEvents, false, + "If set to a nonzero value, adds flow events between OpenCL calls and " + "OpenCL commands in a JSON file that may be used for Chrome Tracing. " + "Requires both ChromeCallLogging and ChromePerformanceTiming.") +CLI_CONTROL(bool, ErrorLogging, false, + "If set to a nonzero value, logs all OpenCL errors and the " + "function name that caused the error.") +CLI_CONTROL(bool, ErrorAssert, false, + "If set to a nonzero value, breaks into the debugger when an " + "OpenCL error occurs.") +CLI_CONTROL(bool, ContextCallbackLogging, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will install a callback for every context and log " + "any calls to the context callback. The application's context " + "callback, if any, will be invoked after the Intercept Layer for " + "OpenCL Applications' context callback.") +CLI_CONTROL( + cl_uint, ContextHintLevel, 0, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will attempt to create contexts with the " + "CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL property set to the specified value. " + "If this property is specified by the application, the Intercept Layer for " + "OpenCL Applications will overwrite it with the specified value, otherwise " + "the property and the specified value will be added to the list of context " + "creation properties. 
This functionality is only available for OpenCL " + "implementations that support the cl_intel_driver_diagnostics extension. " + "If this functionality is not available in the underlying OpenCL " + "implementation, the unmodified list of context properties will be used to " + "create the context instead. More information about this feature, " + "including valid values and their meaning, can be found in the " + "cl_intel_driver_diagnostics extension specification.") +CLI_CONTROL( + bool, EventCallbackLogging, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will install its own callback for every event callback and log the call " + "to the event callback. The application's event callback will be invoked " + "after the Intercept Layer for OpenCL Applications' event callback.") +CLI_CONTROL(bool, QueueInfoLogging, false, + "If set to a nonzero value, logs information about a queue when it " + "is created.") +CLI_CONTROL(bool, EventChecking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will check and log any events in an event wait list " + "that are invalid or in an error state. This can help to debug " + "complex event dependency issues.") +CLI_CONTROL(bool, LeakChecking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will check for leaks of various OpenCL objects, such " + "as memory objects and events.") +CLI_CONTROL( + bool, USMChecking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will check for incorrect usage of Unified Shared Memory (USM) pointers.") +CLI_CONTROL( + bool, CLInfoLogging, false, + "If set to a nonzero value, logs information about the platforms and " + "devices in the system on the first call to clGetPlatformIDs().") +CLI_CONTROL(bool, FlushFiles, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will flush files after every write. 
This slows down " + "performance but can help to avoid truncated files if the " + "Intercept Layer for OpenCL Applications does not exit cleanly.") +CLI_CONTROL(std::string, DumpDir, "", + "If set, the Intercept Layer for OpenCL Applications will emit " + "logs and dumps to this directory instead of the default " + "directory. The default log and dump directory is " + "\"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\" on " + "Windows and \"~/CLIntercept_Dump/\" on other " + "operating systems. The log and dump directory must be writeable, " + "otherwise the Intercept Layer for OpenCL Applications will not be " + "able to create or modify log or dump files.") +CLI_CONTROL(bool, AppendPid, false, + "If set, the Intercept Layer for OpenCL Applications will append " + "process ID to the log directory name.") +CLI_CONTROL(bool, UniqueFiles, false, + "If set, the Intercept Layer for OpenCL Applications will find a " + "unique file name for logs and reports by appending a number to " + "the file names, if needed.") +CLI_CONTROL(bool, KernelNameHashTracking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will append the program and build option hashes to " + "the kernel name in logs and reports.") +CLI_CONTROL( + cl_uint, LongKernelNameCutoff, UINT_MAX, + "If an OpenCL application uses kernels with very long names, the Intercept " + "Layer for OpenCL Applications can substitute a \"short\" kernel " + "identifier for a \"long\" kernel name in logs and reports. This control " + "defines how long a kernel name must be (in characters) before it is " + "replaced by a \"short\" kernel identifier.") +CLI_CONTROL(bool, DemangleKernelNames, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will track kernel names that are demangled according " + "to C++ ABI rules. 
This setting requires compiler support for " + "demangling and may not be available in all configurations.") + +CLI_CONTROL_SEPARATOR(Reporting Controls:) +CLI_CONTROL(bool, ReportToStderr, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will emit reports to stderr.") +CLI_CONTROL( + bool, ReportToFile, true, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will write results to the file \"clintercept_report.txt\".") +CLI_CONTROL(cl_uint, ReportInterval, 0, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will generate a report at regular intervals (based " + "on the enqueue counter). This can be useful to generate report " + "data while a long-running application is executing, or if an " + "application does not exit cleanly.") + +CLI_CONTROL_SEPARATOR(Performance Timing Controls:) +CLI_CONTROL( + bool, HostPerformanceTiming, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will track the minimum, maximum, and average host CPU time for each " + "OpenCL entry point. When the process exits, this information will be " + "included in the file \"clIntercept_report.txt\".") +CLI_CONTROL(bool, ToolOverheadTiming, true, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will include some types of tool overhead in timing " + "reports and some types of logging.") +CLI_CONTROL( + bool, DevicePerformanceTiming, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will add event profiling to track the minimum, maximum, and average " + "device time for each OpenCL command. This operation may be fairly " + "intrusive and may have side effects; in particular it forces all command " + "queues to be created with PROFILING_ENABLED and may increment the " + "reference count for application events. 
When the process exits, this " + "information will be included in the file \"clIntercept_report.txt\".") +CLI_CONTROL(bool, DevicePerformanceTimingHistogram, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will report a histogram of device times in addition " + "to the table of device times for each OpenCL command.") +CLI_CONTROL( + bool, DevicePerformanceTimeKernelInfoTracking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will distinguish between OpenCL NDRange kernels using information such as " + "the kernel's Preferred Work Group Size Multiple (AKA SIMD size).") +CLI_CONTROL( + bool, DevicePerformanceTimeGWOTracking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will distinguish between OpenCL NDRange kernels with different global " + "work offsets for the purpose of device performance timing.") +CLI_CONTROL( + bool, DevicePerformanceTimeGWSTracking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will distinguish between OpenCL NDRange kernels with different global " + "work sizes for the purpose of device performance timing.") +CLI_CONTROL( + bool, DevicePerformanceTimeLWSTracking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will distinguish between OpenCL NDRange kernels with different local work " + "sizes for the purpose of device performance timing.") +CLI_CONTROL(bool, DevicePerformanceTimeSuggestedLWSTracking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will attempt to query and track the suggested local " + "work size when the passed-in local work size is NULL.") +CLI_CONTROL(bool, DevicePerformanceTimeTransferTracking, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will distinguish between transfer operations of " + "different sizes for the purpose of device performance 
timing.") +CLI_CONTROL( + bool, DevicePerformanceTimingKernelsOnly, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will collect device performance timing for kernel commands only") +CLI_CONTROL( + bool, DevicePerformanceTimingSkipUnmap, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will skip device performance timing for unmap operations. This is a " + "workaround for a bug in some OpenCL implementations, where querying " + "events created from unmap operations results in driver crashes.") +CLI_CONTROL(cl_uint, HostPerformanceTimingMinEnqueue, 0, + "The Intercept Layer for OpenCL Applications will only collect " + "host performance timing metrics when the enqueue counter is " + "greater than this value, inclusive.") +CLI_CONTROL(cl_uint, HostPerformanceTimingMaxEnqueue, UINT_MAX, + "The Intercept Layer for OpenCL Applications will only collect " + "host performance timing metrics when the enqueue counter is less " + "than this value, inclusive.") +CLI_CONTROL(cl_uint, DevicePerformanceTimingMinEnqueue, 0, + "The Intercept Layer for OpenCL Applications will only collect " + "device performance timing metrics when the enqueue counter is " + "greater than this value, inclusive.") +CLI_CONTROL(cl_uint, DevicePerformanceTimingMaxEnqueue, UINT_MAX, + "The Intercept Layer for OpenCL Applications will only collect " + "device performance timing metrics when the enqueue counter is " + "less than this value, inclusive.") +CLI_CONTROL(bool, HostPerformanceTimeLogging, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will log the host elapsed time for each OpenCL entry " + "point. 
This can be useful to identify OpenCL entry points that " + "execute significantly slower or faster than average on the host.") +CLI_CONTROL(bool, DevicePerformanceTimeLogging, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will log the device execution time deltas for each " + "OpenCL command. This can be useful to identify specific OpenCL " + "commands that execute significantly slower or faster than average " + "on the device. If DevicePerformanceTiming is disabled then this " + "control will have no effect.") +CLI_CONTROL( + bool, DevicePerformanceTimelineLogging, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will log the device execution times for each OpenCL command. This can be " + "useful to visualize the execution timeline of OpenCL commands that " + "execute on the device. If DevicePerformanceTiming is disabled then this " + "control will have no effect.") +CLI_CONTROL(std::string, DevicePerfCounterLibName, "", + "Full path to MDAPI shared library. If not set, the default MDAPI " + "library will be used.") +CLI_CONTROL( + bool, DevicePerfCounterEventBasedSampling, false, + "If set to a nonzero value and DevicePerfCounterCustom is set, the " + "Intercept Layer for OpenCL Applications will enable Intel GPU Performance " + "Counters to track the minimum, maximum, and average performance counter " + "deltas for each OpenCL command. This operation may be fairly intrusive " + "and may have side effects; in particular it forces all command queues to " + "be created with PROFILING_ENABLED and may increment the reference count " + "for application events. 
This feature will only function if the Intercept " + "Layer for OpenCL Applications is built with MDAPI support.") +CLI_CONTROL( + bool, DevicePerfCounterTimeBasedSampling, false, + "If set to a nonzero value and DevicePerfCounterCustom is set, the " + "Intercept Layer for OpenCL Applications will enable Intel GPU Performance " + "Counters to track performance counter deltas at regular time intervals. " + "This operation may be fairly intrusive and may have side effects. This " + "feature will only function if the Intercept Layer for OpenCL Applications " + "is built with MDAPI support.") +CLI_CONTROL(uint32_t, DevicePerfCounterAdapterIndex, 0, + "Select which MDAPI device to report performance counters.") +CLI_CONTROL(std::string, DevicePerfCounterCustom, "", + "If set, the Intercept Layer for OpenCL Applications will collect " + "MDAPI metrics for the Metric Set corresponding to this value for " + "each OpenCL command. Frequently used Metric Sets include: " + "ComputeBasic, ComputeExtended, L3_1, Sampler. The output file has " + "the potential to be very big depending on the work load. This " + "operation may be fairly intrusive and may have side effects; in " + "particular it forces all command queues to be created with " + "PROFILING_ENABLED and may increment the reference count for " + "application events. When the process exits, this information will " + "be included in the file \"clintercept_perfcounter_dump_.txt\". This feature will only function if the Intercept " + "Layer for OpenCL Applications is built with MDAPI support.") +CLI_CONTROL(std::string, DevicePerfCounterFile, "", + "Full path to a custom MDAPI file. This can be used to add custom " + "Metric Sets.") +CLI_CONTROL( + bool, DevicePerfCounterTiming, false, + "If set to a nonzero value and DevicePerfCounterEventBasedSampling is set, " + "the Intercept Layer for OpenCL Applications will report the average Intel " + "GPU Performance Counters for each OpenCL command. 
When the process exits, " + "this information will be included in the file \"clIntercept_report.txt\". " + " This feature will only function if the Intercept Layer for OpenCL " + "Applications is built with MDAPI support.") +CLI_CONTROL(bool, DevicePerfCounterReportMax, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will collect also max values of target platform to " + ".csv with MDAPI counters as a column next to each metric.") +CLI_CONTROL( + bool, DevicePerfCounterTimeBasedSamplingPeriod, 1000, + "The sampling period for Intel GPU Performance Counter Time-based " + "Sampling, in microseconds. A smaller sampling period increases overhead " + "and the likelihood of dropped samples but can be more precise. Note that " + "some devices do not support very small sampling periods.") +CLI_CONTROL(uint32_t, DevicePerfCounterTimeBasedBufferSize, 0, + "The buffer size for Intel GPU Performance Counter Time-based " + "Sampling, in bytes. When set to zero, automatically chooses the " + "device maximum buffer size. A larger buffer size will decrease " + "the likelihood of dropped samples.") +CLI_CONTROL( + bool, ITTPerformanceTiming, false, + "[Note: This control makes ITT calls, but they appear to do nothing!] If " + "set to a nonzero value, the Intercept Layer for OpenCL Applications will " + "generate ITT-compatible performance timing data. Similar to " + "DevicePerformanceTiming, this operation may be fairly intrusive and may " + "have side effects; in particular it forces all command queues to be " + "created with PROFILING_ENABLED and may increment the reference count for " + "application events. ITTPerformanceTiming will also silently create " + "OpenCL command queues that support advanced performance counters if this " + "functionality is available.
This feature will only function if the " + "Intercept Layer for OpenCL Applications is built with ITT support.") +CLI_CONTROL( + bool, ITTShowOnlyExecutingEvents, false, + "[Note: This control makes ITT calls, but they appear to do nothing!] By " + "default, when ITTPerformanceTiming is enabled, the Intercept Layer for " + "OpenCL Applications will generate ITT-compatible information for all " + "states of an OpenCL event: when the command was queued, when it was " + "submitted, when it started executing, and when it finished executing. If " + "ITTShowOnlyExecutingEvents is set to a nonzero value, the Intercept Layer " + "for OpenCL Applications will only generate ITT-compatible instrumentation " + "when an event begins executing and when an event ends executing. Since no " + "information will be displayed about when a command is queued or " + "submitted, this can sometimes make it easier to identify times when the " + "device is idle. This feature will only function if the Intercept Layer " + "for OpenCL Applications is built with ITT support.") +CLI_CONTROL(bool, ChromePerformanceTiming, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will generate device performance timing information " + "in a JSON file that may be used for Chrome Tracing.") +CLI_CONTROL(bool, ChromePerformanceTimingInStages, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will separate the performance information placed in " + "the JSON file into Queued, Submitted, and Execution stages. It " + "will also reorder the threads/queues by starting runtime. This " + "flag is only functional when ChromePerformanceTiming is also set.") +CLI_CONTROL( + bool, ChromePerformanceTimingPerKernel, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will organize the performance information placed in the JSON file on a " + "per kernel name basis. 
It is only functional when ChromePerformanceTiming " + "is also set. When ChromePerformanceTimingInStages is also set, " + "information about event stages will be retained.") +CLI_CONTROL( + bool, ChromePerformanceTimingEstimateQueuedTime, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will unconditionally estimate the queued time for Chrome Tracing rather " + "than computing it using device and host timers and event profiling data. " + "The estimated time is less accurate than the computed time, but may be " + "more reliable if the device and host timers or event profiling data is " + "incorrect or imprecise.") +CLI_CONTROL(bool, PerformanceTimingConditional, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will only collect host performance timing, device " + "performance timing, and chrome performance timing conditionally, " + "when the \"CLI_ENABLE_PERFORMANCE_TIMING\" environment variable " + "is set to a non-zero value.") CLI_CONTROL_SEPARATOR( Controls for Dumping and Injecting Programs and Build Options: ) -CLI_CONTROL( bool, OmitProgramNumber, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the program number from dumped file names and hash tracking. This can produce deterministic results even if programs are built in a non-deterministic order (say, by multiple threads)." ) -CLI_CONTROL( bool, OmitCompileCount, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the compile count from dumped file names and hash tracking. This can reduce the number of files that are dumped if the same program is compiled multiple times." 
) -CLI_CONTROL( bool, SimpleDumpProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the last string(s) passed to clCreateProgramWithSource() to the file kernel.cl, and the last program options passed to clBuildProgram() to the file kernel.txt. These files will be dumped to the application's working directory. If an application fails to compile a program and exits the program immediately after detecting a compile failure SimpleDumpProgram may be all that is needed to identify the program and program options that are failing to compile." ) -CLI_CONTROL( bool, DumpProgramSourceScript, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The directory names and file names for the dumped files match the directory names and file names expected by a modified OpenCL conformance test script to capture kernels. This setting overrides SimpleDumpProgramSource, and if it is set to a nonzero value then the value of SimpleDumpProgramSource is ignored." ) -CLI_CONTROL( bool, DumpProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The file name will have the form \"CLI___source.cl\". Program options will be dumped to the same directory with the file name \"CLI______options.txt\", where API is an empty string for clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for clLinkProgram(). This setting can be used for information purposes to see all kernels that are used by an application or to dump programs for program injection. This setting overrides DumpProgramSourceScript and SimpleDumpProgramSource, and if it is set to a nozero value then the values of DumpProgramSourceScript and SimpleDumpProgramSource will be ignored." 
) -CLI_CONTROL( bool, DumpInputProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that is passed to clCreateProgramWithBinary() to its own file. The file name will have the form \"CLI___.bin\". This is the input program binary provided by the application, and not a device binary queried from the OpenCL implementation. In particular, note that it may be a SPIR 1.2 binary." ) -CLI_CONTROL( bool, DumpProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that was successfully built with clBuildProgram() to its own file. The file name will have the form \"CLI_____.bin\". Program options will be dumped to the same directory with the file name \"CLI______options.txt\", where API is an empty string for clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for clLinkProgram(). This setting can be used to examine compiled program binaries or to dump program binaries for program binary injection. Note that this option dumps the output binary, which is a device binary, after calling clBuildProgram() or clLinkProgram()." ) -CLI_CONTROL( bool, DumpProgramSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program IL binary passed to clCreateProgramWithIL() to its own file. The file name will have the form \"CLI___0000.spv\" - for now at least!. Program options will be dumped to the same directory with the file name \"CLI______options.txt\", where is an empty string for clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for clLinkProgram(). This setting can be used for information purposes to see all kernels that are used by an application or to dump SPIRV programs for SPIRV injection." 
) -CLI_CONTROL( bool, InjectProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel source to clCreateProgramWithSource() and/or potentially modified options to clCompileProgram() or clBuildProgram(). Note that program options currently cannot be injected for clLinkProgram()." ) -CLI_CONTROL( bool, InjectProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel binaries via clCreateProgramWithBinary() in place of program text for each call to clCreateProgramWithSource(). This is typically done to reduce program compilation time or to use known good program binaries." ) -CLI_CONTROL( bool, RejectProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will reject kernel binaries passed via clCreateProgramWithBinary() and return CL_INVALID_BINARY. This can be used to force an application to re-compile program binaries from source." ) -CLI_CONTROL( bool, InjectProgramSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel SPIR-V binaries via clCreateProgramWithIL() in place of program text for each call to clCreateProgramWithSource()." ) -CLI_CONTROL( bool, PrependProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to prepend kernel code from a file to the application provided kernel source passed to clCreateProgramWithSource(). The Intercept Layer for OpenCL Applications will look for kernel source to prepend in the dump and log directory. The files that are searched for are (in order) \"CLI___prepend.cl\", \"CLI__prepend.cl\", and \"CLI_prepend.cl\"." 
) -CLI_CONTROL( std::string, AppendBuildOptions, "", "If set, the Intercept Layer for OpenCL Applications will add these build options to the end of any application provided or injected build options for each call to clCompileProgram or clBuildProgram()." ) -CLI_CONTROL( std::string, AppendLinkOptions, "", "If set, the Intercept Layer for OpenCL Applications will add these build options to the end of any application provided or injected build options for each call to clLinkProgram()." ) -CLI_CONTROL( bool, DumpProgramBuildLogs, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump build logs for every device a program is built for to a separate file. The file name will have the form \"CLI______build_log.txt\"." ) -CLI_CONTROL( bool, DumpKernelISABinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump kernel ISA binaries for every kernel, if supported. Currently, kernel ISA binaries are only supported for Intel GPU devices. Kernel ISA binaries can be decoded into ISA text with a disassembler. The file name will have the form \"CLI______.isabin\"." ) +CLI_CONTROL( + bool, OmitProgramNumber, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will omit the program number from dumped file names and hash tracking. " + "This can produce deterministic results even if programs are built in a " + "non-deterministic order (say, by multiple threads).") +CLI_CONTROL(bool, OmitCompileCount, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will omit the compile count from dumped file names " + "and hash tracking. 
This can reduce the number of files that are " + "dumped if the same program is compiled multiple times.") +CLI_CONTROL( + bool, SimpleDumpProgramSource, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump the last string(s) passed to clCreateProgramWithSource() to the " + "file kernel.cl, and the last program options passed to clBuildProgram() " + "to the file kernel.txt. These files will be dumped to the application's " + "working directory. If an application fails to compile a program and " + "exits the program immediately after detecting a compile failure " + "SimpleDumpProgram may be all that is needed to identify the program and " + "program options that are failing to compile.") +CLI_CONTROL( + bool, DumpProgramSourceScript, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump every string passed to clCreateProgramWithSource() to its own " + "file. The directory names and file names for the dumped files match the " + "directory names and file names expected by a modified OpenCL conformance " + "test script to capture kernels. This setting overrides " + "SimpleDumpProgramSource, and if it is set to a nonzero value then the " + "value of SimpleDumpProgramSource is ignored.") +CLI_CONTROL( + bool, DumpProgramSource, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump every string passed to clCreateProgramWithSource() to its own " + "file. The file name will have the form \"CLI___source.cl\". Program options will be dumped to the " + "same directory with the file name \"CLI______options.txt\", where API is an empty string for " + "clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for " + "clLinkProgram(). This setting can be used for information purposes to " + "see all kernels that are used by an application or to dump programs for " + "program injection. 
This setting overrides DumpProgramSourceScript and " + "SimpleDumpProgramSource, and if it is set to a nonzero value then the " + "values of DumpProgramSourceScript and SimpleDumpProgramSource will be " + "ignored.") +CLI_CONTROL( + bool, DumpInputProgramBinaries, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump every program binary that is passed to " + "clCreateProgramWithBinary() to its own file. The file name will have the " + "form \"CLI___.bin\". This is the input program binary provided by the " + "application, and not a device binary queried from the OpenCL " + "implementation. In particular, note that it may be a SPIR 1.2 binary.") +CLI_CONTROL( + bool, DumpProgramBinaries, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump every program binary that was successfully built with " + "clBuildProgram() to its own file. The file name will have the form " + "\"CLI_____.bin\". Program options will be " + "dumped to the same directory with the file name \"CLI______options.txt\", where API is an empty string for " + "clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for " + "clLinkProgram(). This setting can be used to examine compiled program " + "binaries or to dump program binaries for program binary injection. Note " + "that this option dumps the output binary, which is a device binary, after " + "calling clBuildProgram() or clLinkProgram().") +CLI_CONTROL( + bool, DumpProgramSPIRV, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump every program IL binary passed to clCreateProgramWithIL() to " + "its own file. The file name will have the form \"CLI___0000.spv\" - for now at least!.
" + "Program options will be dumped to the same directory with the file name " + "\"CLI______options.txt\", where is an empty " + "string for clBuildProgram(), \"compile\" for clCompileProgram(), and " + "\"link\" for clLinkProgram(). This setting can be used for information " + "purposes to see all kernels that are used by an application or to dump " + "SPIRV programs for SPIRV injection.") +CLI_CONTROL(bool, InjectProgramSource, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will look to inject potentially modified kernel " + "source to clCreateProgramWithSource() and/or potentially modified " + "options to clCompileProgram() or clBuildProgram(). Note that " + "program options currently cannot be injected for clLinkProgram().") +CLI_CONTROL( + bool, InjectProgramBinaries, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will look to inject potentially modified kernel binaries via " + "clCreateProgramWithBinary() in place of program text for each call to " + "clCreateProgramWithSource(). This is typically done to reduce program " + "compilation time or to use known good program binaries.") +CLI_CONTROL( + bool, RejectProgramBinaries, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will reject kernel binaries passed via clCreateProgramWithBinary() and " + "return CL_INVALID_BINARY. 
This can be used to force an application to " + "re-compile program binaries from source.") +CLI_CONTROL(bool, InjectProgramSPIRV, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will look to inject potentially modified kernel " + "SPIR-V binaries via clCreateProgramWithIL() in place of program " + "text for each call to clCreateProgramWithSource().") +CLI_CONTROL( + bool, PrependProgramSource, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will look to prepend kernel code from a file to the application provided " + "kernel source passed to clCreateProgramWithSource(). The Intercept Layer " + "for OpenCL Applications will look for kernel source to prepend in the " + "dump and log directory. The files that are searched for are (in order) " + "\"CLI___prepend.cl\", " + "\"CLI__prepend.cl\", and \"CLI_prepend.cl\".") +CLI_CONTROL( + std::string, AppendBuildOptions, "", + "If set, the Intercept Layer for OpenCL Applications will add these build " + "options to the end of any application provided or injected build options " + "for each call to clCompileProgram or clBuildProgram().") +CLI_CONTROL(std::string, AppendLinkOptions, "", + "If set, the Intercept Layer for OpenCL Applications will add " + "these build options to the end of any application provided or " + "injected build options for each call to clLinkProgram().") +CLI_CONTROL( + bool, DumpProgramBuildLogs, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump build logs for every device a program is built for to a " + "separate file. The file name will have the form \"CLI______build_log.txt\".") +CLI_CONTROL( + bool, DumpKernelISABinaries, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump kernel ISA binaries for every kernel, if supported. Currently, " + "kernel ISA binaries are only supported for Intel GPU devices. 
Kernel ISA " + "binaries can be decoded into ISA text with a disassembler. The file name " + "will have the form \"CLI______.isabin\".") CLI_CONTROL_SEPARATOR( Controls for Emulating Features: ) -CLI_CONTROL( bool, Emulate_cl_khr_extended_versioning, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emulate support for the cl_khr_extended_versioning extension." ) -CLI_CONTROL( bool, Emulate_cl_khr_semaphore, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emulate support for the cl_khr_semaphore extension." ) -CLI_CONTROL( bool, Emulate_cl_intel_unified_shared_memory, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emulate support for the cl_intel_unified_shared_memory extension USM APIs using SVM APIs. This can be useful to test USM applications on an implementation that supports SVM, but not USM." ) +CLI_CONTROL( + bool, Emulate_cl_khr_extended_versioning, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will emulate support for the cl_khr_extended_versioning extension.") +CLI_CONTROL( + bool, Emulate_cl_khr_semaphore, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will emulate support for the cl_khr_semaphore extension.") +CLI_CONTROL( + bool, Emulate_cl_intel_unified_shared_memory, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will emulate support for the cl_intel_unified_shared_memory extension USM " + "APIs using SVM APIs. This can be useful to test USM applications on an " + "implementation that supports SVM, but not USM.") CLI_CONTROL_SEPARATOR( Controls for Automatically Creating SPIR-V Modules: ) -CLI_CONTROL( bool, AutoCreateSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically create SPIR-V modules by invoking CLANG each time a program is built. 
The file name will have the form \"CLI____.spv\". Because invoking CLANG requires a file containing the OpenCL C source, setting this option implicitly sets DumpProgramSource as well. Additionally, this feature is not available for injected program source." ) -CLI_CONTROL( std::string, SPIRVClang, "clang", "The clang executable used to compile an OpenCL C program to a SPIR-V module. This can be an executable in the system path, a relative path, or a full absolute path." ) -CLI_CONTROL( std::string, SPIRVCLHeader, "opencl.h", "The OpenCL header file used to compile an OpenCL C program to a SPIR-V module. This must be a relative path or a full absolute path." ) -CLI_CONTROL( std::string, SPIRVDis, "spirv-dis", "The spirv-dis executable used to optionally disassemble the compiled SPIR-V module to a SPIR-V text representation. This can be an executable in the system path, a relative path, or a full absolute path." ) -CLI_CONTROL( std::string, DefaultOptions, "-cc1 -x cl -cl-std=CL1.2 -D__OPENCL_C_VERSION__=120 -D__OPENCL_VERSION__=120 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build a non-OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." ) -CLI_CONTROL( std::string, OpenCL2Options, "-cc1 -x cl -cl-std=CL2.0 -D__OPENCL_C_VERSION__=200 -D__OPENCL_VERSION__=200 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build an OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." ) +CLI_CONTROL( + bool, AutoCreateSPIRV, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will automatically create SPIR-V modules by invoking CLANG each time a " + "program is built. The file name will have the form \"CLI____.spv\". 
Because invoking CLANG requires a file containing the " + "OpenCL C source, setting this option implicitly sets DumpProgramSource as " + "well. Additionally, this feature is not available for injected program " + "source.") +CLI_CONTROL(std::string, SPIRVClang, "clang", + "The clang executable used to compile an OpenCL C program to a " + "SPIR-V module. This can be an executable in the system path, a " + "relative path, or a full absolute path.") +CLI_CONTROL( + std::string, SPIRVCLHeader, "opencl.h", + "The OpenCL header file used to compile an OpenCL C program to a SPIR-V " + "module. This must be a relative path or a full absolute path.") +CLI_CONTROL( + std::string, SPIRVDis, "spirv-dis", + "The spirv-dis executable used to optionally disassemble the compiled " + "SPIR-V module to a SPIR-V text representation. This can be an executable " + "in the system path, a relative path, or a full absolute path.") +CLI_CONTROL(std::string, DefaultOptions, + "-cc1 -x cl -cl-std=CL1.2 -D__OPENCL_C_VERSION__=120 " + "-D__OPENCL_VERSION__=120 -emit-spirv -triple=spir", + "This is the list of options that is implicitly passed to CLANG to " + "build a non-OpenCL 2.0 SPIR-V module. Any application-provided " + "build options will be appended to these build options.") +CLI_CONTROL(std::string, OpenCL2Options, + "-cc1 -x cl -cl-std=CL2.0 -D__OPENCL_C_VERSION__=200 " + "-D__OPENCL_VERSION__=200 -emit-spirv -triple=spir", + "This is the list of options that is implicitly passed to CLANG to " + "build an OpenCL 2.0 SPIR-V module. Any application-provided " + "build options will be appended to these build options.") CLI_CONTROL_SEPARATOR( Controls for Dumping Command Buffers: ) -CLI_CONTROL( bool, OmitCommandBufferNumber, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the command buffer number from dumped file names and hash tracking. 
This can produce deterministic results even if command buffers are creatd and finalized in a non-deterministic order (say, by multiple threads)." ) -CLI_CONTROL( bool, DumpCommandBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the commands and dependencies in a command buffer to a file when the command buffer is successfully finalized. The file name will have the form \"CLI___cmdbuf.dot\". The command buffer is described using the DOT graph description language." ) +CLI_CONTROL(bool, OmitCommandBufferNumber, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will omit the command buffer number from dumped file " + "names and hash tracking. This can produce deterministic results " + "even if command buffers are created and finalized in a " + "non-deterministic order (say, by multiple threads).") +CLI_CONTROL( + bool, DumpCommandBuffers, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump the commands and dependencies in a command buffer to a file " + "when the command buffer is successfully finalized. The file name will " + "have the form \"CLI___cmdbuf.dot\". The command buffer is described using the DOT graph " + "description language.") CLI_CONTROL_SEPARATOR( Controls for Dumping and Injecting Buffers and Images: ) -CLI_CONTROL( bool, DumpBufferHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of a buffer, SVM, or USM allocation rather than the full contents of the buffer. This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." ) -CLI_CONTROL( bool, DumpImageHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of an image rather than the full contents of the image.
This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." ) -CLI_CONTROL( bool, DumpArgumentsOnSet, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the argument value on calls to clSetKernelArg(). Arguments are dumped as raw binary data. The file names will have the form \"SetKernelArg__Kernel__Arg_.bin\"." ) -CLI_CONTROL( bool, DumpBuffersAfterCreate, false, "If set, the Intercept Layer for OpenCL Applications will dump buffers to a file after creation. This control still honors the enqueue counter limits, even though no enqueues are involved during buffer creation. Currently only works for cl_mem buffers created from host pointers." ) -CLI_CONTROL( bool, DumpBuffersAfterMap, false, "If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file after the buffer is mapped. Only valid if the buffer is NOT mapped with CL_MAP_WRITE_INVALIDATE_REGION. If the buffer was mapped non-blocking, this may insert a clFinish() into the command queue, which may have functional or performance implications." ) -CLI_CONTROL( bool, DumpBuffersBeforeUnmap, false, "If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file immediately before the buffer is unmapped. This is done by inserting a blocking clEnqueueMapBuffer() (and matching clEnqueueUnmapMemObject()) into the command queue, which may have functional or performance implications." ) -CLI_CONTROL( bool, DumpBuffersBeforeEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffer, SVM, and USM kernel arguments before calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to a \"memDumpPreEnqueue\" subdirectory of the dump directory. 
The file names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\"." ) -CLI_CONTROL( bool, DumpBuffersAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffer, SVM, and USM kernel arguments after calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to a \"memDumpPostEnqueue\" subdirectory of the dump directory. The file names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\". Note that this is the same naming convention as with DumpBuffersBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder." ) -CLI_CONTROL( std::string, DumpBuffersForKernel, "", "If set, the Intercept Layer for OpenCL Applications will only dump buffer, SVM, and USM kernel arguments when the specified kernel is enqueued. This control is ignored unless DumpBuffersBeforeEnqueue or DumpBuffersAfterEnqueue are enabled." ) -CLI_CONTROL( bool, DumpImagesBeforeEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump image kernel arguments before calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to a \"memDumpPreEnqueue\" subdirectory of the dump directory. The file names will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\"." ) -CLI_CONTROL( bool, DumpImagesAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump image kernel arguments after calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to a \"memDumpPostEnqueue\" subdirectory of the dump directory. The file names will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\". 
Note that this is the same naming convention as with DumpImagesBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder." ) -CLI_CONTROL( std::string, DumpImagesForKernel, "", "If set, the Intercept Layer for OpenCL Applications will only dump image kernel arguments when the specified kernel is enqueued. This control is ignored unless DumpImagesBeforeEnqueue or DumpImagesAfterEnqueue are enabled." ) -CLI_CONTROL( cl_uint, DumpBuffersMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump buffer, SVM, and USM kernel arguments when the enqueue counter is greater than this value, inclusive." ) -CLI_CONTROL( cl_uint, DumpBuffersMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump buffer, SVM, and USM kernel arguments when the enqueue counter is less than this value, inclusive." ) -CLI_CONTROL( cl_uint, DumpImagesMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump image kernel arguments when the enqueue counter is greater than this value, inclusive." ) -CLI_CONTROL( cl_uint, DumpImagesMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump image kernel arguments when the enqueue counter is less than this value, inclusive." ) -CLI_CONTROL( cl_uint, DumpArgumentsOnSetMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump argument values when the enqueue counter is greater than this value, inclusive." ) -CLI_CONTROL( cl_uint, DumpArgumentsOnSetMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump kernel arguments when the enqueue counter is less than this value, inclusive." ) -CLI_CONTROL( bool, InjectBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified buffer, SVM, and USM contents before calls to clEnqueueNDRangeKernel(). 
Only buffers that are kernel arguments for the kernel being enqueued may be injected. The file name to inject will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\", which matches the file name for dumped buffers." ) -CLI_CONTROL( bool, InjectImages, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified image contents before calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued may be injected. The file name to inject will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\", which matches the file name for dumped images." ) - -CLI_CONTROL_SEPARATOR( Device Partitioning Controls: ) -CLI_CONTROL( bool, AutoPartitionAllDevices, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically partition parent devices and return all parent devices and all sub-devices." ) -CLI_CONTROL( bool, AutoPartitionAllSubDevices, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically partition parent devices and return all sub-devices, but no parent devices." ) -CLI_CONTROL( bool, AutoPartitionSingleSubDevice, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically partition parent devices and return a single sub-device, but no other sub-devices or parent devices or other sub-devices." ) -CLI_CONTROL( bool, AutoPartitionByAffinityDomain, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will try to automatically partition parent devices by the next partitionable affinity domain." ) -CLI_CONTROL( cl_uint, AutoPartitionEqually, 1, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will try to automatically partition parent devices into sub-devices with the specified number of compute units." 
) - -CLI_CONTROL_SEPARATOR( Capture and Replay Controls: ) -CLI_CONTROL( bool, CaptureReplay, false, "This is the top-level control for kernel capture and replay." ) -CLI_CONTROL( cl_uint, CaptureReplayMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only enable kernel capture and replay when the enqueue counter is greater than this value, inclusive." ) -CLI_CONTROL( cl_uint, CaptureReplayMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will stop kernel capture and replay when the encounter is greater than this value, meaning that only enqueues less than this value, inclusive, will be captured." ) -CLI_CONTROL( std::string, CaptureReplayKernelName, "", "If set, the Intercept Layer for OpenCL Applications will only enable kernel capture and replay when the kernel name equals this name.") -CLI_CONTROL( bool, CaptureReplayUniqueKernels, false, "If set, the Intercept Layer for OpenCL Applications will only enable kernel capture and replay if the kernel signature (i.e. hash + kernelname) has not been seen already." ) -CLI_CONTROL( cl_uint, CaptureReplayNumKernelEnqueuesSkip, 0, "The Intercept Layer for OpenCL Applications will skip this many kernel enqueues before enabling kernel capture and replay.") -CLI_CONTROL( cl_uint, CaptureReplayNumKernelEnqueuesCapture, UINT_MAX, "The Intercept Layer for OpenCL Applications will only capture this many kernel enqueues.") - -CLI_CONTROL_SEPARATOR( AubCapture Controls: ) -CLI_CONTROL( bool, AubCapture, false, "This is the top-level control for aub capture. The Intercept Layer for OpenCL Applications doesn't implement aub capture itself, but can be used to selectively enable and disable aub capture via other methods." ) -CLI_CONTROL( bool, AubCaptureKDC, false, "If set, the Intercept Layer for OpenCL Applications will use the older kdc.exe method of aub capture. By default, the newer NEO method of aub capture will be used. This control is ignored for all non-Windows operating systems." 
) -CLI_CONTROL( bool, AubCaptureIndividualEnqueues, false, "If set, the Intercept Layer for OpenCL Applications will start aub capture before a kernel enqueue, and will also stop aub capture immediately after the kernel enqueue. Each file will have the form \"AubCapture_Enqueue__kernel_\". Note that non-kernel enqueues such as calls to clEnqueueReadBuffer() and clEnqueueWriteBuffer() will NOT be aub captured when this control is set. The AubCaptureMinEnqueue and AubCaptureMaxEnqueue controls are still honored when AubCaptureIndividualEnqueues is set." ) -CLI_CONTROL( cl_uint, AubCaptureMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only enable aub capture when the enqueue counter is greater than this value, inclusive." ) -CLI_CONTROL( cl_uint, AubCaptureMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will stop aub capture when the encounter is greater than this value, meaning that only enqueues less than this value, inclusive, will be captured. If the enqueue counter never reaches this value, the Intercept Layer for OpenCL Applications will stop aub capture when the it is unloaded." ) -CLI_CONTROL( std::string, AubCaptureKernelName, "", "If set, the Intercept Layer for OpenCL Applications will only enable aub capture when the kernel name equals this name.") -CLI_CONTROL( std::string, AubCaptureKernelGWS, "", "If set, the Intercept Layer for OpenCL Applications will only enable aub capture when the NDRange global work size matches this string. The string should have the form \"XxYxZ\". The wildcard \"*\" matches all global work sizes.") -CLI_CONTROL( std::string, AubCaptureKernelLWS, "", "If set, the Intercept Layer for OpenCL Applications will only enable aub capture when the NDRange local work size matches this string. The string should have the form \"XxYxZ\". 
The wildcard \"*\" matches all local work sizes, and the string \"NULL\" matches a NULL local work size.") -CLI_CONTROL( bool, AubCaptureUniqueKernels, false, "If set, the Intercept Layer for OpenCL Applications will only enable aub capture if the kernel signature (i.e. hash + kernelname + gws + lws) has not been seen already. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues." ) -CLI_CONTROL( cl_uint, AubCaptureNumKernelEnqueuesSkip, 0, "The Intercept Layer for OpenCL Applications will skip this many kernel enqueues before enabling aub capture. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues.") -CLI_CONTROL( cl_uint, AubCaptureNumKernelEnqueuesCapture, UINT_MAX, "The Intercept Layer for OpenCL Applications will only capture this many kernel enqueues. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues.") -CLI_CONTROL( cl_uint, AubCaptureStartWait, 0, "The Intercept Layer for OpenCL Applications will wait for this many milliseconds before beginning aub capture.") -CLI_CONTROL( cl_uint, AubCaptureEndWait, 0, "The Intercept Layer for OpenCL Applications will wait for this many milliseconds before ending aub capture.") - -CLI_CONTROL_SEPARATOR( Execution Controls: ) -CLI_CONTROL( bool, NoErrors, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will cause all OpenCL APIs to return a successful error status." ) -CLI_CONTROL( uint64_t, ExitOnEnqueueCount, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will exit the application when the enqueue counter reaches the specified value. 
This can be useful to debug sporadic issues by exiting an application immediately, without needing to wait for the application to exit normally." ) -CLI_CONTROL( bool, NullContextCallback, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force the context callback to be NULL. With both context callback logging and NULL context callback set, the context callback will still be logged, but any application context callback will not be called." ) -CLI_CONTROL( bool, FinishAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFinish() after every enqueue. The command queue that the command was just enqueued to is passed to clFinish(). This can be used to debug possible timing or resource management issues and will likely impact performance." ) -CLI_CONTROL( bool, FlushAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This can also be used to debug possible timing or resource management issues and is slightly less obtrusive than FinishAfterEnqueue but still will likely impact performance. If both FinishAfterEnqueue and FlushAfterEnqueue are nonzero then the Intercept Layer for OpenCL Applications will only insert a call to clFinish() after every enqueue, because clFinish() implies clFlush()." ) -CLI_CONTROL( bool, FlushAfterEnqueueBarrier, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every barrier enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This has been useful to debug out-of-order queue issues." ) -CLI_CONTROL( bool, InOrderQueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force all queues to be created in-order. 
This can be used for performance analysis, but may lead to deadlocks in some cases." ) -CLI_CONTROL( bool, NoProfilingQueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force all queues to be created without event profiling support. This can be used for performance analysis, but may lead to errors if the application requires event profiling." ) -CLI_CONTROL( bool, DummyOutOfOrderQueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will create and destroy a dummy out-of-order queue. This may be useful for performance analysis." ) -CLI_CONTROL( bool, NullEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will silently ignore any enqueue. This can be used for performance analysis, but will likely cause errors if the application relies on any sort of information from OpenCL events and should be used carefully." ) -CLI_CONTROL( bool, NullLocalWorkSize, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force the local work size argument to clEnqueueNDRangeKernel() to be NULL, which causes the OpenCL implementation to pick the local work size. Note that this control takes effect before NullLocalWorkSizeX / NullLocalWorkSizeY / NullLocalWorkSizeZ (see below), so enabling both controls will have the effect of forcing a specific local work size." ) -CLI_CONTROL( size_t, NullLocalWorkSizeX, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. 
If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) -CLI_CONTROL( size_t, NullLocalWorkSizeY, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) -CLI_CONTROL( size_t, NullLocalWorkSizeZ, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) -CLI_CONTROL( bool, InitializeBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will initialize the contents of allocated buffers with zero. Only valid for non-COPY_HOST_PTR and non-USE_HOST_PTR allocations." ) -CLI_CONTROL( cl_uint, DefaultQueuePriorityHint, 0, "If set to a nonzero value, and if no other priority hint is specified by the application, the Intercept Layer for OpencL Applications will attempt to create a command queue with this priority hint value. Note: HIGH priority is 1, MED priority is 2, and LOW priority is 4." 
) -CLI_CONTROL( cl_uint, DefaultQueueThrottleHint, 0, "If set to a nonzero value, and if no other throttle hint is specified by the application, the Intercept Layer for OpencL Applications will attempt to create a command queue with this throttle hint value. Note: HIGH throttle is 1, MED throttle is 2, and LOW throttle is 4." ) -CLI_CONTROL( bool, RelaxAllocationLimits, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to relax allocation limits to enable allocations larger than CL_DEVICE_MAX_MEM_ALLOC_SIZE." ) - -CLI_CONTROL_SEPARATOR( Platform and Device Query Overrides: ) -CLI_CONTROL( std::string, PlatformName, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_NAME will return this string instead of the true platform name." ) -CLI_CONTROL( std::string, PlatformVendor, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_VENDOR will return this string instead of the true platform vendor." ) -CLI_CONTROL( std::string, PlatformProfile, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_PROFILE will return this string instead of the true platform profile." ) -CLI_CONTROL( std::string, PlatformVersion, "", "If set to a non-empty string, the clGetPlatformInfo() query for CL_PLATFORM_VERSION will return this string instead of the true platform version." ) -CLI_CONTROL( cl_uint, DeviceTypeFilter, CL_DEVICE_TYPE_ALL, "Hides all device types that are not in the filter. Note: CL_DEVICE_TYPE_CPU = 2, CL_DEVICE_TYPE_GPU = 4, CL_DEVICE_TYPE_ACCELERATOR = 8, CL_DEVICE_TYPE_CUSTOM = 16." ) -CLI_CONTROL( cl_uint, DeviceType, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_TYPE will return this value instead of the true device type. In addition, calls to clGetDeviceIDs() for this device type will return all devices, not just devices of the requested type. 
This can be used to enumerate all devices (even CPUs) as GPUs, or vice versa." ) -CLI_CONTROL( std::string, DeviceName, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_NAME will return this value instead of the true device name." ) -CLI_CONTROL( std::string, DeviceVendor, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_VENDOR will return this value instead of the true device vendor." ) -CLI_CONTROL( std::string, DeviceProfile, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_PROFILE will return this value instead of the true device profile." ) -CLI_CONTROL( std::string, DeviceVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_VERSION will return this value instead of the true device version." ) -CLI_CONTROL( std::string, DeviceCVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_OPENCL_C_VERSION will return this value instead of the true device version." ) -CLI_CONTROL( std::string, DeviceExtensions, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_EXTENSIONS will return this value instead of the true device extensions string." ) -CLI_CONTROL( std::string, DeviceILVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_IL_VERSION will return this value instead of the true device intermediate language versions." ) -CLI_CONTROL( cl_uint, DeviceVendorID, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_VENDOR will return this value instead of the true device vendor ID." ) -CLI_CONTROL( cl_uint, DeviceMaxComputeUnits, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_MAX_COMPUTE_UNITS will return this value instead of the true device max compute units." 
) -CLI_CONTROL( cl_uint, DevicePreferredVectorWidthChar, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR will return this value instead of the true device preferred vector width." ) -CLI_CONTROL( cl_uint, DevicePreferredVectorWidthShort, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT will return this value instead of the true device preferred vector width." ) -CLI_CONTROL( cl_uint, DevicePreferredVectorWidthInt, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT will return this value instead of the true device preferred vector width." ) -CLI_CONTROL( cl_uint, DevicePreferredVectorWidthLong, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG will return this value instead of the true device preferred vector width." ) -CLI_CONTROL( cl_uint, DevicePreferredVectorWidthHalf, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF will return this value instead of the true device preferred vector width." ) -CLI_CONTROL( cl_uint, DevicePreferredVectorWidthFloat, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT will return this value instead of the true device preferred vector width." ) -CLI_CONTROL( cl_uint, DevicePreferredVectorWidthDouble, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE will return this value instead of the true device preferred vector width." ) -CLI_CONTROL( std::string, DriverVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DRIVER_VERSION will return this value instead of the true driver version." 
) -CLI_CONTROL( std::string, PrependDeviceExtensions, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_EXTENSIONS will return this value followed by the true device extensions string." ) - -CLI_CONTROL_SEPARATOR( Precompiled Kernel and Builtin Kernel Override Controls: ) -CLI_CONTROL( bool, ForceByteBufferOverrides, false, "If set to a nonzero value, each of the buffer functions that are overridden (via one or more of the keys below) will use a byte-wise operation to read/write/copy the buffer (default behavior is to try to copy multiple bytes at a time, if possible). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) -CLI_CONTROL( bool, OverrideReadBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadBuffer() instead of the implementation's clEnqueueReadBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) -CLI_CONTROL( bool, OverrideWriteBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteBuffer() instead of the implementation's clEnqueueWriteBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) -CLI_CONTROL( bool, OverrideCopyBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyBuffer() instead of the implementation's clEnqueueCopyBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) -CLI_CONTROL( bool, OverrideReadImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadImage() instead of the implementation's clEnqueueReadImage(). Only 2D images are currently supported." 
) -CLI_CONTROL( bool, OverrideWriteImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteImage() instead of the implementation's clEnqueueWriteImage(). Only 2D images are currently supported." ) -CLI_CONTROL( bool, OverrideCopyImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyImage() instead of the implementation's clEnqueueCopyImage(). Only 2D images are currently supported." ) -CLI_CONTROL( bool, OverrideBuiltinKernels, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use its own version of the built-in OpenCL kernels that may be accessed via clCreateProgramWithBuiltInKernels(). At present, only the VME block_motion_estimate_intel kernel is implemented." ) +CLI_CONTROL(bool, DumpBufferHashes, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will dump hashes of a buffer, SVM, or USM allocation " + "rather than the full contents of the buffer. This can be useful " + "to identify which kernel enqueues generate different results " + "without requiring a large amount of disk space.") +CLI_CONTROL( + bool, DumpImageHashes, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump hashes of an image rather than the full contents of the image. " + "This can be useful to identify which kernel enqueues generate different " + "results without requiring a large amount of disk space.") +CLI_CONTROL(bool, DumpArgumentsOnSet, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will dump the argument value on calls to " + "clSetKernelArg(). Arguments are dumped as raw binary data. 
The " + "file names will have the form \"SetKernelArg__Kernel__Arg_.bin\".") +CLI_CONTROL( + bool, DumpBuffersAfterCreate, false, + "If set, the Intercept Layer for OpenCL Applications will dump buffers to " + "a file after creation. This control still honors the enqueue counter " + "limits, even though no enqueues are involved during buffer creation. " + "Currently only works for cl_mem buffers created from host pointers.") +CLI_CONTROL( + bool, DumpBuffersAfterMap, false, + "If set, the Intercept Layer for OpenCL Applications will dump the " + "contents of a buffer to a file after the buffer is mapped. Only valid if " + "the buffer is NOT mapped with CL_MAP_WRITE_INVALIDATE_REGION. If the " + "buffer was mapped non-blocking, this may insert a clFinish() into the " + "command queue, which may have functional or performance implications.") +CLI_CONTROL( + bool, DumpBuffersBeforeUnmap, false, + "If set, the Intercept Layer for OpenCL Applications will dump the " + "contents of a buffer to a file immediately before the buffer is unmapped. " + " This is done by inserting a blocking clEnqueueMapBuffer() (and matching " + "clEnqueueUnmapMemObject()) into the command queue, which may have " + "functional or performance implications.") +CLI_CONTROL( + bool, DumpBuffersBeforeEnqueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump buffer, SVM, and USM kernel arguments before calls to " + "clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the " + "kernel being enqueued are dumped. Buffers are dumped as raw binary data " + "to a \"memDumpPreEnqueue\" subdirectory of the dump directory. The file " + "names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\".") +CLI_CONTROL( + bool, DumpBuffersAfterEnqueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump buffer, SVM, and USM kernel arguments after calls to " + "clEnqueueNDRangeKernel(). 
Only buffers that are kernel arguments for the " + "kernel being enqueued are dumped. Buffers are dumped as raw binary data " + "to a \"memDumpPostEnqueue\" subdirectory of the dump directory. The file " + "names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\". " + "Note that this is the same naming convention as with " + "DumpBuffersBeforeEnqueue, so the changes resulting from an enqueue can be " + "determined by diff'ing the preEnqueue folder with the postEnqueue folder.") +CLI_CONTROL(std::string, DumpBuffersForKernel, "", + "If set, the Intercept Layer for OpenCL Applications will only " + "dump buffer, SVM, and USM kernel arguments when the specified " + "kernel is enqueued. This control is ignored unless " + "DumpBuffersBeforeEnqueue or DumpBuffersAfterEnqueue are enabled.") +CLI_CONTROL( + bool, DumpImagesBeforeEnqueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump image kernel arguments before calls to " + "clEnqueueNDRangeKernel(). Only images that are kernel arguments for the " + "kernel being enqueued are dumped. Images are dumped as raw binary data " + "to a \"memDumpPreEnqueue\" subdirectory of the dump directory. The file " + "names will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\".") +CLI_CONTROL( + bool, DumpImagesAfterEnqueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will dump image kernel arguments after calls to clEnqueueNDRangeKernel(). " + " Only images that are kernel arguments for the kernel being enqueued are " + "dumped. Images are dumped as raw binary data to a \"memDumpPostEnqueue\" " + "subdirectory of the dump directory. The file names will have the form " + "\"Enqueue__Kernel__Arg__Image__xx_bpp.raw\". 
Note that this " + "is the same naming convention as with DumpImagesBeforeEnqueue, so the " + "changes resulting from an enqueue can be determined by diff'ing the " + "preEnqueue folder with the postEnqueue folder.") +CLI_CONTROL(std::string, DumpImagesForKernel, "", + "If set, the Intercept Layer for OpenCL Applications will only " + "dump image kernel arguments when the specified kernel is " + "enqueued. This control is ignored unless DumpImagesBeforeEnqueue " + "or DumpImagesAfterEnqueue are enabled.") +CLI_CONTROL(cl_uint, DumpBuffersMinEnqueue, 0, + "The Intercept Layer for OpenCL Applications will only dump " + "buffer, SVM, and USM kernel arguments when the enqueue counter is " + "greater than this value, inclusive.") +CLI_CONTROL(cl_uint, DumpBuffersMaxEnqueue, UINT_MAX, + "The Intercept Layer for OpenCL Applications will only dump " + "buffer, SVM, and USM kernel arguments when the enqueue counter is " + "less than this value, inclusive.") +CLI_CONTROL( + cl_uint, DumpImagesMinEnqueue, 0, + "The Intercept Layer for OpenCL Applications will only dump image kernel " + "arguments when the enqueue counter is greater than this value, inclusive.") +CLI_CONTROL( + cl_uint, DumpImagesMaxEnqueue, UINT_MAX, + "The Intercept Layer for OpenCL Applications will only dump image kernel " + "arguments when the enqueue counter is less than this value, inclusive.") +CLI_CONTROL( + cl_uint, DumpArgumentsOnSetMinEnqueue, 0, + "The Intercept Layer for OpenCL Applications will only dump argument " + "values when the enqueue counter is greater than this value, inclusive.") +CLI_CONTROL( + cl_uint, DumpArgumentsOnSetMaxEnqueue, UINT_MAX, + "The Intercept Layer for OpenCL Applications will only dump kernel " + "arguments when the enqueue counter is less than this value, inclusive.") +CLI_CONTROL( + bool, InjectBuffers, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will look to inject potentially modified buffer, SVM, and USM contents " + 
"before calls to clEnqueueNDRangeKernel(). Only buffers that are kernel " + "arguments for the kernel being enqueued may be injected. The file name " + "to inject will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\", " + "which matches the file name for dumped buffers.") +CLI_CONTROL( + bool, InjectImages, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will look to inject potentially modified image contents before calls to " + "clEnqueueNDRangeKernel(). Only images that are kernel arguments for the " + "kernel being enqueued may be injected. The file name to inject will have " + "the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\", which matches " + "the file name for dumped images.") + +CLI_CONTROL_SEPARATOR(Device Partitioning Controls:) +CLI_CONTROL(bool, AutoPartitionAllDevices, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will automatically partition parent devices and " + "return all parent devices and all sub-devices.") +CLI_CONTROL(bool, AutoPartitionAllSubDevices, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will automatically partition parent devices and " + "return all sub-devices, but no parent devices.") +CLI_CONTROL(bool, AutoPartitionSingleSubDevice, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will automatically partition parent devices and " + "return a single sub-device, but no other sub-devices or parent " + "devices or other sub-devices.") +CLI_CONTROL(bool, AutoPartitionByAffinityDomain, true, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will try to automatically partition parent devices " + "by the next partitionable affinity domain.") +CLI_CONTROL(cl_uint, AutoPartitionEqually, 1, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will try to automatically partition parent devices " + "into sub-devices with the 
specified number of compute units.") + +CLI_CONTROL_SEPARATOR(Capture and Replay Controls:) +CLI_CONTROL(bool, CaptureReplay, false, + "This is the top-level control for kernel capture and replay.") +CLI_CONTROL(cl_uint, CaptureReplayMinEnqueue, 0, + "The Intercept Layer for OpenCL Applications will only enable " + "kernel capture and replay when the enqueue counter is greater " + "than this value, inclusive.") +CLI_CONTROL( + cl_uint, CaptureReplayMaxEnqueue, UINT_MAX, + "The Intercept Layer for OpenCL Applications will stop kernel capture and " + "replay when the encounter is greater than this value, meaning that only " + "enqueues less than this value, inclusive, will be captured.") +CLI_CONTROL( + std::string, CaptureReplayKernelName, "", + "If set, the Intercept Layer for OpenCL Applications will only enable " + "kernel capture and replay when the kernel name equals this name.") +CLI_CONTROL(bool, CaptureReplayUniqueKernels, false, + "If set, the Intercept Layer for OpenCL Applications will only " + "enable kernel capture and replay if the kernel signature (i.e. " + "hash + kernelname) has not been seen already.") +CLI_CONTROL(cl_uint, CaptureReplayNumKernelEnqueuesSkip, 0, + "The Intercept Layer for OpenCL Applications will skip this many " + "kernel enqueues before enabling kernel capture and replay.") +CLI_CONTROL(cl_uint, CaptureReplayNumKernelEnqueuesCapture, UINT_MAX, + "The Intercept Layer for OpenCL Applications will only capture " + "this many kernel enqueues.") + +CLI_CONTROL_SEPARATOR(AubCapture Controls:) +CLI_CONTROL( + bool, AubCapture, false, + "This is the top-level control for aub capture. The Intercept Layer for " + "OpenCL Applications doesn't implement aub capture itself, but can be used " + "to selectively enable and disable aub capture via other methods.") +CLI_CONTROL(bool, AubCaptureKDC, false, + "If set, the Intercept Layer for OpenCL Applications will use the " + "older kdc.exe method of aub capture. 
By default, the newer NEO " + "method of aub capture will be used. This control is ignored for " + "all non-Windows operating systems.") +CLI_CONTROL( + bool, AubCaptureIndividualEnqueues, false, + "If set, the Intercept Layer for OpenCL Applications will start aub " + "capture before a kernel enqueue, and will also stop aub capture " + "immediately after the kernel enqueue. Each file will have the form " + "\"AubCapture_Enqueue__kernel_\". Note that " + "non-kernel enqueues such as calls to clEnqueueReadBuffer() and " + "clEnqueueWriteBuffer() will NOT be aub captured when this control is set. " + " The AubCaptureMinEnqueue and AubCaptureMaxEnqueue controls are still " + "honored when AubCaptureIndividualEnqueues is set.") +CLI_CONTROL( + cl_uint, AubCaptureMinEnqueue, 0, + "The Intercept Layer for OpenCL Applications will only enable aub capture " + "when the enqueue counter is greater than this value, inclusive.") +CLI_CONTROL( + cl_uint, AubCaptureMaxEnqueue, UINT_MAX, + "The Intercept Layer for OpenCL Applications will stop aub capture when " + "the encounter is greater than this value, meaning that only enqueues less " + "than this value, inclusive, will be captured. If the enqueue counter " + "never reaches this value, the Intercept Layer for OpenCL Applications " + "will stop aub capture when the it is unloaded.") +CLI_CONTROL(std::string, AubCaptureKernelName, "", + "If set, the Intercept Layer for OpenCL Applications will only " + "enable aub capture when the kernel name equals this name.") +CLI_CONTROL(std::string, AubCaptureKernelGWS, "", + "If set, the Intercept Layer for OpenCL Applications will only " + "enable aub capture when the NDRange global work size matches this " + "string. The string should have the form \"XxYxZ\". 
The wildcard " + "\"*\" matches all global work sizes.") +CLI_CONTROL( + std::string, AubCaptureKernelLWS, "", + "If set, the Intercept Layer for OpenCL Applications will only enable aub " + "capture when the NDRange local work size matches this string. The string " + "should have the form \"XxYxZ\". The wildcard \"*\" matches all local " + "work sizes, and the string \"NULL\" matches a NULL local work size.") +CLI_CONTROL( + bool, AubCaptureUniqueKernels, false, + "If set, the Intercept Layer for OpenCL Applications will only enable aub " + "capture if the kernel signature (i.e. hash + kernelname + gws + lws) has " + "not been seen already. The behavior of this control is well-defined when " + "AubCaptureIndividualEnqueues is not set, but it doesn't make much sense " + "without AubCaptureIndividualEnqueues.") +CLI_CONTROL( + cl_uint, AubCaptureNumKernelEnqueuesSkip, 0, + "The Intercept Layer for OpenCL Applications will skip this many kernel " + "enqueues before enabling aub capture. The behavior of this control is " + "well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't " + "make much sense without AubCaptureIndividualEnqueues.") +CLI_CONTROL(cl_uint, AubCaptureNumKernelEnqueuesCapture, UINT_MAX, + "The Intercept Layer for OpenCL Applications will only capture " + "this many kernel enqueues. 
The behavior of this control is " + "well-defined when AubCaptureIndividualEnqueues is not set, but it " + "doesn't make much sense without AubCaptureIndividualEnqueues.") +CLI_CONTROL(cl_uint, AubCaptureStartWait, 0, + "The Intercept Layer for OpenCL Applications will wait for this " + "many milliseconds before beginning aub capture.") +CLI_CONTROL(cl_uint, AubCaptureEndWait, 0, + "The Intercept Layer for OpenCL Applications will wait for this " + "many milliseconds before ending aub capture.") + +CLI_CONTROL_SEPARATOR(Execution Controls:) +CLI_CONTROL( + bool, NoErrors, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will cause all OpenCL APIs to return a successful error status.") +CLI_CONTROL(uint64_t, ExitOnEnqueueCount, 0, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will exit the application when the enqueue counter " + "reaches the specified value. This can be useful to debug " + "sporadic issues by exiting an application immediately, without " + "needing to wait for the application to exit normally.") +CLI_CONTROL( + bool, NullContextCallback, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will force the context callback to be NULL. With both context callback " + "logging and NULL context callback set, the context callback will still be " + "logged, but any application context callback will not be called.") +CLI_CONTROL(bool, FinishAfterEnqueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications inserts a call to clFinish() after every enqueue. " + "The command queue that the command was just enqueued to is passed " + "to clFinish(). 
This can be used to debug possible timing or " + "resource management issues and will likely impact performance.") +CLI_CONTROL( + bool, FlushAfterEnqueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "inserts a call to clFlush() after every enqueue. The command queue that " + "the command was just enqueued to is passed to clFlush(). This can also " + "be used to debug possible timing or resource management issues and is " + "slightly less obtrusive than FinishAfterEnqueue but still will likely " + "impact performance. If both FinishAfterEnqueue and FlushAfterEnqueue are " + "nonzero then the Intercept Layer for OpenCL Applications will only insert " + "a call to clFinish() after every enqueue, because clFinish() implies " + "clFlush().") +CLI_CONTROL( + bool, FlushAfterEnqueueBarrier, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "inserts a call to clFlush() after every barrier enqueue. The command " + "queue that the command was just enqueued to is passed to clFlush(). This " + "has been useful to debug out-of-order queue issues.") +CLI_CONTROL( + bool, InOrderQueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will force all queues to be created in-order. This can be used for " + "performance analysis, but may lead to deadlocks in some cases.") +CLI_CONTROL( + bool, NoProfilingQueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will force all queues to be created without event profiling support. " + "This can be used for performance analysis, but may lead to errors if the " + "application requires event profiling.") +CLI_CONTROL(bool, DummyOutOfOrderQueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will create and destroy a dummy out-of-order queue. 
" + "This may be useful for performance analysis.") +CLI_CONTROL( + bool, NullEnqueue, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will silently ignore any enqueue. This can be used for performance " + "analysis, but will likely cause errors if the application relies on any " + "sort of information from OpenCL events and should be used carefully.") +CLI_CONTROL( + bool, NullLocalWorkSize, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will force the local work size argument to clEnqueueNDRangeKernel() to be " + "NULL, which causes the OpenCL implementation to pick the local work size. " + "Note that this control takes effect before NullLocalWorkSizeX / " + "NullLocalWorkSizeY / NullLocalWorkSizeZ (see below), so enabling both " + "controls will have the effect of forcing a specific local work size.") +CLI_CONTROL( + size_t, NullLocalWorkSizeX, 0, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will set the local work size that will be used if an application passes " + "NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches " + "will only look at NullLocalWorkSizeX, 2D dispatches will only look at " + "NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look " + "at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If " + "the specified values for NullLocalWorkSize do not evenly divide the " + "global work size then the specified values of NullLocalWorkSize will not " + "take effect.") +CLI_CONTROL( + size_t, NullLocalWorkSizeY, 0, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will set the local work size that will be used if an application passes " + "NULL as the local work size to clEnqueueNDRangeKernel(). 
1D dispatches " + "will only look at NullLocalWorkSizeX, 2D dispatches will only look at " + "NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look " + "at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If " + "the specified values for NullLocalWorkSize do not evenly divide the " + "global work size then the specified values of NullLocalWorkSize will not " + "take effect.") +CLI_CONTROL( + size_t, NullLocalWorkSizeZ, 0, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will set the local work size that will be used if an application passes " + "NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches " + "will only look at NullLocalWorkSizeX, 2D dispatches will only look at " + "NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look " + "at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If " + "the specified values for NullLocalWorkSize do not evenly divide the " + "global work size then the specified values of NullLocalWorkSize will not " + "take effect.") +CLI_CONTROL( + bool, InitializeBuffers, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will initialize the contents of allocated buffers with zero. Only valid " + "for non-COPY_HOST_PTR and non-USE_HOST_PTR allocations.") +CLI_CONTROL( + cl_uint, DefaultQueuePriorityHint, 0, + "If set to a nonzero value, and if no other priority hint is specified by " + "the application, the Intercept Layer for OpencL Applications will attempt " + "to create a command queue with this priority hint value. Note: HIGH " + "priority is 1, MED priority is 2, and LOW priority is 4.") +CLI_CONTROL( + cl_uint, DefaultQueueThrottleHint, 0, + "If set to a nonzero value, and if no other throttle hint is specified by " + "the application, the Intercept Layer for OpencL Applications will attempt " + "to create a command queue with this throttle hint value. 
Note: HIGH " + "throttle is 1, MED throttle is 2, and LOW throttle is 4.") +CLI_CONTROL(bool, RelaxAllocationLimits, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will attempt to relax allocation limits to enable " + "allocations larger than CL_DEVICE_MAX_MEM_ALLOC_SIZE.") + +CLI_CONTROL_SEPARATOR(Platform and Device Query Overrides:) +CLI_CONTROL(std::string, PlatformName, "", + "If set to a non-empty value, the clGetPlatformInfo() query for " + "CL_PLATFORM_NAME will return this string instead of the true " + "platform name.") +CLI_CONTROL(std::string, PlatformVendor, "", + "If set to a non-empty value, the clGetPlatformInfo() query for " + "CL_PLATFORM_VENDOR will return this string instead of the true " + "platform vendor.") +CLI_CONTROL(std::string, PlatformProfile, "", + "If set to a non-empty value, the clGetPlatformInfo() query for " + "CL_PLATFORM_PROFILE will return this string instead of the true " + "platform profile.") +CLI_CONTROL(std::string, PlatformVersion, "", + "If set to a non-empty string, the clGetPlatformInfo() query for " + "CL_PLATFORM_VERSION will return this string instead of the true " + "platform version.") +CLI_CONTROL(cl_uint, DeviceTypeFilter, CL_DEVICE_TYPE_ALL, + "Hides all device types that are not in the filter. Note: " + "CL_DEVICE_TYPE_CPU = 2, CL_DEVICE_TYPE_GPU = 4, " + "CL_DEVICE_TYPE_ACCELERATOR = 8, CL_DEVICE_TYPE_CUSTOM = 16.") +CLI_CONTROL( + cl_uint, DeviceType, 0, + "If set to a non-zero value, the clGetDeviceInfo() query for " + "CL_DEVICE_TYPE will return this value instead of the true device type. " + "In addition, calls to clGetDeviceIDs() for this device type will return " + "all devices, not just devices of the requested type. 
This can be used to " + "enumerate all devices (even CPUs) as GPUs, or vice versa.") +CLI_CONTROL( + std::string, DeviceName, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_NAME will return this value instead of the true device name.") +CLI_CONTROL(std::string, DeviceVendor, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_VENDOR will return this value instead of the true " + "device vendor.") +CLI_CONTROL(std::string, DeviceProfile, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_PROFILE will return this value instead of the true " + "device profile.") +CLI_CONTROL(std::string, DeviceVersion, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_VERSION will return this value instead of the true " + "device version.") +CLI_CONTROL(std::string, DeviceCVersion, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_OPENCL_C_VERSION will return this value instead of the " + "true device version.") +CLI_CONTROL(std::string, DeviceExtensions, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_EXTENSIONS will return this value instead of the true " + "device extensions string.") +CLI_CONTROL(std::string, DeviceILVersion, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_IL_VERSION will return this value instead of the true " + "device intermediate language versions.") +CLI_CONTROL(cl_uint, DeviceVendorID, 0, + "If set to a non-zero value, the clGetDeviceInfo() query for " + "CL_DEVICE_VENDOR will return this value instead of the true " + "device vendor ID.") +CLI_CONTROL(cl_uint, DeviceMaxComputeUnits, 0, + "If set to a non-zero value, the clGetDeviceInfo() query for " + "CL_DEVICE_MAX_COMPUTE_UNITS will return this value instead of the " + "true device max compute units.") +CLI_CONTROL(cl_uint, DevicePreferredVectorWidthChar, UINT_MAX, + 
"If set to a non-negative value, the clGetDeviceInfo() query for " + "CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR will return this value " + "instead of the true device preferred vector width.") +CLI_CONTROL(cl_uint, DevicePreferredVectorWidthShort, UINT_MAX, + "If set to a non-negative value, the clGetDeviceInfo() query for " + "CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT will return this value " + "instead of the true device preferred vector width.") +CLI_CONTROL(cl_uint, DevicePreferredVectorWidthInt, UINT_MAX, + "If set to a non-negative value, the clGetDeviceInfo() query for " + "CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT will return this value " + "instead of the true device preferred vector width.") +CLI_CONTROL(cl_uint, DevicePreferredVectorWidthLong, UINT_MAX, + "If set to a non-negative value, the clGetDeviceInfo() query for " + "CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG will return this value " + "instead of the true device preferred vector width.") +CLI_CONTROL(cl_uint, DevicePreferredVectorWidthHalf, UINT_MAX, + "If set to a non-negative value, the clGetDeviceInfo() query for " + "CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF will return this value " + "instead of the true device preferred vector width.") +CLI_CONTROL(cl_uint, DevicePreferredVectorWidthFloat, UINT_MAX, + "If set to a non-negative value, the clGetDeviceInfo() query for " + "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT will return this value " + "instead of the true device preferred vector width.") +CLI_CONTROL(cl_uint, DevicePreferredVectorWidthDouble, UINT_MAX, + "If set to a non-negative value, the clGetDeviceInfo() query for " + "CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE will return this value " + "instead of the true device preferred vector width.") +CLI_CONTROL(std::string, DriverVersion, "", + "If set to a non-empty string, the clGetDeviceInfo() query for " + "CL_DRIVER_VERSION will return this value instead of the true " + "driver version.") +CLI_CONTROL(std::string, PrependDeviceExtensions, "", + "If set to a 
non-empty string, the clGetDeviceInfo() query for " + "CL_DEVICE_EXTENSIONS will return this value followed by the true " + "device extensions string.") + +CLI_CONTROL_SEPARATOR(Precompiled Kernel and Builtin Kernel Override Controls:) +CLI_CONTROL( + bool, ForceByteBufferOverrides, false, + "If set to a nonzero value, each of the buffer functions that are " + "overridden (via one or more of the keys below) will use a byte-wise " + "operation to read/write/copy the buffer (default behavior is to try to " + "copy multiple bytes at a time, if possible). Note: Requires OpenCL 1.1 " + "or the \"byte addressable store\" extension.") +CLI_CONTROL(bool, OverrideReadBuffer, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will use a kernel to implement clEnqueueReadBuffer() " + "instead of the implementation's clEnqueueReadBuffer(). Note: " + "Requires OpenCL 1.1 or the \"byte addressable store\" extension.") +CLI_CONTROL( + bool, OverrideWriteBuffer, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will use a kernel to implement clEnqueueWriteBuffer() instead of the " + "implementation's clEnqueueWriteBuffer(). Note: Requires OpenCL 1.1 or " + "the \"byte addressable store\" extension.") +CLI_CONTROL(bool, OverrideCopyBuffer, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will use a kernel to implement clEnqueueCopyBuffer() " + "instead of the implementation's clEnqueueCopyBuffer(). Note: " + "Requires OpenCL 1.1 or the \"byte addressable store\" extension.") +CLI_CONTROL(bool, OverrideReadImage, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will use a kernel to implement clEnqueueReadImage() " + "instead of the implementation's clEnqueueReadImage(). 
Only 2D " + "images are currently supported.") +CLI_CONTROL(bool, OverrideWriteImage, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will use a kernel to implement clEnqueueWriteImage() " + "instead of the implementation's clEnqueueWriteImage(). Only 2D " + "images are currently supported.") +CLI_CONTROL(bool, OverrideCopyImage, false, + "If set to a nonzero value, the Intercept Layer for OpenCL " + "Applications will use a kernel to implement clEnqueueCopyImage() " + "instead of the implementation's clEnqueueCopyImage(). Only 2D " + "images are currently supported.") +CLI_CONTROL( + bool, OverrideBuiltinKernels, false, + "If set to a nonzero value, the Intercept Layer for OpenCL Applications " + "will use its own version of the built-in OpenCL kernels that may be " + "accessed via clCreateProgramWithBuiltInKernels(). At present, only the " + "VME block_motion_estimate_intel kernel is implemented.") From c4da5210f0f7ef6cf43221c60e77ff965f6bbbba Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 1 Jun 2026 10:06:37 -0700 Subject: [PATCH 7/7] revert formatting changes to controls.h --- intercept/src/controls.h | 1370 ++++++-------------------------------- 1 file changed, 218 insertions(+), 1152 deletions(-) diff --git a/intercept/src/controls.h b/intercept/src/controls.h index 00f0c81f..8813ab61 100644 --- a/intercept/src/controls.h +++ b/intercept/src/controls.h @@ -9,1167 +9,233 @@ #endif #ifndef CLI_CONTROL_SEPARATOR -#define CLI_CONTROL_SEPARATOR(_name) +#define CLI_CONTROL_SEPARATOR( _name ) #endif -CLI_CONTROL_SEPARATOR(Tracing Controls:) -CLI_CONTROL( - bool, BetaExtensionIntercepting, true, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will intercept extension APIs for beta extensions that are subject to " - "change. 
If an application uses beta extensions and does not function " - "correctly with the Intercept Layer for OpenCL Applications, setting this " - "control to zero may allow the application to function correctly, albeit " - "without the ability to debug and analyze the beta extension APIs.") - -CLI_CONTROL_SEPARATOR(Logging Controls:) -CLI_CONTROL(bool, SuppressLogging, false, - "If set to a nonzero value, suppresses all logging output from the " - "Intercept Layer for OpenCL Applications. This is particularly " - "useful for tools that only want report data.") -CLI_CONTROL( - bool, AppendFiles, false, - "By default, the Intercept Layer for OpenCL Applications log files will be " - "created from scratch when the intercept DLL is loaded, and any Intercept " - "Layer for OpenCL Applications report files will be created from scratch " - "when the intercept DLL is unloaded. If AppendFiles is set to a nonzero " - "value, the Intercept Layer for OpenCL Applications will append to an " - "existing file instead of recreating it. This can be useful if an " - "application loads and unloads the intercept DLL multiple times, or to " - "simply preserve log or report data from run-to-run.") -CLI_CONTROL(bool, LogToFile, false, - "If set to a nonzero value, sends log information to the file " - "\"clintercept_log.txt\" instead of to stderr.") -CLI_CONTROL( - bool, LogToDebugger, false, - "If set to a nonzero value, sends log information to the debugger instead " - "of to stderr. If both LogToFile and LogToDebugger are nonzero then log " - "information will be sent both to a file and to the debugger.") -CLI_CONTROL(int, LogIndent, 0, "Indents each log entry by this many spaces.") -CLI_CONTROL( - bool, BuildLogging, false, - "If set to a nonzero value, logs the program build log after each call to " - "clBuildProgram(). This will likely only function correctly for " - "synchronous builds. 
Note that the build log is logged regardless of " - "whether the program built successfully, which allows compiler warnings to " - "be logged for successful compiles.") -CLI_CONTROL( - bool, PreferredWorkGroupSizeMultipleLogging, false, - "If set to a nonzero value, logs the preferred work group size multiple " - "for each kernel after each call to clCreateKernel(). On some devices " - "this is the equivalent of the SIMD size for this kernel.") -CLI_CONTROL(bool, KernelInfoLogging, false, - "If set to a nonzero value, logs information about the kernel " - "after each call to clCreateKernel().") -CLI_CONTROL( - bool, CallLogging, false, - "If set to a nonzero value, logs function entry and exit information for " - "every OpenCL call. This can be used to easily determine which OpenCL " - "call is causing an application to crash or fail or if a crash occurs " - "outside of an OpenCL call. This setting is best used with LogToFile or " - "LogToDebugger as it can generate a lot of log data.") -CLI_CONTROL( - bool, CallLoggingEnqueueCounter, false, - "If set to a nonzero value, logs the enqueue counter in addition to " - "function entry and exit information for every OpenCL call. This can be " - "used to determine appropriate limits for DumpBuffersMinEnqueue, " - "DumpBuffersMaxEnqueue, DumpImagesMinEnqueue, or DumpBuffersMaxEnqueue. " - "If CallLogging is disabled then this control will have no effect.") -CLI_CONTROL(bool, CallLoggingThreadId, false, - "If set to a nonzero value, logs the ID of the calling thread in " - "addition to function entry and exit information for every OpenCL " - "call. This can be helpful when debugging multi-threading issues.") -CLI_CONTROL( - bool, CallLoggingThreadNumber, false, - "If set to a nonzero value, logs the symbolic number of the calling thread " - "in addition to function entry and exit information for every OpenCL call. 
" - " This can be helpful when debugging multi-threading issues.") -CLI_CONTROL(bool, CallLoggingElapsedTime, false, - "If set to a nonzero value, logs the elapsed time in microseconds " - "in addition to function entry and exit information for every " - "OpenCL call, starting from the time the intercept DLL is loaded.") -CLI_CONTROL( - bool, ITTCallLogging, false, - "If set to a nonzero value, logs function entry and exit information for " - "every OpenCL call using the ITT APIs. This feature will only function if " - "the Intercept Layer for OpenCL Applications is built with ITT support.") -CLI_CONTROL(cl_uint, ChromeTraceBufferSize, 16384, - "If set to a nonzero value, buffers JSON records for Chrome " - "Tracing in memory before writing to a file. The buffer will be " - "flushed when it fills, upon application termination, and " - "optionally on blocking OpenCL calls.") -CLI_CONTROL(bool, ChromeTraceBufferingBlockingCallFlush, true, - "If set to a nonzero value, flushes buffered JSON records for " - "Chrome Tracing after blocking OpenCL calls.") -CLI_CONTROL(bool, ChromeCallLogging, false, - "If set to a nonzero value, logs function entry and exit " - "information and host performance timing for every OpenCL call to " - "a JSON file that may be used for Chrome Tracing.") -CLI_CONTROL( - bool, ChromeFlowEvents, false, - "If set to a nonzero value, adds flow events between OpenCL calls and " - "OpenCL commands in a JSON file that may be used for Chrome Tracing. 
" - "Requires both ChromeCallLogging and ChromePerformanceTiming.") -CLI_CONTROL(bool, ErrorLogging, false, - "If set to a nonzero value, logs all OpenCL errors and the " - "function name that caused the error.") -CLI_CONTROL(bool, ErrorAssert, false, - "If set to a nonzero value, breaks into the debugger when an " - "OpenCL error occurs.") -CLI_CONTROL(bool, ContextCallbackLogging, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will install a callback for every context and log " - "any calls to the context callback. The application's context " - "callback, if any, will be invoked after the Intercept Layer for " - "OpenCL Applications' context callback.") -CLI_CONTROL( - cl_uint, ContextHintLevel, 0, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will attempt to create contexts with the " - "CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL property set to the specified value. " - "If this property is specified by the application, the Intercept Layer for " - "OpenCL Applications will overwrite it with the specified value, otherwise " - "the property and the specified value will be added to the list of context " - "creation properties. This functionality is only available for OpenCL " - "implementations that support the cl_intel_driver_diagnostics extension. " - "If this functionality is not available in the underlying OpenCL " - "implementation, the unmodified list of context properties will be used to " - "create the context instead. More information about this feature, " - "including valid values and their meaning, can be found in the " - "cl_intel_driver_diagnostics extension specification.") -CLI_CONTROL( - bool, EventCallbackLogging, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will install its own callback for every event callback and log the call " - "to the event callback. 
The application's event callback will be invoked " - "after the Intercept Layer for OpenCL Applications' event callback.") -CLI_CONTROL(bool, QueueInfoLogging, false, - "If set to a nonzero value, logs information about a queue when it " - "is created.") -CLI_CONTROL(bool, EventChecking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will check and log any events in an event wait list " - "that are invalid or in an error state. This can help to debug " - "complex event dependency issues.") -CLI_CONTROL(bool, LeakChecking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will check for leaks of various OpenCL objects, such " - "as memory objects and events.") -CLI_CONTROL( - bool, USMChecking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will check for incorrect usage of Unified Shared Memory (USM) pointers.") -CLI_CONTROL( - bool, CLInfoLogging, false, - "If set to a nonzero value, logs information about the platforms and " - "devices in the system on the first call to clGetPlatformIDs().") -CLI_CONTROL(bool, FlushFiles, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will flush files after ever write. This slows down " - "performance but can help to avoid truncated files if the " - "Intercept Layer for OpenCL Applications does not exit cleanly.") -CLI_CONTROL(std::string, DumpDir, "", - "If set, the Intercept Layer for OpenCL Applications will emit " - "logs and dumps to this directory instead of the default " - "directory. The default log and dump directory is " - "\"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\" on " - "Windows and \"~/CLIntercept_Dump/\" on other " - "operating systems. 
The log and dump directory must be writeable, " - "otherwise the Intercept Layer for OpenCL Applications will not be " - "able to create or modify log or dump files.") -CLI_CONTROL(bool, AppendPid, false, - "If set, the Intercept Layer for OpenCL Applications will append " - "process ID to the log directory name.") -CLI_CONTROL(bool, UniqueFiles, false, - "If set, the Intercept Layer for OpenCL Applications will find a " - "unique file name for logs and reports by appending a number to " - "the file names, if needed.") -CLI_CONTROL(bool, KernelNameHashTracking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will append the program and build option hashes to " - "the kernel name in logs and reports.") -CLI_CONTROL( - cl_uint, LongKernelNameCutoff, UINT_MAX, - "If an OpenCL application uses kernels with very long names, the Intercept " - "Layer for OpenCL Applications can substitute a \"short\" kernel " - "identifier for a \"long\" kernel name in logs and reports. This control " - "defines how long a kernel name must be (in characters) before it is " - "replaced by a \"short\" kernel identifier.") -CLI_CONTROL(bool, DemangleKernelNames, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will track kernel names that are demangled according " - "to C++ ABI rules. 
This setting requires compiler support for " - "demangling and may not be available in all configurations.") - -CLI_CONTROL_SEPARATOR(Reporting Controls:) -CLI_CONTROL(bool, ReportToStderr, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will emit reports to stderr.") -CLI_CONTROL( - bool, ReportToFile, true, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will write results to the file \"clintercept_report.txt\".") -CLI_CONTROL(cl_uint, ReportInterval, 0, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will generate a report at regular intervals (based " - "on the enqueue counter). This can be useful to generate report " - "data while a long-running application is executing, or if an " - "application does not exit cleanly.") - -CLI_CONTROL_SEPARATOR(Performance Timing Controls:) -CLI_CONTROL( - bool, HostPerformanceTiming, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will track the minimum, maximum, and average host CPU time for each " - "OpenCL entry point. When the process exits, this information will be " - "included in the file \"clIntercept_report.txt\".") -CLI_CONTROL(bool, ToolOverheadTiming, true, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will include some types of tool overhead in timing " - "reports and some types of logging.") -CLI_CONTROL( - bool, DevicePerformanceTiming, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will add event profiling to track the minimum, maximum, and average " - "device time for each OpenCL command. This operation may be fairly " - "intrusive and may have side effects; in particular it forces all command " - "queues to be created with PROFILING_ENABLED and may increment the " - "reference count for application events. 
When the process exits, this " - "information will be included in the file \"clIntercept_report.txt\".") -CLI_CONTROL(bool, DevicePerformanceTimingHistogram, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will report a histogram of device times in addition " - "to the table of device times for each OpenCL command.") -CLI_CONTROL( - bool, DevicePerformanceTimeKernelInfoTracking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will distinguish between OpenCL NDRange kernels using information such as " - "the kernel's Preferred Work Group Size Multiple (AKA SIMD size).") -CLI_CONTROL( - bool, DevicePerformanceTimeGWOTracking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will distinguish between OpenCL NDRange kernels with different global " - "work offsets for the purpose of device performance timing.") -CLI_CONTROL( - bool, DevicePerformanceTimeGWSTracking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will distinguish between OpenCL NDRange kernels with different global " - "work sizes for the purpose of device performance timing.") -CLI_CONTROL( - bool, DevicePerformanceTimeLWSTracking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will distinguish between OpenCL NDRange kernels with different local work " - "sizes for the purpose of device performance timing.") -CLI_CONTROL(bool, DevicePerformanceTimeSuggestedLWSTracking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will attempt to query and track the suggested local " - "work size when the passed-in local work size is NULL.") -CLI_CONTROL(bool, DevicePerformanceTimeTransferTracking, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will distinguish between transfer operations of " - "different sizes for the purpose of device performance 
timing.") -CLI_CONTROL( - bool, DevicePerformanceTimingKernelsOnly, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will collect device performance timing for kernel commands only") -CLI_CONTROL( - bool, DevicePerformanceTimingSkipUnmap, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will skip device performance timing for unmap operations. This is a " - "workaround for a bug in some OpenCL implementations, where querying " - "events created from unmap operations results in driver crashes.") -CLI_CONTROL(cl_uint, HostPerformanceTimingMinEnqueue, 0, - "The Intercept Layer for OpenCL Applications will only collect " - "host performance timing metrics when the enqueue counter is " - "greater than this value, inclusive.") -CLI_CONTROL(cl_uint, HostPerformanceTimingMaxEnqueue, UINT_MAX, - "The Intercept Layer for OpenCL Applications will only collect " - "host performance timing metrics when the enqueue counter is less " - "than this value, inclusive.") -CLI_CONTROL(cl_uint, DevicePerformanceTimingMinEnqueue, 0, - "The Intercept Layer for OpenCL Applications will only collect " - "device performance timing metrics when the enqueue counter is " - "greater than this value, inclusive.") -CLI_CONTROL(cl_uint, DevicePerformanceTimingMaxEnqueue, UINT_MAX, - "The Intercept Layer for OpenCL Applications will only collect " - "device performance timing metrics when the enqueue counter is " - "less than this value, inclusive.") -CLI_CONTROL(bool, HostPerformanceTimeLogging, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will log the host elapsed time for each OpenCL entry " - "point. 
This can be useful to identify OpenCL entry points that " - "execute significantly slower or faster than average on the host.") -CLI_CONTROL(bool, DevicePerformanceTimeLogging, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will log the device execution time deltas for each " - "OpenCL command. This can be useful to identify specific OpenCL " - "commands that execute significantly slower or faster than average " - "on the device. If DevicePerformanceTiming is disabled then this " - "control will have no effect.") -CLI_CONTROL( - bool, DevicePerformanceTimelineLogging, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will log the device execution times for each OpenCL command. This can be " - "useful to visualize the execution timeline of OpenCL commands that " - "execute on the device. If DevicePerformanceTiming is disabled then this " - "control will have no effect.") -CLI_CONTROL(std::string, DevicePerfCounterLibName, "", - "Full path to MDAPI shared library. If not set, the default MDAPI " - "library will be used.") -CLI_CONTROL( - bool, DevicePerfCounterEventBasedSampling, false, - "If set to a nonzero value and DevicePerfCounterCustom is set, the " - "Intercept Layer for OpenCL Applications will enable Intel GPU Performance " - "Counters to track the minimum, maximum, and average performance counter " - "deltas for each OpenCL command. This operation may be fairly intrusive " - "and may have side effects; in particular it forces all command queues to " - "be created with PROFILING_ENABLED and may increment the reference count " - "for application events. 
This feature will only function if the Intercept " - "Layer for OpenCL Applications is built with MDAPI support.") -CLI_CONTROL( - bool, DevicePerfCounterTimeBasedSampling, false, - "If set to a nonzero value and DevicePerfCounterCustom is set, the " - "Intercept Layer for OpenCL Applications will enable Intel GPU Performance " - "Counters to track performance counter deltas at regular time intervals. " - "This operation may be fairly intrusive and may have side effects. This " - "feature will only function if the Intercept Layer for OpenCL Applications " - "is built with MDAPI support.") -CLI_CONTROL(uint32_t, DevicePerfCounterAdapterIndex, 0, - "Select which MDAPI device to report performance counters.") -CLI_CONTROL(std::string, DevicePerfCounterCustom, "", - "If set, the Intercept Layer for OpenCL Applications will collect " - "MDAPI metrics for the Metric Set corresponding to this value for " - "each OpenCL command. Frequently used Metric Sets include: " - "ComputeBasic, ComputeExtended, L3_1, Sampler. The output file has " - "the potential to be very big depending on the work load. This " - "operation may be fairly intrusive and may have side effects; in " - "particular it forces all command queues to be created with " - "PROFILING_ENABLED and may increment the reference count for " - "application events. When the process exits, this information will " - "be included in the file \"clintercept_perfcounter_dump_.txt\". This feature will only function if the Intercept " - "Layer for OpenCL Applications is built with MDAPI support.") -CLI_CONTROL(std::string, DevicePerfCounterFile, "", - "Full path to a custom MDAPI file. This can be used to add custom " - "Metric Sets.") -CLI_CONTROL( - bool, DevicePerfCounterTiming, false, - "If set to a nonzero value and DevicePerfCounterEventBasedSampling is set, " - "the Intercept Layer for OpenCL Applications will report the average Intel " - "GPU Performance Counters for each OpenCL command. 
When the process exits, " - "this information will be included in the file \"clIntercept_report.txt\". " - " This feature will only function if the Intercept Layer for OpenCL " - "Applications is built with MDAPI support.") -CLI_CONTROL(bool, DevicePerfCounterReportMax, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will collect also max values of target platform to " - ".csv with MDAPI counters as a column next to each metric.") -CLI_CONTROL( - uint32_t, DevicePerfCounterTimeBasedSamplingPeriod, 1000, - "The sampling period for Intel GPU Performance Counter Time-based " - "Sampling, in microseconds. A smaller sampling period increases overhead " - "and the likelihood dropped samples but can be more precise. Note that " - "some devices do not support very small sampling periods.") -CLI_CONTROL(uint32_t, DevicePerfCounterTimeBasedBufferSize, 0, - "The buffer size for Intel GPU Performance Counter Time-based " - "Sampling, in bytes. When set to zero, automatically chooses the " - "device maximum buffer size. A larger buffer size will decrease " - "the likelihood of dropped samples.") -CLI_CONTROL( - bool, ITTPerformanceTiming, false, - "[Note: This control makes ITT calls, but they appear to do nothing!] If " - "set to a nonzero value, the Intercept Layer for OpenCL Applications will " - "generate ITT-compatible performance timing data. Similar to " - "DevicePerformanceTiming, this operation may be fairly intrusive and may " - "have side effects; in particular it forces all command queues to be " - "created with PROFILING_ENABLED and may increment the reference count for " - "application events. ITTPerformanceTiming will also silently create " - "OpenCL command queues that support advanced performance counters if this " - "functionality is available. 
This feature will only function if the " - "Intercept Layer for OpenCL Applications is built with ITT support.") -CLI_CONTROL( - bool, ITTShowOnlyExecutingEvents, false, - "[Note: This control makes ITT calls, but they appear to do nothing!] By " - "default, when ITTPerformanceTiming is enabled, the Intercept Layer for " - "OpenCL Applications will generate ITT-compatible information for all " - "states of an OpenCL event: when the command was queued, when it was " - "submitted, when it started executing, and when it finished executing. If " - "ITTShowOnlyExecutingEvents is set to a nonzero value, the Intercept Layer " - "for OpenCL Applications will only generate ITT-compatible instrumentation " - "when an event begins executing and when an event ends executing. Since no " - "information will be displayed about when a command is queued or " - "submitted, this can sometimes make it easier to identify times when the " - "device is idle. This feature will only function if the Intercept Layer " - "for OpenCL Applications is built with ITT support.") -CLI_CONTROL(bool, ChromePerformanceTiming, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will generate device performance timing information " - "in a JSON file that may be used for Chrome Tracing.") -CLI_CONTROL(bool, ChromePerformanceTimingInStages, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will separate the performance information placed in " - "the JSON file into Queued, Submitted, and Execution stages. It " - "will also reorder the threads/queues by starting runtime. This " - "flag is only functional when ChromePerformanceTiming is also set.") -CLI_CONTROL( - bool, ChromePerformanceTimingPerKernel, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will organize the performance information placed in the JSON file on a " - "per kernel name basis. 
It is only functional when ChromePerformanceTiming " - "is also set. When ChromePerformanceTimingInStages is also set, " - "information about event stages will be retained.") -CLI_CONTROL( - bool, ChromePerformanceTimingEstimateQueuedTime, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will unconditionally estimate the queued time for Chrome Tracing rather " - "than computing it using device and host timers and event profiling data. " - "The estimated time is less accurate than the computed time, but may be " - "more reliable if the device and host timers or event profiling data is " - "incorrect or imprecise.") -CLI_CONTROL(bool, PerformanceTimingConditional, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will only collect host performance timing, device " - "performance timing, and chrome performance timing conditionally, " - "when the \"CLI_ENABLE_PERFORMANCE_TIMING\" environment variable " - "is set to a non-zero value.") +CLI_CONTROL_SEPARATOR( Tracing Controls: ) +CLI_CONTROL( bool, BetaExtensionIntercepting, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will intercept extension APIs for beta extensions that are subject to change. If an application uses beta extensions and does not function correctly with the Intercept Layer for OpenCL Applications, setting this control to zero may allow the application to function correctly, albeit without the ability to debug and analyze the beta extension APIs." ) + +CLI_CONTROL_SEPARATOR( Logging Controls: ) +CLI_CONTROL( bool, SuppressLogging, false, "If set to a nonzero value, suppresses all logging output from the Intercept Layer for OpenCL Applications. This is particularly useful for tools that only want report data." 
) +CLI_CONTROL( bool, AppendFiles, false, "By default, the Intercept Layer for OpenCL Applications log files will be created from scratch when the intercept DLL is loaded, and any Intercept Layer for OpenCL Applications report files will be created from scratch when the intercept DLL is unloaded. If AppendFiles is set to a nonzero value, the Intercept Layer for OpenCL Applications will append to an existing file instead of recreating it. This can be useful if an application loads and unloads the intercept DLL multiple times, or to simply preserve log or report data from run-to-run." ) +CLI_CONTROL( bool, LogToFile, false, "If set to a nonzero value, sends log information to the file \"clintercept_log.txt\" instead of to stderr." ) +CLI_CONTROL( bool, LogToDebugger, false, "If set to a nonzero value, sends log information to the debugger instead of to stderr. If both LogToFile and LogToDebugger are nonzero then log information will be sent both to a file and to the debugger." ) +CLI_CONTROL( int, LogIndent, 0, "Indents each log entry by this many spaces." ) +CLI_CONTROL( bool, BuildLogging, false, "If set to a nonzero value, logs the program build log after each call to clBuildProgram(). This will likely only function correctly for synchronous builds. Note that the build log is logged regardless of whether the program built successfully, which allows compiler warnings to be logged for successful compiles." ) +CLI_CONTROL( bool, PreferredWorkGroupSizeMultipleLogging, false, "If set to a nonzero value, logs the preferred work group size multiple for each kernel after each call to clCreateKernel(). On some devices this is the equivalent of the SIMD size for this kernel." ) +CLI_CONTROL( bool, KernelInfoLogging, false, "If set to a nonzero value, logs information about the kernel after each call to clCreateKernel()." ) +CLI_CONTROL( bool, CallLogging, false, "If set to a nonzero value, logs function entry and exit information for every OpenCL call. 
This can be used to easily determine which OpenCL call is causing an application to crash or fail or if a crash occurs outside of an OpenCL call. This setting is best used with LogToFile or LogToDebugger as it can generate a lot of log data." ) +CLI_CONTROL( bool, CallLoggingEnqueueCounter, false, "If set to a nonzero value, logs the enqueue counter in addition to function entry and exit information for every OpenCL call. This can be used to determine appropriate limits for DumpBuffersMinEnqueue, DumpBuffersMaxEnqueue, DumpImagesMinEnqueue, or DumpBuffersMaxEnqueue. If CallLogging is disabled then this control will have no effect." ) +CLI_CONTROL( bool, CallLoggingThreadId, false, "If set to a nonzero value, logs the ID of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues." ) +CLI_CONTROL( bool, CallLoggingThreadNumber, false, "If set to a nonzero value, logs the symbolic number of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues." ) +CLI_CONTROL( bool, CallLoggingElapsedTime, false, "If set to a nonzero value, logs the elapsed time in microseconds in addition to function entry and exit information for every OpenCL call, starting from the time the intercept DLL is loaded." ) +CLI_CONTROL( bool, ITTCallLogging, false, "If set to a nonzero value, logs function entry and exit information for every OpenCL call using the ITT APIs. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) +CLI_CONTROL( cl_uint, ChromeTraceBufferSize, 16384, "If set to a nonzero value, buffers JSON records for Chrome Tracing in memory before writing to a file. 
The buffer will be flushed when it fills, upon application termination, and optionally on blocking OpenCL calls.") +CLI_CONTROL( bool, ChromeTraceBufferingBlockingCallFlush, true, "If set to a nonzero value, flushes buffered JSON records for Chrome Tracing after blocking OpenCL calls.") +CLI_CONTROL( bool, ChromeCallLogging, false, "If set to a nonzero value, logs function entry and exit information and host performance timing for every OpenCL call to a JSON file that may be used for Chrome Tracing." ) +CLI_CONTROL( bool, ChromeFlowEvents, false, "If set to a nonzero value, adds flow events between OpenCL calls and OpenCL commands in a JSON file that may be used for Chrome Tracing. Requires both ChromeCallLogging and ChromePerformanceTiming." ) +CLI_CONTROL( bool, ErrorLogging, false, "If set to a nonzero value, logs all OpenCL errors and the function name that caused the error." ) +CLI_CONTROL( bool, ErrorAssert, false, "If set to a nonzero value, breaks into the debugger when an OpenCL error occurs." ) +CLI_CONTROL( bool, ContextCallbackLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will install a callback for every context and log any calls to the context callback. The application's context callback, if any, will be invoked after the Intercept Layer for OpenCL Applications' context callback." ) +CLI_CONTROL( cl_uint, ContextHintLevel, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to create contexts with the CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL property set to the specified value. If this property is specified by the application, the Intercept Layer for OpenCL Applications will overwrite it with the specified value, otherwise the property and the specified value will be added to the list of context creation properties. This functionality is only available for OpenCL implementations that support the cl_intel_driver_diagnostics extension. 
If this functionality is not available in the underlying OpenCL implementation, the unmodified list of context properties will be used to create the context instead. More information about this feature, including valid values and their meaning, can be found in the cl_intel_driver_diagnostics extension specification." ) +CLI_CONTROL( bool, EventCallbackLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will install its own callback for every event callback and log the call to the event callback. The application's event callback will be invoked after the Intercept Layer for OpenCL Applications' event callback." ) +CLI_CONTROL( bool, QueueInfoLogging, false, "If set to a nonzero value, logs information about a queue when it is created." ) +CLI_CONTROL( bool, EventChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check and log any events in an event wait list that are invalid or in an error state. This can help to debug complex event dependency issues." ) +CLI_CONTROL( bool, LeakChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for leaks of various OpenCL objects, such as memory objects and events." ) +CLI_CONTROL( bool, USMChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for incorrect usage of Unified Shared Memory (USM) pointers." ) +CLI_CONTROL( bool, CLInfoLogging, false, "If set to a nonzero value, logs information about the platforms and devices in the system on the first call to clGetPlatformIDs()." ) +CLI_CONTROL( bool, FlushFiles, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will flush files after ever write. This slows down performance but can help to avoid truncated files if the Intercept Layer for OpenCL Applications does not exit cleanly." 
) +CLI_CONTROL( std::string, DumpDir, "", "If set, the Intercept Layer for OpenCL Applications will emit logs and dumps to this directory instead of the default directory. The default log and dump directory is \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\" on Windows and \"~/CLIntercept_Dump/\" on other operating systems. The log and dump directory must be writeable, otherwise the Intercept Layer for OpenCL Applications will not be able to create or modify log or dump files." ) +CLI_CONTROL( bool, AppendPid, false, "If set, the Intercept Layer for OpenCL Applications will append process ID to the log directory name." ) +CLI_CONTROL( bool, UniqueFiles, false, "If set, the Intercept Layer for OpenCL Applications will find a unique file name for logs and reports by appending a number to the file names, if needed." ) +CLI_CONTROL( bool, KernelNameHashTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will append the program and build option hashes to the kernel name in logs and reports." ) +CLI_CONTROL( cl_uint, LongKernelNameCutoff, UINT_MAX, "If an OpenCL application uses kernels with very long names, the Intercept Layer for OpenCL Applications can substitute a \"short\" kernel identifier for a \"long\" kernel name in logs and reports. This control defines how long a kernel name must be (in characters) before it is replaced by a \"short\" kernel identifier." ) +CLI_CONTROL( bool, DemangleKernelNames, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will track kernel names that are demangled according to C++ ABI rules. This setting requires compiler support for demangling and may not be available in all configurations." ) + +CLI_CONTROL_SEPARATOR( Reporting Controls: ) +CLI_CONTROL( bool, ReportToStderr, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emit reports to stderr." 
) +CLI_CONTROL( bool, ReportToFile, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will write results to the file \"clintercept_report.txt\"." ) +CLI_CONTROL( cl_uint, ReportInterval, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate a report at regular intervals (based on the enqueue counter). This can be useful to generate report data while a long-running application is executing, or if an application does not exit cleanly." ) + +CLI_CONTROL_SEPARATOR( Performance Timing Controls: ) +CLI_CONTROL( bool, HostPerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will track the minimum, maximum, and average host CPU time for each OpenCL entry point. When the process exits, this information will be included in the file \"clIntercept_report.txt\"." ) +CLI_CONTROL( bool, ToolOverheadTiming, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will include some types of tool overhead in timing reports and some types of logging." ) +CLI_CONTROL( bool, DevicePerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will add event profiling to track the minimum, maximum, and average device time for each OpenCL command. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. When the process exits, this information will be included in the file \"clIntercept_report.txt\"." ) +CLI_CONTROL( bool, DevicePerformanceTimingHistogram, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will report a histogram of device times in addition to the table of device times for each OpenCL command." 
) +CLI_CONTROL( bool, DevicePerformanceTimeKernelInfoTracking,false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels using information such as the kernel's Preferred Work Group Size Multiple (AKA SIMD size)." ) +CLI_CONTROL( bool, DevicePerformanceTimeGWOTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different global work offsets for the purpose of device performance timing." ) +CLI_CONTROL( bool, DevicePerformanceTimeGWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different global work sizes for the purpose of device performance timing." ) +CLI_CONTROL( bool, DevicePerformanceTimeLWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different local work sizes for the purpose of device performance timing." ) +CLI_CONTROL( bool, DevicePerformanceTimeSuggestedLWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to query and track the suggested local work size when the passed-in local work size is NULL." ) +CLI_CONTROL( bool, DevicePerformanceTimeTransferTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between transfer operations of different sizes for the purpose of device performance timing." ) +CLI_CONTROL( bool, DevicePerformanceTimingKernelsOnly, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will collect device performance timing for kernel commands only" ) +CLI_CONTROL( bool, DevicePerformanceTimingSkipUnmap, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will skip device performance timing for unmap operations. 
This is a workaround for a bug in some OpenCL implementations, where querying events created from unmap operations results in driver crashes." ) +CLI_CONTROL( cl_uint, HostPerformanceTimingMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only collect host performance timing metrics when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, HostPerformanceTimingMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only collect host performance timing metrics when the enqueue counter is less than this value, inclusive." ) +CLI_CONTROL( cl_uint, DevicePerformanceTimingMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only collect device performance timing metrics when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, DevicePerformanceTimingMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only collect device performance timing metrics when the enqueue counter is less than this value, inclusive." ) +CLI_CONTROL( bool, HostPerformanceTimeLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the host elapsed time for each OpenCL entry point. This can be useful to identify OpenCL entry points that execute significantly slower or faster than average on the host." ) +CLI_CONTROL( bool, DevicePerformanceTimeLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution time deltas for each OpenCL command. This can be useful to identify specific OpenCL commands that execute significantly slower or faster than average on the device. If DevicePerformanceTiming is disabled then this control will have no effect." ) +CLI_CONTROL( bool, DevicePerformanceTimelineLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution times for each OpenCL command. 
This can be useful to visualize the execution timeline of OpenCL commands that execute on the device. If DevicePerformanceTiming is disabled then this control will have no effect." ) +CLI_CONTROL( std::string, DevicePerfCounterLibName, "", "Full path to MDAPI shared library. If not set, the default MDAPI library will be used.") +CLI_CONTROL( bool, DevicePerfCounterEventBasedSampling, false, "If set to a nonzero value and DevicePerfCounterCustom is set, the Intercept Layer for OpenCL Applications will enable Intel GPU Performance Counters to track the minimum, maximum, and average performance counter deltas for each OpenCL command. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) +CLI_CONTROL( bool, DevicePerfCounterTimeBasedSampling, false, "If set to a nonzero value and DevicePerfCounterCustom is set, the Intercept Layer for OpenCL Applications will enable Intel GPU Performance Counters to track performance counter deltas at regular time intervals. This operation may be fairly intrusive and may have side effects. This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) +CLI_CONTROL( uint32_t, DevicePerfCounterAdapterIndex, 0, "Select which MDAPI device to report performance counters." ) +CLI_CONTROL( std::string, DevicePerfCounterCustom, "", "If set, the Intercept Layer for OpenCL Applications will collect MDAPI metrics for the Metric Set corresponding to this value for each OpenCL command. Frequently used Metric Sets include: ComputeBasic, ComputeExtended, L3_1, Sampler. The output file has the potential to be very big depending on the work load. 
This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. When the process exits, this information will be included in the file \"clintercept_perfcounter_dump_.txt\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) +CLI_CONTROL( std::string, DevicePerfCounterFile, "", "Full path to a custom MDAPI file. This can be used to add custom Metric Sets." ) +CLI_CONTROL( bool, DevicePerfCounterTiming, false, "If set to a nonzero value and DevicePerfCounterEventBasedSampling is set, the Intercept Layer for OpenCL Applications will report the average Intel GPU Performance Counters for each OpenCL command. When the process exits, this information will be included in the file \"clIntercept_report.txt\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) +CLI_CONTROL( bool, DevicePerfCounterReportMax, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will collect also max values of target platform to .csv with MDAPI counters as a column next to each metric." ) +CLI_CONTROL( uint32_t, DevicePerfCounterTimeBasedSamplingPeriod, 1000, "The sampling period for Intel GPU Performance Counter Time-based Sampling, in microseconds. A smaller sampling period increases overhead and the likelihood of dropped samples but can be more precise. Note that some devices do not support very small sampling periods." ) +CLI_CONTROL( uint32_t, DevicePerfCounterTimeBasedBufferSize, 0, "The buffer size for Intel GPU Performance Counter Time-based Sampling, in bytes. When set to zero, automatically chooses the device maximum buffer size. A larger buffer size will decrease the likelihood of dropped samples."
) +CLI_CONTROL( bool, ITTPerformanceTiming, false, "[Note: This control makes ITT calls, but they appear to do nothing!] If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate ITT-compatible performance timing data. Similar to DevicePerformanceTiming, this operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. ITTPerformanceTiming will also silently create OpenCL command queues that support advanced performance counters if this functionality is available. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) +CLI_CONTROL( bool, ITTShowOnlyExecutingEvents, false, "[Note: This control makes ITT calls, but they appear to do nothing!] By default, when ITTPerformanceTiming is enabled, the Intercept Layer for OpenCL Applications will generate ITT-compatible information for all states of an OpenCL event: when the command was queued, when it was submitted, when it started executing, and when it finished executing. If ITTShowOnlyExecutingEvents is set to a nonzero value, the Intercept Layer for OpenCL Applications will only generate ITT-compatible instrumentation when an event begins executing and when an event ends executing. Since no information will be displayed about when a command is queued or submitted, this can sometimes make it easier to identify times when the device is idle. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) +CLI_CONTROL( bool, ChromePerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate device performance timing information in a JSON file that may be used for Chrome Tracing." 
) +CLI_CONTROL( bool, ChromePerformanceTimingInStages, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will separate the performance information placed in the JSON file into Queued, Submitted, and Execution stages. It will also reorder the threads/queues by starting runtime. This flag is only functional when ChromePerformanceTiming is also set." ) +CLI_CONTROL( bool, ChromePerformanceTimingPerKernel, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will organize the performance information placed in the JSON file on a per kernel name basis. It is only functional when ChromePerformanceTiming is also set. When ChromePerformanceTimingInStages is also set, information about event stages will be retained." ) +CLI_CONTROL( bool, ChromePerformanceTimingEstimateQueuedTime, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will unconditionally estimate the queued time for Chrome Tracing rather than computing it using device and host timers and event profiling data. The estimated time is less accurate than the computed time, but may be more reliable if the device and host timers or event profiling data is incorrect or imprecise." ) +CLI_CONTROL( bool, PerformanceTimingConditional, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will only collect host performance timing, device performance timing, and chrome performance timing conditionally, when the \"CLI_ENABLE_PERFORMANCE_TIMING\" environment variable is set to a non-zero value." ) CLI_CONTROL_SEPARATOR( Controls for Dumping and Injecting Programs and Build Options: ) -CLI_CONTROL( - bool, OmitProgramNumber, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will omit the program number from dumped file names and hash tracking. 
" - "This can produce deterministic results even if programs are built in a " - "non-deterministic order (say, by multiple threads).") -CLI_CONTROL(bool, OmitCompileCount, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will omit the compile count from dumped file names " - "and hash tracking. This can reduce the number of files that are " - "dumped if the same program is compiled multiple times.") -CLI_CONTROL( - bool, SimpleDumpProgramSource, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump the last string(s) passed to clCreateProgramWithSource() to the " - "file kernel.cl, and the last program options passed to clBuildProgram() " - "to the file kernel.txt. These files will be dumped to the application's " - "working directory. If an application fails to compile a program and " - "exits the program immediately after detecting a compile failure " - "SimpleDumpProgram may be all that is needed to identify the program and " - "program options that are failing to compile.") -CLI_CONTROL( - bool, DumpProgramSourceScript, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump every string passed to clCreateProgramWithSource() to its own " - "file. The directory names and file names for the dumped files match the " - "directory names and file names expected by a modified OpenCL conformance " - "test script to capture kernels. This setting overrides " - "SimpleDumpProgramSource, and if it is set to a nonzero value then the " - "value of SimpleDumpProgramSource is ignored.") -CLI_CONTROL( - bool, DumpProgramSource, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump every string passed to clCreateProgramWithSource() to its own " - "file. The file name will have the form \"CLI___source.cl\". 
Program options will be dumped to the " - "same directory with the file name \"CLI______options.txt\", where API is an empty string for " - "clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for " - "clLinkProgram(). This setting can be used for information purposes to " - "see all kernels that are used by an application or to dump programs for " - "program injection. This setting overrides DumpProgramSourceScript and " - "SimpleDumpProgramSource, and if it is set to a nozero value then the " - "values of DumpProgramSourceScript and SimpleDumpProgramSource will be " - "ignored.") -CLI_CONTROL( - bool, DumpInputProgramBinaries, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump every program binary that is passed to " - "clCreateProgramWithBinary() to its own file. The file name will have the " - "form \"CLI___.bin\". This is the input program binary provided by the " - "application, and not a device binary queried from the OpenCL " - "implementation. In particular, note that it may be a SPIR 1.2 binary.") -CLI_CONTROL( - bool, DumpProgramBinaries, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump every program binary that was successfully built with " - "clBuildProgram() to its own file. The file name will have the form " - "\"CLI_____.bin\". Program options will be " - "dumped to the same directory with the file name \"CLI______options.txt\", where API is an empty string for " - "clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for " - "clLinkProgram(). This setting can be used to examine compiled program " - "binaries or to dump program binaries for program binary injection. 
Note " - "that this option dumps the output binary, which is a device binary, after " - "calling clBuildProgram() or clLinkProgram().") -CLI_CONTROL( - bool, DumpProgramSPIRV, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump every program IL binary passed to clCreateProgramWithIL() to " - "its own file. The file name will have the form \"CLI___0000.spv\" - for now at least!. " - "Program options will be dumped to the same directory with the file name " - "\"CLI______options.txt\", where is an empty " - "string for clBuildProgram(), \"compile\" for clCompileProgram(), and " - "\"link\" for clLinkProgram(). This setting can be used for information " - "purposes to see all kernels that are used by an application or to dump " - "SPIRV programs for SPIRV injection.") -CLI_CONTROL(bool, InjectProgramSource, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will look to inject potentially modified kernel " - "source to clCreateProgramWithSource() and/or potentially modified " - "options to clCompileProgram() or clBuildProgram(). Note that " - "program options currently cannot be injected for clLinkProgram().") -CLI_CONTROL( - bool, InjectProgramBinaries, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will look to inject potentially modified kernel binaries via " - "clCreateProgramWithBinary() in place of program text for each call to " - "clCreateProgramWithSource(). This is typically done to reduce program " - "compilation time or to use known good program binaries.") -CLI_CONTROL( - bool, RejectProgramBinaries, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will reject kernel binaries passed via clCreateProgramWithBinary() and " - "return CL_INVALID_BINARY. 
This can be used to force an application to " - "re-compile program binaries from source.") -CLI_CONTROL(bool, InjectProgramSPIRV, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will look to inject potentially modified kernel " - "SPIR-V binaries via clCreateProgramWithIL() in place of program " - "text for each call to clCreateProgramWithSource().") -CLI_CONTROL( - bool, PrependProgramSource, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will look to prepend kernel code from a file to the application provided " - "kernel source passed to clCreateProgramWithSource(). The Intercept Layer " - "for OpenCL Applications will look for kernel source to prepend in the " - "dump and log directory. The files that are searched for are (in order) " - "\"CLI___prepend.cl\", " - "\"CLI__prepend.cl\", and \"CLI_prepend.cl\".") -CLI_CONTROL( - std::string, AppendBuildOptions, "", - "If set, the Intercept Layer for OpenCL Applications will add these build " - "options to the end of any application provided or injected build options " - "for each call to clCompileProgram or clBuildProgram().") -CLI_CONTROL(std::string, AppendLinkOptions, "", - "If set, the Intercept Layer for OpenCL Applications will add " - "these build options to the end of any application provided or " - "injected build options for each call to clLinkProgram().") -CLI_CONTROL( - bool, DumpProgramBuildLogs, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump build logs for every device a program is built for to a " - "separate file. The file name will have the form \"CLI______build_log.txt\".") -CLI_CONTROL( - bool, DumpKernelISABinaries, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump kernel ISA binaries for every kernel, if supported. Currently, " - "kernel ISA binaries are only supported for Intel GPU devices. 
Kernel ISA " - "binaries can be decoded into ISA text with a disassembler. The file name " - "will have the form \"CLI______.isabin\".") +CLI_CONTROL( bool, OmitProgramNumber, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the program number from dumped file names and hash tracking. This can produce deterministic results even if programs are built in a non-deterministic order (say, by multiple threads)." ) +CLI_CONTROL( bool, OmitCompileCount, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the compile count from dumped file names and hash tracking. This can reduce the number of files that are dumped if the same program is compiled multiple times." ) +CLI_CONTROL( bool, SimpleDumpProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the last string(s) passed to clCreateProgramWithSource() to the file kernel.cl, and the last program options passed to clBuildProgram() to the file kernel.txt. These files will be dumped to the application's working directory. If an application fails to compile a program and exits the program immediately after detecting a compile failure SimpleDumpProgram may be all that is needed to identify the program and program options that are failing to compile." ) +CLI_CONTROL( bool, DumpProgramSourceScript, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The directory names and file names for the dumped files match the directory names and file names expected by a modified OpenCL conformance test script to capture kernels. This setting overrides SimpleDumpProgramSource, and if it is set to a nonzero value then the value of SimpleDumpProgramSource is ignored." 
) +CLI_CONTROL( bool, DumpProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The file name will have the form \"CLI___source.cl\". Program options will be dumped to the same directory with the file name \"CLI______options.txt\", where API is an empty string for clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for clLinkProgram(). This setting can be used for information purposes to see all kernels that are used by an application or to dump programs for program injection. This setting overrides DumpProgramSourceScript and SimpleDumpProgramSource, and if it is set to a nonzero value then the values of DumpProgramSourceScript and SimpleDumpProgramSource will be ignored." ) +CLI_CONTROL( bool, DumpInputProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that is passed to clCreateProgramWithBinary() to its own file. The file name will have the form \"CLI___.bin\". This is the input program binary provided by the application, and not a device binary queried from the OpenCL implementation. In particular, note that it may be a SPIR 1.2 binary." ) +CLI_CONTROL( bool, DumpProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that was successfully built with clBuildProgram() to its own file. The file name will have the form \"CLI_____.bin\". Program options will be dumped to the same directory with the file name \"CLI______options.txt\", where API is an empty string for clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for clLinkProgram(). This setting can be used to examine compiled program binaries or to dump program binaries for program binary injection.
Note that this option dumps the output binary, which is a device binary, after calling clBuildProgram() or clLinkProgram()." ) +CLI_CONTROL( bool, DumpProgramSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program IL binary passed to clCreateProgramWithIL() to its own file. The file name will have the form \"CLI___0000.spv\" - for now at least!. Program options will be dumped to the same directory with the file name \"CLI______options.txt\", where is an empty string for clBuildProgram(), \"compile\" for clCompileProgram(), and \"link\" for clLinkProgram(). This setting can be used for information purposes to see all kernels that are used by an application or to dump SPIRV programs for SPIRV injection." ) +CLI_CONTROL( bool, InjectProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel source to clCreateProgramWithSource() and/or potentially modified options to clCompileProgram() or clBuildProgram(). Note that program options currently cannot be injected for clLinkProgram()." ) +CLI_CONTROL( bool, InjectProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel binaries via clCreateProgramWithBinary() for each call to clCreateProgramWithSource() or clCreateProgramWithBinary(). This can be used to reduce program compilation time, use known good program binaries, or replace application-provided binaries with modified program binaries." ) +CLI_CONTROL( bool, RejectProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will reject kernel binaries passed via clCreateProgramWithBinary() and return CL_INVALID_BINARY. This can be used to force an application to re-compile program binaries from source." 
) +CLI_CONTROL( bool, InjectProgramSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel SPIR-V binaries via clCreateProgramWithIL() in place of program text for each call to clCreateProgramWithSource()." ) +CLI_CONTROL( bool, PrependProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to prepend kernel code from a file to the application provided kernel source passed to clCreateProgramWithSource(). The Intercept Layer for OpenCL Applications will look for kernel source to prepend in the dump and log directory. The files that are searched for are (in order) \"CLI___prepend.cl\", \"CLI__prepend.cl\", and \"CLI_prepend.cl\"." ) +CLI_CONTROL( std::string, AppendBuildOptions, "", "If set, the Intercept Layer for OpenCL Applications will add these build options to the end of any application provided or injected build options for each call to clCompileProgram or clBuildProgram()." ) +CLI_CONTROL( std::string, AppendLinkOptions, "", "If set, the Intercept Layer for OpenCL Applications will add these build options to the end of any application provided or injected build options for each call to clLinkProgram()." ) +CLI_CONTROL( bool, DumpProgramBuildLogs, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump build logs for every device a program is built for to a separate file. The file name will have the form \"CLI______build_log.txt\"." ) +CLI_CONTROL( bool, DumpKernelISABinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump kernel ISA binaries for every kernel, if supported. Currently, kernel ISA binaries are only supported for Intel GPU devices. Kernel ISA binaries can be decoded into ISA text with a disassembler. The file name will have the form \"CLI______.isabin\"." 
) CLI_CONTROL_SEPARATOR( Controls for Emulating Features: ) -CLI_CONTROL( - bool, Emulate_cl_khr_extended_versioning, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will emulate support for the cl_khr_extended_versioning extension.") -CLI_CONTROL( - bool, Emulate_cl_khr_semaphore, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will emulate support for the cl_khr_semaphore extension.") -CLI_CONTROL( - bool, Emulate_cl_intel_unified_shared_memory, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will emulate support for the cl_intel_unified_shared_memory extension USM " - "APIs using SVM APIs. This can be useful to test USM applications on an " - "implementation that supports SVM, but not USM.") +CLI_CONTROL( bool, Emulate_cl_khr_extended_versioning, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emulate support for the cl_khr_extended_versioning extension." ) +CLI_CONTROL( bool, Emulate_cl_khr_semaphore, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emulate support for the cl_khr_semaphore extension." ) +CLI_CONTROL( bool, Emulate_cl_intel_unified_shared_memory, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emulate support for the cl_intel_unified_shared_memory extension USM APIs using SVM APIs. This can be useful to test USM applications on an implementation that supports SVM, but not USM." ) CLI_CONTROL_SEPARATOR( Controls for Automatically Creating SPIR-V Modules: ) -CLI_CONTROL( - bool, AutoCreateSPIRV, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will automatically create SPIR-V modules by invoking CLANG each time a " - "program is built. The file name will have the form \"CLI____.spv\". 
Because invoking CLANG requires a file containing the " - "OpenCL C source, setting this option implicitly sets DumpProgramSource as " - "well. Additionally, this feature is not available for injected program " - "source.") -CLI_CONTROL(std::string, SPIRVClang, "clang", - "The clang executable used to compile an OpenCL C program to a " - "SPIR-V module. This can be an executable in the system path, a " - "relative path, or a full absolute path.") -CLI_CONTROL( - std::string, SPIRVCLHeader, "opencl.h", - "The OpenCL header file used to compile an OpenCL C program to a SPIR-V " - "module. This must be a relative path or a full absolute path.") -CLI_CONTROL( - std::string, SPIRVDis, "spirv-dis", - "The spirv-dis executable used to optionally disassemble the compiled " - "SPIR-V module to a SPIR-V text representation. This can be an executable " - "in the system path, a relative path, or a full absolute path.") -CLI_CONTROL(std::string, DefaultOptions, - "-cc1 -x cl -cl-std=CL1.2 -D__OPENCL_C_VERSION__=120 " - "-D__OPENCL_VERSION__=120 -emit-spirv -triple=spir", - "This is the list of options that is implicitly passed to CLANG to " - "build a non-OpenCL 2.0 SPIR-V module. Any application-provided " - "build options will be appended to these build options.") -CLI_CONTROL(std::string, OpenCL2Options, - "-cc1 -x cl -cl-std=CL2.0 -D__OPENCL_C_VERSION__=200 " - "-D__OPENCL_VERSION__=200 -emit-spirv -triple=spir", - "This is the list of options that is implicitly passed to CLANG to " - "build an OpenCL 2.0 SPIR-V module. Any application-provided " - "build options will be appended to these build options.") +CLI_CONTROL( bool, AutoCreateSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically create SPIR-V modules by invoking CLANG each time a program is built. The file name will have the form \"CLI____.spv\". 
Because invoking CLANG requires a file containing the OpenCL C source, setting this option implicitly sets DumpProgramSource as well. Additionally, this feature is not available for injected program source." ) +CLI_CONTROL( std::string, SPIRVClang, "clang", "The clang executable used to compile an OpenCL C program to a SPIR-V module. This can be an executable in the system path, a relative path, or a full absolute path." ) +CLI_CONTROL( std::string, SPIRVCLHeader, "opencl.h", "The OpenCL header file used to compile an OpenCL C program to a SPIR-V module. This must be a relative path or a full absolute path." ) +CLI_CONTROL( std::string, SPIRVDis, "spirv-dis", "The spirv-dis executable used to optionally disassemble the compiled SPIR-V module to a SPIR-V text representation. This can be an executable in the system path, a relative path, or a full absolute path." ) +CLI_CONTROL( std::string, DefaultOptions, "-cc1 -x cl -cl-std=CL1.2 -D__OPENCL_C_VERSION__=120 -D__OPENCL_VERSION__=120 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build a non-OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." ) +CLI_CONTROL( std::string, OpenCL2Options, "-cc1 -x cl -cl-std=CL2.0 -D__OPENCL_C_VERSION__=200 -D__OPENCL_VERSION__=200 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build an OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." ) CLI_CONTROL_SEPARATOR( Controls for Dumping Command Buffers: ) -CLI_CONTROL(bool, OmitCommandBufferNumber, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will omit the command buffer number from dumped file " - "names and hash tracking. 
This can produce deterministic results " - "even if command buffers are creatd and finalized in a " - "non-deterministic order (say, by multiple threads).") -CLI_CONTROL( - bool, DumpCommandBuffers, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump the commands and dependencies in a command buffer to a file " - "when the command buffer is successfully finalized. The file name will " - "have the form \"CLI___cmdbuf.dot\". The command buffer is described using the DOT graph " - "description language.") +CLI_CONTROL( bool, OmitCommandBufferNumber, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the command buffer number from dumped file names and hash tracking. This can produce deterministic results even if command buffers are created and finalized in a non-deterministic order (say, by multiple threads)." ) +CLI_CONTROL( bool, DumpCommandBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the commands and dependencies in a command buffer to a file when the command buffer is successfully finalized. The file name will have the form \"CLI___cmdbuf.dot\". The command buffer is described using the DOT graph description language." ) CLI_CONTROL_SEPARATOR( Controls for Dumping and Injecting Buffers and Images: ) -CLI_CONTROL(bool, DumpBufferHashes, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will dump hashes of a buffer, SVM, or USM allocation " - "rather than the full contents of the buffer. This can be useful " - "to identify which kernel enqueues generate different results " - "without requiring a large amount of disk space.") -CLI_CONTROL( - bool, DumpImageHashes, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump hashes of an image rather than the full contents of the image.
" - "This can be useful to identify which kernel enqueues generate different " - "results without requiring a large amount of disk space.") -CLI_CONTROL(bool, DumpArgumentsOnSet, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will dump the argument value on calls to " - "clSetKernelArg(). Arguments are dumped as raw binary data. The " - "file names will have the form \"SetKernelArg__Kernel__Arg_.bin\".") -CLI_CONTROL( - bool, DumpBuffersAfterCreate, false, - "If set, the Intercept Layer for OpenCL Applications will dump buffers to " - "a file after creation. This control still honors the enqueue counter " - "limits, even though no enqueues are involved during buffer creation. " - "Currently only works for cl_mem buffers created from host pointers.") -CLI_CONTROL( - bool, DumpBuffersAfterMap, false, - "If set, the Intercept Layer for OpenCL Applications will dump the " - "contents of a buffer to a file after the buffer is mapped. Only valid if " - "the buffer is NOT mapped with CL_MAP_WRITE_INVALIDATE_REGION. If the " - "buffer was mapped non-blocking, this may insert a clFinish() into the " - "command queue, which may have functional or performance implications.") -CLI_CONTROL( - bool, DumpBuffersBeforeUnmap, false, - "If set, the Intercept Layer for OpenCL Applications will dump the " - "contents of a buffer to a file immediately before the buffer is unmapped. " - " This is done by inserting a blocking clEnqueueMapBuffer() (and matching " - "clEnqueueUnmapMemObject()) into the command queue, which may have " - "functional or performance implications.") -CLI_CONTROL( - bool, DumpBuffersBeforeEnqueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump buffer, SVM, and USM kernel arguments before calls to " - "clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the " - "kernel being enqueued are dumped. 
Buffers are dumped as raw binary data " - "to a \"memDumpPreEnqueue\" subdirectory of the dump directory. The file " - "names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\".") -CLI_CONTROL( - bool, DumpBuffersAfterEnqueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump buffer, SVM, and USM kernel arguments after calls to " - "clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the " - "kernel being enqueued are dumped. Buffers are dumped as raw binary data " - "to a \"memDumpPostEnqueue\" subdirectory of the dump directory. The file " - "names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\". " - "Note that this is the same naming convention as with " - "DumpBuffersBeforeEnqueue, so the changes resulting from an enqueue can be " - "determined by diff'ing the preEnqueue folder with the postEnqueue folder.") -CLI_CONTROL(std::string, DumpBuffersForKernel, "", - "If set, the Intercept Layer for OpenCL Applications will only " - "dump buffer, SVM, and USM kernel arguments when the specified " - "kernel is enqueued. This control is ignored unless " - "DumpBuffersBeforeEnqueue or DumpBuffersAfterEnqueue are enabled.") -CLI_CONTROL( - bool, DumpImagesBeforeEnqueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump image kernel arguments before calls to " - "clEnqueueNDRangeKernel(). Only images that are kernel arguments for the " - "kernel being enqueued are dumped. Images are dumped as raw binary data " - "to a \"memDumpPreEnqueue\" subdirectory of the dump directory. The file " - "names will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\".") -CLI_CONTROL( - bool, DumpImagesAfterEnqueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will dump image kernel arguments after calls to clEnqueueNDRangeKernel(). " - " Only images that are kernel arguments for the kernel being enqueued are " - "dumped. 
Images are dumped as raw binary data to a \"memDumpPostEnqueue\" " - "subdirectory of the dump directory. The file names will have the form " - "\"Enqueue__Kernel__Arg__Image__xx_bpp.raw\". Note that this " - "is the same naming convention as with DumpImagesBeforeEnqueue, so the " - "changes resulting from an enqueue can be determined by diff'ing the " - "preEnqueue folder with the postEnqueue folder.") -CLI_CONTROL(std::string, DumpImagesForKernel, "", - "If set, the Intercept Layer for OpenCL Applications will only " - "dump image kernel arguments when the specified kernel is " - "enqueued. This control is ignored unless DumpImagesBeforeEnqueue " - "or DumpImagesAfterEnqueue are enabled.") -CLI_CONTROL(cl_uint, DumpBuffersMinEnqueue, 0, - "The Intercept Layer for OpenCL Applications will only dump " - "buffer, SVM, and USM kernel arguments when the enqueue counter is " - "greater than this value, inclusive.") -CLI_CONTROL(cl_uint, DumpBuffersMaxEnqueue, UINT_MAX, - "The Intercept Layer for OpenCL Applications will only dump " - "buffer, SVM, and USM kernel arguments when the enqueue counter is " - "less than this value, inclusive.") -CLI_CONTROL( - cl_uint, DumpImagesMinEnqueue, 0, - "The Intercept Layer for OpenCL Applications will only dump image kernel " - "arguments when the enqueue counter is greater than this value, inclusive.") -CLI_CONTROL( - cl_uint, DumpImagesMaxEnqueue, UINT_MAX, - "The Intercept Layer for OpenCL Applications will only dump image kernel " - "arguments when the enqueue counter is less than this value, inclusive.") -CLI_CONTROL( - cl_uint, DumpArgumentsOnSetMinEnqueue, 0, - "The Intercept Layer for OpenCL Applications will only dump argument " - "values when the enqueue counter is greater than this value, inclusive.") -CLI_CONTROL( - cl_uint, DumpArgumentsOnSetMaxEnqueue, UINT_MAX, - "The Intercept Layer for OpenCL Applications will only dump kernel " - "arguments when the enqueue counter is less than this value, inclusive.") 
-CLI_CONTROL( - bool, InjectBuffers, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will look to inject potentially modified buffer, SVM, and USM contents " - "before calls to clEnqueueNDRangeKernel(). Only buffers that are kernel " - "arguments for the kernel being enqueued may be injected. The file name " - "to inject will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\", " - "which matches the file name for dumped buffers.") -CLI_CONTROL( - bool, InjectImages, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will look to inject potentially modified image contents before calls to " - "clEnqueueNDRangeKernel(). Only images that are kernel arguments for the " - "kernel being enqueued may be injected. The file name to inject will have " - "the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\", which matches " - "the file name for dumped images.") - -CLI_CONTROL_SEPARATOR(Device Partitioning Controls:) -CLI_CONTROL(bool, AutoPartitionAllDevices, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will automatically partition parent devices and " - "return all parent devices and all sub-devices.") -CLI_CONTROL(bool, AutoPartitionAllSubDevices, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will automatically partition parent devices and " - "return all sub-devices, but no parent devices.") -CLI_CONTROL(bool, AutoPartitionSingleSubDevice, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will automatically partition parent devices and " - "return a single sub-device, but no other sub-devices or parent " - "devices or other sub-devices.") -CLI_CONTROL(bool, AutoPartitionByAffinityDomain, true, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will try to automatically partition parent devices " - "by the next partitionable affinity domain.") 
-CLI_CONTROL(cl_uint, AutoPartitionEqually, 1, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will try to automatically partition parent devices " - "into sub-devices with the specified number of compute units.") - -CLI_CONTROL_SEPARATOR(Capture and Replay Controls:) -CLI_CONTROL(bool, CaptureReplay, false, - "This is the top-level control for kernel capture and replay.") -CLI_CONTROL(cl_uint, CaptureReplayMinEnqueue, 0, - "The Intercept Layer for OpenCL Applications will only enable " - "kernel capture and replay when the enqueue counter is greater " - "than this value, inclusive.") -CLI_CONTROL( - cl_uint, CaptureReplayMaxEnqueue, UINT_MAX, - "The Intercept Layer for OpenCL Applications will stop kernel capture and " - "replay when the encounter is greater than this value, meaning that only " - "enqueues less than this value, inclusive, will be captured.") -CLI_CONTROL( - std::string, CaptureReplayKernelName, "", - "If set, the Intercept Layer for OpenCL Applications will only enable " - "kernel capture and replay when the kernel name equals this name.") -CLI_CONTROL(bool, CaptureReplayUniqueKernels, false, - "If set, the Intercept Layer for OpenCL Applications will only " - "enable kernel capture and replay if the kernel signature (i.e. " - "hash + kernelname) has not been seen already.") -CLI_CONTROL(cl_uint, CaptureReplayNumKernelEnqueuesSkip, 0, - "The Intercept Layer for OpenCL Applications will skip this many " - "kernel enqueues before enabling kernel capture and replay.") -CLI_CONTROL(cl_uint, CaptureReplayNumKernelEnqueuesCapture, UINT_MAX, - "The Intercept Layer for OpenCL Applications will only capture " - "this many kernel enqueues.") - -CLI_CONTROL_SEPARATOR(AubCapture Controls:) -CLI_CONTROL( - bool, AubCapture, false, - "This is the top-level control for aub capture. 
The Intercept Layer for " - "OpenCL Applications doesn't implement aub capture itself, but can be used " - "to selectively enable and disable aub capture via other methods.") -CLI_CONTROL(bool, AubCaptureKDC, false, - "If set, the Intercept Layer for OpenCL Applications will use the " - "older kdc.exe method of aub capture. By default, the newer NEO " - "method of aub capture will be used. This control is ignored for " - "all non-Windows operating systems.") -CLI_CONTROL( - bool, AubCaptureIndividualEnqueues, false, - "If set, the Intercept Layer for OpenCL Applications will start aub " - "capture before a kernel enqueue, and will also stop aub capture " - "immediately after the kernel enqueue. Each file will have the form " - "\"AubCapture_Enqueue__kernel_\". Note that " - "non-kernel enqueues such as calls to clEnqueueReadBuffer() and " - "clEnqueueWriteBuffer() will NOT be aub captured when this control is set. " - " The AubCaptureMinEnqueue and AubCaptureMaxEnqueue controls are still " - "honored when AubCaptureIndividualEnqueues is set.") -CLI_CONTROL( - cl_uint, AubCaptureMinEnqueue, 0, - "The Intercept Layer for OpenCL Applications will only enable aub capture " - "when the enqueue counter is greater than this value, inclusive.") -CLI_CONTROL( - cl_uint, AubCaptureMaxEnqueue, UINT_MAX, - "The Intercept Layer for OpenCL Applications will stop aub capture when " - "the encounter is greater than this value, meaning that only enqueues less " - "than this value, inclusive, will be captured. 
If the enqueue counter " - "never reaches this value, the Intercept Layer for OpenCL Applications " - "will stop aub capture when the it is unloaded.") -CLI_CONTROL(std::string, AubCaptureKernelName, "", - "If set, the Intercept Layer for OpenCL Applications will only " - "enable aub capture when the kernel name equals this name.") -CLI_CONTROL(std::string, AubCaptureKernelGWS, "", - "If set, the Intercept Layer for OpenCL Applications will only " - "enable aub capture when the NDRange global work size matches this " - "string. The string should have the form \"XxYxZ\". The wildcard " - "\"*\" matches all global work sizes.") -CLI_CONTROL( - std::string, AubCaptureKernelLWS, "", - "If set, the Intercept Layer for OpenCL Applications will only enable aub " - "capture when the NDRange local work size matches this string. The string " - "should have the form \"XxYxZ\". The wildcard \"*\" matches all local " - "work sizes, and the string \"NULL\" matches a NULL local work size.") -CLI_CONTROL( - bool, AubCaptureUniqueKernels, false, - "If set, the Intercept Layer for OpenCL Applications will only enable aub " - "capture if the kernel signature (i.e. hash + kernelname + gws + lws) has " - "not been seen already. The behavior of this control is well-defined when " - "AubCaptureIndividualEnqueues is not set, but it doesn't make much sense " - "without AubCaptureIndividualEnqueues.") -CLI_CONTROL( - cl_uint, AubCaptureNumKernelEnqueuesSkip, 0, - "The Intercept Layer for OpenCL Applications will skip this many kernel " - "enqueues before enabling aub capture. The behavior of this control is " - "well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't " - "make much sense without AubCaptureIndividualEnqueues.") -CLI_CONTROL(cl_uint, AubCaptureNumKernelEnqueuesCapture, UINT_MAX, - "The Intercept Layer for OpenCL Applications will only capture " - "this many kernel enqueues. 
The behavior of this control is " - "well-defined when AubCaptureIndividualEnqueues is not set, but it " - "doesn't make much sense without AubCaptureIndividualEnqueues.") -CLI_CONTROL(cl_uint, AubCaptureStartWait, 0, - "The Intercept Layer for OpenCL Applications will wait for this " - "many milliseconds before beginning aub capture.") -CLI_CONTROL(cl_uint, AubCaptureEndWait, 0, - "The Intercept Layer for OpenCL Applications will wait for this " - "many milliseconds before ending aub capture.") - -CLI_CONTROL_SEPARATOR(Execution Controls:) -CLI_CONTROL( - bool, NoErrors, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will cause all OpenCL APIs to return a successful error status.") -CLI_CONTROL(uint64_t, ExitOnEnqueueCount, 0, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will exit the application when the enqueue counter " - "reaches the specified value. This can be useful to debug " - "sporadic issues by exiting an application immediately, without " - "needing to wait for the application to exit normally.") -CLI_CONTROL( - bool, NullContextCallback, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will force the context callback to be NULL. With both context callback " - "logging and NULL context callback set, the context callback will still be " - "logged, but any application context callback will not be called.") -CLI_CONTROL(bool, FinishAfterEnqueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications inserts a call to clFinish() after every enqueue. " - "The command queue that the command was just enqueued to is passed " - "to clFinish(). 
This can be used to debug possible timing or " - "resource management issues and will likely impact performance.") -CLI_CONTROL( - bool, FlushAfterEnqueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "inserts a call to clFlush() after every enqueue. The command queue that " - "the command was just enqueued to is passed to clFlush(). This can also " - "be used to debug possible timing or resource management issues and is " - "slightly less obtrusive than FinishAfterEnqueue but still will likely " - "impact performance. If both FinishAfterEnqueue and FlushAfterEnqueue are " - "nonzero then the Intercept Layer for OpenCL Applications will only insert " - "a call to clFinish() after every enqueue, because clFinish() implies " - "clFlush().") -CLI_CONTROL( - bool, FlushAfterEnqueueBarrier, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "inserts a call to clFlush() after every barrier enqueue. The command " - "queue that the command was just enqueued to is passed to clFlush(). This " - "has been useful to debug out-of-order queue issues.") -CLI_CONTROL( - bool, InOrderQueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will force all queues to be created in-order. This can be used for " - "performance analysis, but may lead to deadlocks in some cases.") -CLI_CONTROL( - bool, NoProfilingQueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will force all queues to be created without event profiling support. " - "This can be used for performance analysis, but may lead to errors if the " - "application requires event profiling.") -CLI_CONTROL(bool, DummyOutOfOrderQueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will create and destroy a dummy out-of-order queue. 
" - "This may be useful for performance analysis.") -CLI_CONTROL( - bool, NullEnqueue, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will silently ignore any enqueue. This can be used for performance " - "analysis, but will likely cause errors if the application relies on any " - "sort of information from OpenCL events and should be used carefully.") -CLI_CONTROL( - bool, NullLocalWorkSize, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will force the local work size argument to clEnqueueNDRangeKernel() to be " - "NULL, which causes the OpenCL implementation to pick the local work size. " - "Note that this control takes effect before NullLocalWorkSizeX / " - "NullLocalWorkSizeY / NullLocalWorkSizeZ (see below), so enabling both " - "controls will have the effect of forcing a specific local work size.") -CLI_CONTROL( - size_t, NullLocalWorkSizeX, 0, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will set the local work size that will be used if an application passes " - "NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches " - "will only look at NullLocalWorkSizeX, 2D dispatches will only look at " - "NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look " - "at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If " - "the specified values for NullLocalWorkSize do not evenly divide the " - "global work size then the specified values of NullLocalWorkSize will not " - "take effect.") -CLI_CONTROL( - size_t, NullLocalWorkSizeY, 0, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will set the local work size that will be used if an application passes " - "NULL as the local work size to clEnqueueNDRangeKernel(). 
1D dispatches " - "will only look at NullLocalWorkSizeX, 2D dispatches will only look at " - "NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look " - "at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If " - "the specified values for NullLocalWorkSize do not evenly divide the " - "global work size then the specified values of NullLocalWorkSize will not " - "take effect.") -CLI_CONTROL( - size_t, NullLocalWorkSizeZ, 0, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will set the local work size that will be used if an application passes " - "NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches " - "will only look at NullLocalWorkSizeX, 2D dispatches will only look at " - "NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look " - "at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If " - "the specified values for NullLocalWorkSize do not evenly divide the " - "global work size then the specified values of NullLocalWorkSize will not " - "take effect.") -CLI_CONTROL( - bool, InitializeBuffers, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will initialize the contents of allocated buffers with zero. Only valid " - "for non-COPY_HOST_PTR and non-USE_HOST_PTR allocations.") -CLI_CONTROL( - cl_uint, DefaultQueuePriorityHint, 0, - "If set to a nonzero value, and if no other priority hint is specified by " - "the application, the Intercept Layer for OpencL Applications will attempt " - "to create a command queue with this priority hint value. Note: HIGH " - "priority is 1, MED priority is 2, and LOW priority is 4.") -CLI_CONTROL( - cl_uint, DefaultQueueThrottleHint, 0, - "If set to a nonzero value, and if no other throttle hint is specified by " - "the application, the Intercept Layer for OpencL Applications will attempt " - "to create a command queue with this throttle hint value. 
Note: HIGH " - "throttle is 1, MED throttle is 2, and LOW throttle is 4.") -CLI_CONTROL(bool, RelaxAllocationLimits, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will attempt to relax allocation limits to enable " - "allocations larger than CL_DEVICE_MAX_MEM_ALLOC_SIZE.") - -CLI_CONTROL_SEPARATOR(Platform and Device Query Overrides:) -CLI_CONTROL(std::string, PlatformName, "", - "If set to a non-empty value, the clGetPlatformInfo() query for " - "CL_PLATFORM_NAME will return this string instead of the true " - "platform name.") -CLI_CONTROL(std::string, PlatformVendor, "", - "If set to a non-empty value, the clGetPlatformInfo() query for " - "CL_PLATFORM_VENDOR will return this string instead of the true " - "platform vendor.") -CLI_CONTROL(std::string, PlatformProfile, "", - "If set to a non-empty value, the clGetPlatformInfo() query for " - "CL_PLATFORM_PROFILE will return this string instead of the true " - "platform profile.") -CLI_CONTROL(std::string, PlatformVersion, "", - "If set to a non-empty string, the clGetPlatformInfo() query for " - "CL_PLATFORM_VERSION will return this string instead of the true " - "platform version.") -CLI_CONTROL(cl_uint, DeviceTypeFilter, CL_DEVICE_TYPE_ALL, - "Hides all device types that are not in the filter. Note: " - "CL_DEVICE_TYPE_CPU = 2, CL_DEVICE_TYPE_GPU = 4, " - "CL_DEVICE_TYPE_ACCELERATOR = 8, CL_DEVICE_TYPE_CUSTOM = 16.") -CLI_CONTROL( - cl_uint, DeviceType, 0, - "If set to a non-zero value, the clGetDeviceInfo() query for " - "CL_DEVICE_TYPE will return this value instead of the true device type. " - "In addition, calls to clGetDeviceIDs() for this device type will return " - "all devices, not just devices of the requested type. 
This can be used to " - "enumerate all devices (even CPUs) as GPUs, or vice versa.") -CLI_CONTROL( - std::string, DeviceName, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_NAME will return this value instead of the true device name.") -CLI_CONTROL(std::string, DeviceVendor, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_VENDOR will return this value instead of the true " - "device vendor.") -CLI_CONTROL(std::string, DeviceProfile, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_PROFILE will return this value instead of the true " - "device profile.") -CLI_CONTROL(std::string, DeviceVersion, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_VERSION will return this value instead of the true " - "device version.") -CLI_CONTROL(std::string, DeviceCVersion, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_OPENCL_C_VERSION will return this value instead of the " - "true device version.") -CLI_CONTROL(std::string, DeviceExtensions, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_EXTENSIONS will return this value instead of the true " - "device extensions string.") -CLI_CONTROL(std::string, DeviceILVersion, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_IL_VERSION will return this value instead of the true " - "device intermediate language versions.") -CLI_CONTROL(cl_uint, DeviceVendorID, 0, - "If set to a non-zero value, the clGetDeviceInfo() query for " - "CL_DEVICE_VENDOR will return this value instead of the true " - "device vendor ID.") -CLI_CONTROL(cl_uint, DeviceMaxComputeUnits, 0, - "If set to a non-zero value, the clGetDeviceInfo() query for " - "CL_DEVICE_MAX_COMPUTE_UNITS will return this value instead of the " - "true device max compute units.") -CLI_CONTROL(cl_uint, DevicePreferredVectorWidthChar, UINT_MAX, - 
"If set to a non-negative value, the clGetDeviceInfo() query for " - "CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR will return this value " - "instead of the true device preferred vector width.") -CLI_CONTROL(cl_uint, DevicePreferredVectorWidthShort, UINT_MAX, - "If set to a non-negative value, the clGetDeviceInfo() query for " - "CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT will return this value " - "instead of the true device preferred vector width.") -CLI_CONTROL(cl_uint, DevicePreferredVectorWidthInt, UINT_MAX, - "If set to a non-negative value, the clGetDeviceInfo() query for " - "CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT will return this value " - "instead of the true device preferred vector width.") -CLI_CONTROL(cl_uint, DevicePreferredVectorWidthLong, UINT_MAX, - "If set to a non-negative value, the clGetDeviceInfo() query for " - "CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG will return this value " - "instead of the true device preferred vector width.") -CLI_CONTROL(cl_uint, DevicePreferredVectorWidthHalf, UINT_MAX, - "If set to a non-negative value, the clGetDeviceInfo() query for " - "CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF will return this value " - "instead of the true device preferred vector width.") -CLI_CONTROL(cl_uint, DevicePreferredVectorWidthFloat, UINT_MAX, - "If set to a non-negative value, the clGetDeviceInfo() query for " - "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT will return this value " - "instead of the true device preferred vector width.") -CLI_CONTROL(cl_uint, DevicePreferredVectorWidthDouble, UINT_MAX, - "If set to a non-negative value, the clGetDeviceInfo() query for " - "CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE will return this value " - "instead of the true device preferred vector width.") -CLI_CONTROL(std::string, DriverVersion, "", - "If set to a non-empty string, the clGetDeviceInfo() query for " - "CL_DRIVER_VERSION will return this value instead of the true " - "driver version.") -CLI_CONTROL(std::string, PrependDeviceExtensions, "", - "If set to a 
non-empty string, the clGetDeviceInfo() query for " - "CL_DEVICE_EXTENSIONS will return this value followed by the true " - "device extensions string.") - -CLI_CONTROL_SEPARATOR(Precompiled Kernel and Builtin Kernel Override Controls:) -CLI_CONTROL( - bool, ForceByteBufferOverrides, false, - "If set to a nonzero value, each of the buffer functions that are " - "overridden (via one or more of the keys below) will use a byte-wise " - "operation to read/write/copy the buffer (default behavior is to try to " - "copy multiple bytes at a time, if possible). Note: Requires OpenCL 1.1 " - "or the \"byte addressable store\" extension.") -CLI_CONTROL(bool, OverrideReadBuffer, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will use a kernel to implement clEnqueueReadBuffer() " - "instead of the implementation's clEnqueueReadBuffer(). Note: " - "Requires OpenCL 1.1 or the \"byte addressable store\" extension.") -CLI_CONTROL( - bool, OverrideWriteBuffer, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will use a kernel to implement clEnqueueWriteBuffer() instead of the " - "implementation's clEnqueueWriteBuffer(). Note: Requires OpenCL 1.1 or " - "the \"byte addressable store\" extension.") -CLI_CONTROL(bool, OverrideCopyBuffer, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will use a kernel to implement clEnqueueCopyBuffer() " - "instead of the implementation's clEnqueueCopyBuffer(). Note: " - "Requires OpenCL 1.1 or the \"byte addressable store\" extension.") -CLI_CONTROL(bool, OverrideReadImage, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will use a kernel to implement clEnqueueReadImage() " - "instead of the implementation's clEnqueueReadImage(). 
Only 2D " - "images are currently supported.") -CLI_CONTROL(bool, OverrideWriteImage, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will use a kernel to implement clEnqueueWriteImage() " - "instead of the implementation's clEnqueueWriteImage(). Only 2D " - "images are currently supported.") -CLI_CONTROL(bool, OverrideCopyImage, false, - "If set to a nonzero value, the Intercept Layer for OpenCL " - "Applications will use a kernel to implement clEnqueueCopyImage() " - "instead of the implementation's clEnqueueCopyImage(). Only 2D " - "images are currently supported.") -CLI_CONTROL( - bool, OverrideBuiltinKernels, false, - "If set to a nonzero value, the Intercept Layer for OpenCL Applications " - "will use its own version of the built-in OpenCL kernels that may be " - "accessed via clCreateProgramWithBuiltInKernels(). At present, only the " - "VME block_motion_estimate_intel kernel is implemented.") +CLI_CONTROL( bool, DumpBufferHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of a buffer, SVM, or USM allocation rather than the full contents of the buffer. This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." ) +CLI_CONTROL( bool, DumpImageHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of an image rather than the full contents of the image. This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." ) +CLI_CONTROL( bool, DumpArgumentsOnSet, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the argument value on calls to clSetKernelArg(). Arguments are dumped as raw binary data. The file names will have the form \"SetKernelArg__Kernel__Arg_.bin\"." 
) +CLI_CONTROL( bool, DumpBuffersAfterCreate, false, "If set, the Intercept Layer for OpenCL Applications will dump buffers to a file after creation. This control still honors the enqueue counter limits, even though no enqueues are involved during buffer creation. Currently only works for cl_mem buffers created from host pointers." ) +CLI_CONTROL( bool, DumpBuffersAfterMap, false, "If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file after the buffer is mapped. Only valid if the buffer is NOT mapped with CL_MAP_WRITE_INVALIDATE_REGION. If the buffer was mapped non-blocking, this may insert a clFinish() into the command queue, which may have functional or performance implications." ) +CLI_CONTROL( bool, DumpBuffersBeforeUnmap, false, "If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file immediately before the buffer is unmapped. This is done by inserting a blocking clEnqueueMapBuffer() (and matching clEnqueueUnmapMemObject()) into the command queue, which may have functional or performance implications." ) +CLI_CONTROL( bool, DumpBuffersBeforeEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffer, SVM, and USM kernel arguments before calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to a \"memDumpPreEnqueue\" subdirectory of the dump directory. The file names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\"." ) +CLI_CONTROL( bool, DumpBuffersAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffer, SVM, and USM kernel arguments after calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to a \"memDumpPostEnqueue\" subdirectory of the dump directory. 
The file names will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\". Note that this is the same naming convention as with DumpBuffersBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder." ) +CLI_CONTROL( std::string, DumpBuffersForKernel, "", "If set, the Intercept Layer for OpenCL Applications will only dump buffer, SVM, and USM kernel arguments when the specified kernel is enqueued. This control is ignored unless DumpBuffersBeforeEnqueue or DumpBuffersAfterEnqueue are enabled." ) +CLI_CONTROL( bool, DumpImagesBeforeEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump image kernel arguments before calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to a \"memDumpPreEnqueue\" subdirectory of the dump directory. The file names will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\"." ) +CLI_CONTROL( bool, DumpImagesAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump image kernel arguments after calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to a \"memDumpPostEnqueue\" subdirectory of the dump directory. The file names will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\". Note that this is the same naming convention as with DumpImagesBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder." ) +CLI_CONTROL( std::string, DumpImagesForKernel, "", "If set, the Intercept Layer for OpenCL Applications will only dump image kernel arguments when the specified kernel is enqueued. This control is ignored unless DumpImagesBeforeEnqueue or DumpImagesAfterEnqueue are enabled." 
) +CLI_CONTROL( cl_uint, DumpBuffersMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump buffer, SVM, and USM kernel arguments when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpBuffersMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump buffer, SVM, and USM kernel arguments when the enqueue counter is less than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpImagesMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump image kernel arguments when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpImagesMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump image kernel arguments when the enqueue counter is less than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpArgumentsOnSetMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump argument values when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpArgumentsOnSetMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump kernel arguments when the enqueue counter is less than this value, inclusive." ) +CLI_CONTROL( bool, InjectBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified buffer, SVM, and USM contents before calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued may be injected. The file name to inject will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\", which matches the file name for dumped buffers." ) +CLI_CONTROL( bool, InjectImages, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified image contents before calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued may be injected. 
The file name to inject will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\", which matches the file name for dumped images." ) + +CLI_CONTROL_SEPARATOR( Device Partitioning Controls: ) +CLI_CONTROL( bool, AutoPartitionAllDevices, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically partition parent devices and return all parent devices and all sub-devices." ) +CLI_CONTROL( bool, AutoPartitionAllSubDevices, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically partition parent devices and return all sub-devices, but no parent devices." ) +CLI_CONTROL( bool, AutoPartitionSingleSubDevice, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically partition parent devices and return a single sub-device, but no parent devices or other sub-devices." ) +CLI_CONTROL( bool, AutoPartitionByAffinityDomain, true, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will try to automatically partition parent devices by the next partitionable affinity domain." ) +CLI_CONTROL( cl_uint, AutoPartitionEqually, 1, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will try to automatically partition parent devices into sub-devices with the specified number of compute units." ) + +CLI_CONTROL_SEPARATOR( Capture and Replay Controls: ) +CLI_CONTROL( bool, CaptureReplay, false, "This is the top-level control for kernel capture and replay." ) +CLI_CONTROL( cl_uint, CaptureReplayMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only enable kernel capture and replay when the enqueue counter is greater than this value, inclusive."
) +CLI_CONTROL( cl_uint, CaptureReplayMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will stop kernel capture and replay when the enqueue counter is greater than this value, meaning that only enqueues less than this value, inclusive, will be captured." ) +CLI_CONTROL( std::string, CaptureReplayKernelName, "", "If set, the Intercept Layer for OpenCL Applications will only enable kernel capture and replay when the kernel name equals this name.") +CLI_CONTROL( bool, CaptureReplayUniqueKernels, false, "If set, the Intercept Layer for OpenCL Applications will only enable kernel capture and replay if the kernel signature (i.e. hash + kernelname) has not been seen already." ) +CLI_CONTROL( cl_uint, CaptureReplayNumKernelEnqueuesSkip, 0, "The Intercept Layer for OpenCL Applications will skip this many kernel enqueues before enabling kernel capture and replay.") +CLI_CONTROL( cl_uint, CaptureReplayNumKernelEnqueuesCapture, UINT_MAX, "The Intercept Layer for OpenCL Applications will only capture this many kernel enqueues.") + +CLI_CONTROL_SEPARATOR( AubCapture Controls: ) +CLI_CONTROL( bool, AubCapture, false, "This is the top-level control for aub capture. The Intercept Layer for OpenCL Applications doesn't implement aub capture itself, but can be used to selectively enable and disable aub capture via other methods." ) +CLI_CONTROL( bool, AubCaptureKDC, false, "If set, the Intercept Layer for OpenCL Applications will use the older kdc.exe method of aub capture. By default, the newer NEO method of aub capture will be used. This control is ignored for all non-Windows operating systems." ) +CLI_CONTROL( bool, AubCaptureIndividualEnqueues, false, "If set, the Intercept Layer for OpenCL Applications will start aub capture before a kernel enqueue, and will also stop aub capture immediately after the kernel enqueue. Each file will have the form \"AubCapture_Enqueue__kernel_\".
Note that non-kernel enqueues such as calls to clEnqueueReadBuffer() and clEnqueueWriteBuffer() will NOT be aub captured when this control is set. The AubCaptureMinEnqueue and AubCaptureMaxEnqueue controls are still honored when AubCaptureIndividualEnqueues is set." ) +CLI_CONTROL( cl_uint, AubCaptureMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only enable aub capture when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, AubCaptureMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will stop aub capture when the enqueue counter is greater than this value, meaning that only enqueues less than this value, inclusive, will be captured. If the enqueue counter never reaches this value, the Intercept Layer for OpenCL Applications will stop aub capture when it is unloaded." ) +CLI_CONTROL( std::string, AubCaptureKernelName, "", "If set, the Intercept Layer for OpenCL Applications will only enable aub capture when the kernel name equals this name.") +CLI_CONTROL( std::string, AubCaptureKernelGWS, "", "If set, the Intercept Layer for OpenCL Applications will only enable aub capture when the NDRange global work size matches this string. The string should have the form \"XxYxZ\". The wildcard \"*\" matches all global work sizes.") +CLI_CONTROL( std::string, AubCaptureKernelLWS, "", "If set, the Intercept Layer for OpenCL Applications will only enable aub capture when the NDRange local work size matches this string. The string should have the form \"XxYxZ\". The wildcard \"*\" matches all local work sizes, and the string \"NULL\" matches a NULL local work size.") +CLI_CONTROL( bool, AubCaptureUniqueKernels, false, "If set, the Intercept Layer for OpenCL Applications will only enable aub capture if the kernel signature (i.e. hash + kernelname + gws + lws) has not been seen already.
The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues." ) +CLI_CONTROL( cl_uint, AubCaptureNumKernelEnqueuesSkip, 0, "The Intercept Layer for OpenCL Applications will skip this many kernel enqueues before enabling aub capture. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues.") +CLI_CONTROL( cl_uint, AubCaptureNumKernelEnqueuesCapture, UINT_MAX, "The Intercept Layer for OpenCL Applications will only capture this many kernel enqueues. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues.") +CLI_CONTROL( cl_uint, AubCaptureStartWait, 0, "The Intercept Layer for OpenCL Applications will wait for this many milliseconds before beginning aub capture.") +CLI_CONTROL( cl_uint, AubCaptureEndWait, 0, "The Intercept Layer for OpenCL Applications will wait for this many milliseconds before ending aub capture.") + +CLI_CONTROL_SEPARATOR( Execution Controls: ) +CLI_CONTROL( bool, NoErrors, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will cause all OpenCL APIs to return a successful error status." ) +CLI_CONTROL( uint64_t, ExitOnEnqueueCount, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will exit the application when the enqueue counter reaches the specified value. This can be useful to debug sporadic issues by exiting an application immediately, without needing to wait for the application to exit normally." ) +CLI_CONTROL( bool, NullContextCallback, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force the context callback to be NULL. 
With both context callback logging and NULL context callback set, the context callback will still be logged, but any application context callback will not be called." ) +CLI_CONTROL( bool, FinishAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFinish() after every enqueue. The command queue that the command was just enqueued to is passed to clFinish(). This can be used to debug possible timing or resource management issues and will likely impact performance." ) +CLI_CONTROL( bool, FlushAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This can also be used to debug possible timing or resource management issues and is slightly less obtrusive than FinishAfterEnqueue but still will likely impact performance. If both FinishAfterEnqueue and FlushAfterEnqueue are nonzero then the Intercept Layer for OpenCL Applications will only insert a call to clFinish() after every enqueue, because clFinish() implies clFlush()." ) +CLI_CONTROL( bool, FlushAfterEnqueueBarrier, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every barrier enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This has been useful to debug out-of-order queue issues." ) +CLI_CONTROL( bool, InOrderQueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force all queues to be created in-order. This can be used for performance analysis, but may lead to deadlocks in some cases." ) +CLI_CONTROL( bool, NoProfilingQueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force all queues to be created without event profiling support. 
This can be used for performance analysis, but may lead to errors if the application requires event profiling." ) +CLI_CONTROL( bool, DummyOutOfOrderQueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will create and destroy a dummy out-of-order queue. This may be useful for performance analysis." ) +CLI_CONTROL( bool, NullEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will silently ignore any enqueue. This can be used for performance analysis, but will likely cause errors if the application relies on any sort of information from OpenCL events and should be used carefully." ) +CLI_CONTROL( bool, NullLocalWorkSize, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force the local work size argument to clEnqueueNDRangeKernel() to be NULL, which causes the OpenCL implementation to pick the local work size. Note that this control takes effect before NullLocalWorkSizeX / NullLocalWorkSizeY / NullLocalWorkSizeZ (see below), so enabling both controls will have the effect of forcing a specific local work size." ) +CLI_CONTROL( size_t, NullLocalWorkSizeX, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." 
) +CLI_CONTROL( size_t, NullLocalWorkSizeY, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) +CLI_CONTROL( size_t, NullLocalWorkSizeZ, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) +CLI_CONTROL( bool, InitializeBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will initialize the contents of allocated buffers with zero. Only valid for non-COPY_HOST_PTR and non-USE_HOST_PTR allocations." ) +CLI_CONTROL( cl_uint, DefaultQueuePriorityHint, 0, "If set to a nonzero value, and if no other priority hint is specified by the application, the Intercept Layer for OpenCL Applications will attempt to create a command queue with this priority hint value. Note: HIGH priority is 1, MED priority is 2, and LOW priority is 4."
) +CLI_CONTROL( cl_uint, DefaultQueueThrottleHint, 0, "If set to a nonzero value, and if no other throttle hint is specified by the application, the Intercept Layer for OpenCL Applications will attempt to create a command queue with this throttle hint value. Note: HIGH throttle is 1, MED throttle is 2, and LOW throttle is 4." ) +CLI_CONTROL( bool, RelaxAllocationLimits, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to relax allocation limits to enable allocations larger than CL_DEVICE_MAX_MEM_ALLOC_SIZE." ) + +CLI_CONTROL_SEPARATOR( Platform and Device Query Overrides: ) +CLI_CONTROL( std::string, PlatformName, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_NAME will return this string instead of the true platform name." ) +CLI_CONTROL( std::string, PlatformVendor, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_VENDOR will return this string instead of the true platform vendor." ) +CLI_CONTROL( std::string, PlatformProfile, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_PROFILE will return this string instead of the true platform profile." ) +CLI_CONTROL( std::string, PlatformVersion, "", "If set to a non-empty string, the clGetPlatformInfo() query for CL_PLATFORM_VERSION will return this string instead of the true platform version." ) +CLI_CONTROL( cl_uint, DeviceTypeFilter, CL_DEVICE_TYPE_ALL, "Hides all device types that are not in the filter. Note: CL_DEVICE_TYPE_CPU = 2, CL_DEVICE_TYPE_GPU = 4, CL_DEVICE_TYPE_ACCELERATOR = 8, CL_DEVICE_TYPE_CUSTOM = 16." ) +CLI_CONTROL( cl_uint, DeviceType, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_TYPE will return this value instead of the true device type. In addition, calls to clGetDeviceIDs() for this device type will return all devices, not just devices of the requested type.
This can be used to enumerate all devices (even CPUs) as GPUs, or vice versa." ) +CLI_CONTROL( std::string, DeviceName, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_NAME will return this value instead of the true device name." ) +CLI_CONTROL( std::string, DeviceVendor, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_VENDOR will return this value instead of the true device vendor." ) +CLI_CONTROL( std::string, DeviceProfile, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_PROFILE will return this value instead of the true device profile." ) +CLI_CONTROL( std::string, DeviceVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_VERSION will return this value instead of the true device version." ) +CLI_CONTROL( std::string, DeviceCVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_OPENCL_C_VERSION will return this value instead of the true device OpenCL C version." ) +CLI_CONTROL( std::string, DeviceExtensions, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_EXTENSIONS will return this value instead of the true device extensions string." ) +CLI_CONTROL( std::string, DeviceILVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_IL_VERSION will return this value instead of the true device intermediate language versions." ) +CLI_CONTROL( cl_uint, DeviceVendorID, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_VENDOR_ID will return this value instead of the true device vendor ID." ) +CLI_CONTROL( cl_uint, DeviceMaxComputeUnits, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_MAX_COMPUTE_UNITS will return this value instead of the true device max compute units."
) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthChar, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthShort, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthInt, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthLong, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthHalf, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthFloat, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthDouble, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( std::string, DriverVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DRIVER_VERSION will return this value instead of the true driver version." 
) +CLI_CONTROL( std::string, PrependDeviceExtensions, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_EXTENSIONS will return this value followed by the true device extensions string." ) + +CLI_CONTROL_SEPARATOR( Precompiled Kernel and Builtin Kernel Override Controls: ) +CLI_CONTROL( bool, ForceByteBufferOverrides, false, "If set to a nonzero value, each of the buffer functions that are overridden (via one or more of the keys below) will use a byte-wise operation to read/write/copy the buffer (default behavior is to try to copy multiple bytes at a time, if possible). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) +CLI_CONTROL( bool, OverrideReadBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadBuffer() instead of the implementation's clEnqueueReadBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) +CLI_CONTROL( bool, OverrideWriteBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteBuffer() instead of the implementation's clEnqueueWriteBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) +CLI_CONTROL( bool, OverrideCopyBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyBuffer() instead of the implementation's clEnqueueCopyBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) +CLI_CONTROL( bool, OverrideReadImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadImage() instead of the implementation's clEnqueueReadImage(). Only 2D images are currently supported." 
) +CLI_CONTROL( bool, OverrideWriteImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteImage() instead of the implementation's clEnqueueWriteImage(). Only 2D images are currently supported." ) +CLI_CONTROL( bool, OverrideCopyImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyImage() instead of the implementation's clEnqueueCopyImage(). Only 2D images are currently supported." ) +CLI_CONTROL( bool, OverrideBuiltinKernels, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use its own version of the built-in OpenCL kernels that may be accessed via clCreateProgramWithBuiltInKernels(). At present, only the VME block_motion_estimate_intel kernel is implemented." )