Skip to content

WaveActiveAllEqual fails the DXIL validator when operating on boolean vectors. #981

@bob80905

Description

@bob80905

I get this output after running the DXC Validator on Clang-DXC's generated IR:

# .---command stdout------------
# | Function: main: error: Instructions must be of an allowed type.
# | note: at '%.i03.cast.i09 = extractelement <32 x i1> %.i03.cast, i32 0' in block 'entry' of function 'main'.
# | Function: main: error: Instructions must be of an allowed type.
# | note: at '%.i14.cast.i010 = extractelement <32 x i1> %.i14.cast, i32 0' in block 'entry' of function 'main'.
# | Validation failed.
# |
# `-----------------------------
# .---command stderr------------
# | clang-dxc: error: dxv command failed with exit code 1 (use -v to see invocation)
# `-----------------------------
# error: command failed with exit status: 1

The source HLSL:

#--- source.hlsl
StructuredBuffer<half2> In2  : register(t1);

RWStructuredBuffer<int> Out : register(u4);

[numthreads(4,1,1)]
void main(uint3 TID : SV_GroupThreadID)
{
    unsigned int index = 0;
    bool2 Result2 = WaveActiveAllEqual(In2[TID.x]);
    Out[index + TID.x] = (int)Result2.x;
    index += 4;
    Out[index + TID.x] = (int)Result2.y;
    index += 4;

    }

The generated IR, in essence:

source_filename = "/app/example.hlsl"
target datalayout = "e-m:e-ve-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxilv1.7-unknown-shadermodel6.7-compute"

%"StructuredBuffer<float2>" = type { <2 x float> }
%"RWStructuredBuffer<int32_t>" = type { i32 }
%dx.types.Handle = type { ptr }
%dx.types.ResBind = type { i32, i32, i32, i8 }
%dx.types.ResourceProperties = type { i32, i32 }
%dx.types.ResRet.f32 = type { float, float, float, float, i32 }

define void @main() local_unnamed_addr #0 {
  %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) #1, !dbg !165
  %2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 12, i32 8 }) #1, !dbg !165
  %3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) #1, !dbg !166
  %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4108, i32 4 }) #1, !dbg !166
  %5 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) #1
  %6 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %2, i32 %5, i32 0, i8 3, i32 4), !dbg !192
  %7 = extractvalue %dx.types.ResRet.f32 %6, 1, !dbg !192
  %8 = extractvalue %dx.types.ResRet.f32 %6, 0, !dbg !192
  %9 = call i1 @dx.op.waveActiveAllEqual.f32(i32 115, float %8), !dbg !193
  %10 = call i1 @dx.op.waveActiveAllEqual.f32(i32 115, float %7), !dbg !193
  %11 = zext i1 %9 to i32, !dbg !194
  %12 = zext i1 %10 to i32, !dbg !194
  %13 = bitcast i32 %11 to <32 x i1>, !dbg !195
  %14 = extractelement <32 x i1> %13, i32 0, !dbg !195
  %15 = bitcast i32 %12 to <32 x i1>, !dbg !195
  %16 = extractelement <32 x i1> %15, i32 0, !dbg !195
  %17 = zext i1 %14 to i32, !dbg !195
  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %4, i32 %5, i32 0, i32 %17, i32 undef, i32 undef, i32 undef, i8 1, i32 4), !dbg !202
  %18 = zext i1 %16 to i32, !dbg !203
  %19 = add i32 %5, 4, !dbg !204
  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %4, i32 %19, i32 0, i32 %18, i32 undef, i32 undef, i32 undef, i8 1, i32 4), !dbg !207
  ret void

  uselistorder %dx.types.Handle %4, { 1, 0 }
  uselistorder i32 %5, { 2, 0, 1 }
}

declare i32 @dx.op.threadIdInGroup.i32(i32, i32)

declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1)

declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties)

declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32)

declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32)

declare i1 @dx.op.waveActiveAllEqual.f32(i32, float)

uselistorder ptr @dx.op.createHandleFromBinding, { 1, 0 }
uselistorder ptr @dx.op.annotateHandle, { 1, 0 }

attributes #0 = { convergent noinline nounwind memory(readwrite, inaccessiblemem: none, target_mem0: none, target_mem1: none) }
attributes #1 = { memory(none) }

Compiled with -T cs_6_5 -enable-16bit-types.
It looks like clang is not doing the right thing when extracting the result elements: for some reason it bitcasts an i32 to a <32 x i1> and then performs an extractelement on it. This can likely be circumvented entirely.
DXC's validator does not expect a <32 x i1> type, so clang should not generate it either.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    Status

    No status

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions