From 8b63d0f6e1c77746dc24880de3fe61ea0edd541a Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 22:19:33 +0300 Subject: [PATCH 1/8] asm: save XMM1 (f2) after CALL in syscall_unix_amd64.s Per SysV AMD64 ABI, structs {SSE, SSE} (e.g. NSPoint) return in XMM0:XMM1. The assembly already saved XMM0 at offset 128 (f1) but discarded XMM1. Save XMM1 at offset 136 (f2) so CallNFloat can expose it as a second float return register. --- internal/syscall/syscall_unix_amd64.s | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/syscall/syscall_unix_amd64.s b/internal/syscall/syscall_unix_amd64.s index bee2949..fc69361 100644 --- a/internal/syscall/syscall_unix_amd64.s +++ b/internal/syscall/syscall_unix_amd64.s @@ -106,6 +106,7 @@ TEXT syscallN(SB), NOSPLIT|NOFRAME, $0 MOVQ AX, 192(DI) // r1: integer return in RAX MOVQ DX, 200(DI) // r2: second integer return in RDX (9-16 byte structs) MOVQ X0, 128(DI) // f1: float return in XMM0 + MOVQ X1, 136(DI) // f2: XMM1 — second SSE return for 9-16B all-float struct returns // Restore stack and return XORL AX, AX // no error (ignored by runtime.cgocall) From fee44c589e47632f487cbec7b41bd2264a14f40c Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 22:22:35 +0300 Subject: [PATCH 2/8] ffi: extend CallNFloat and handleReturn to expose XMM1 (f2) CallNFloat gains a fourth return value f2 (XMM1 bit pattern) to support {SSE, SSE} 9-16B struct returns (e.g. NSPoint on macOS). handleReturn gains fret/fret2 parameters so the Execute path can supply both XMM return registers; call_windows.go passes zeros since syscall.SyscallN does not capture XMM0/XMM1 (known gap). All existing handleReturn unit tests updated to pass 0,0 for the new float parameters. 
--- internal/arch/amd64/amd64_test.go | 46 +++++++++++++++----------- internal/arch/amd64/call_unix.go | 4 +-- internal/arch/amd64/call_windows.go | 4 ++- internal/arch/amd64/implementation.go | 4 +++ internal/syscall/syscall_unix_amd64.go | 4 ++- 5 files changed, 39 insertions(+), 23 deletions(-) diff --git a/internal/arch/amd64/amd64_test.go b/internal/arch/amd64/amd64_test.go index 083223d..e611471 100644 --- a/internal/arch/amd64/amd64_test.go +++ b/internal/arch/amd64/amd64_test.go @@ -137,7 +137,7 @@ func TestHandleReturn(t *testing.T) { t.Run("Void", func(t *testing.T) { cif := &types.CallInterface{ReturnType: types.VoidTypeDescriptor} - err := impl.handleReturn(cif, nil, 0, 0) + err := impl.handleReturn(cif, nil, 0, 0, 0, 0) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -145,7 +145,7 @@ func TestHandleReturn(t *testing.T) { t.Run("NilRvalue", func(t *testing.T) { cif := &types.CallInterface{ReturnType: types.UInt64TypeDescriptor} - err := impl.handleReturn(cif, nil, 42, 0) + err := impl.handleReturn(cif, nil, 42, 0, 0, 0) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -154,7 +154,7 @@ func TestHandleReturn(t *testing.T) { t.Run("UInt8", func(t *testing.T) { var result uint8 cif := &types.CallInterface{ReturnType: types.UInt8TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xFF, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xFF, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -166,7 +166,7 @@ func TestHandleReturn(t *testing.T) { t.Run("SInt8", func(t *testing.T) { var result int8 cif := &types.CallInterface{ReturnType: types.SInt8TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(0xFE), 0) // -2 + err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(0xFE), 0, 0, 0) // -2 if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -178,7 +178,7 @@ func TestHandleReturn(t *testing.T) { t.Run("UInt16", func(t *testing.T) { var result uint16 
cif := &types.CallInterface{ReturnType: types.UInt16TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xBEEF, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xBEEF, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -190,7 +190,7 @@ func TestHandleReturn(t *testing.T) { t.Run("SInt16", func(t *testing.T) { var result int16 cif := &types.CallInterface{ReturnType: types.SInt16TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(0xFFFF), 0) // -1 + err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(0xFFFF), 0, 0, 0) // -1 if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -202,7 +202,7 @@ func TestHandleReturn(t *testing.T) { t.Run("UInt32", func(t *testing.T) { var result uint32 cif := &types.CallInterface{ReturnType: types.UInt32TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xDEADBEEF, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xDEADBEEF, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -214,7 +214,7 @@ func TestHandleReturn(t *testing.T) { t.Run("SInt32", func(t *testing.T) { var result int32 cif := &types.CallInterface{ReturnType: types.SInt32TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(0xFFFFFFFF), 0) // -1 + err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(0xFFFFFFFF), 0, 0, 0) // -1 if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -226,7 +226,7 @@ func TestHandleReturn(t *testing.T) { t.Run("UInt64", func(t *testing.T) { var result uint64 cif := &types.CallInterface{ReturnType: types.UInt64TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), 0x123456789ABCDEF0, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), 0x123456789ABCDEF0, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -238,7 +238,7 @@ func TestHandleReturn(t *testing.T) { t.Run("SInt64", func(t *testing.T) { var result uint64 cif := 
&types.CallInterface{ReturnType: types.SInt64TypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), 42, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), 42, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -250,7 +250,7 @@ func TestHandleReturn(t *testing.T) { t.Run("Pointer", func(t *testing.T) { var result uint64 cif := &types.CallInterface{ReturnType: types.PointerTypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xCAFEBABE, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xCAFEBABE, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -264,7 +264,7 @@ func TestHandleReturn(t *testing.T) { expected := float32(3.14) bits := uint64(math.Float32bits(expected)) cif := &types.CallInterface{ReturnType: types.FloatTypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), bits, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), bits, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -278,7 +278,7 @@ func TestHandleReturn(t *testing.T) { expected := 2.71828 bits := math.Float64bits(expected) cif := &types.CallInterface{ReturnType: types.DoubleTypeDescriptor} - err := impl.handleReturn(cif, unsafe.Pointer(&result), bits, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), bits, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -292,7 +292,7 @@ func TestHandleReturn(t *testing.T) { cif := &types.CallInterface{ ReturnType: &types.TypeDescriptor{Size: 8, Kind: types.StructType}, } - err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xDEADCAFE, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), 0xDEADCAFE, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -302,14 +302,22 @@ func TestHandleReturn(t *testing.T) { }) t.Run("Struct9to16", func(t *testing.T) { - // 12-byte struct: RAX=low 8 bytes, RDX=high 4 bytes + // 12-byte struct {int64, int32}: RAX=low 8 bytes, RDX=high 4 bytes 
(ReturnStRaxRdx) var buf [16]byte cif := &types.CallInterface{ - ReturnType: &types.TypeDescriptor{Size: 12, Kind: types.StructType}, + ReturnType: &types.TypeDescriptor{ + Size: 12, + Kind: types.StructType, + Members: []*types.TypeDescriptor{ + types.SInt64TypeDescriptor, + types.SInt32TypeDescriptor, + }, + }, + Flags: types.ReturnStRaxRdx, } retVal := uint64(0x0807060504030201) retVal2 := uint64(0x0000000C0B0A09) - err := impl.handleReturn(cif, unsafe.Pointer(&buf[0]), retVal, retVal2) + err := impl.handleReturn(cif, unsafe.Pointer(&buf[0]), retVal, retVal2, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -334,7 +342,7 @@ func TestHandleReturn(t *testing.T) { cif := &types.CallInterface{ ReturnType: &types.TypeDescriptor{Size: 24, Kind: types.StructType}, } - err := impl.handleReturn(cif, unsafe.Pointer(&buf[0]), 0, 0) + err := impl.handleReturn(cif, unsafe.Pointer(&buf[0]), 0, 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -350,7 +358,7 @@ func TestHandleReturn(t *testing.T) { ReturnType: types.PointerTypeDescriptor, Flags: types.ReturnViaPointer, } - err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(uintptr(unsafe.Pointer(&dummy))), 0) + err := impl.handleReturn(cif, unsafe.Pointer(&result), uint64(uintptr(unsafe.Pointer(&dummy))), 0, 0, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/internal/arch/amd64/call_unix.go b/internal/arch/amd64/call_unix.go index d40647c..653072d 100644 --- a/internal/arch/amd64/call_unix.go +++ b/internal/arch/amd64/call_unix.go @@ -219,7 +219,7 @@ func (i *Implementation) Execute( copy(stackArgs[:], sysargs[6:]) // Call via syscall - ret, r2, fret := gosyscall.CallNFloat(uintptr(fn), gpr, sse, stackArgs, numStack) + ret, r2, fret, fret2 := gosyscall.CallNFloat(uintptr(fn), gpr, sse, stackArgs, numStack) runtime.KeepAlive(avalue) runtime.KeepAlive(sretBuf) @@ -237,5 +237,5 @@ func (i *Implementation) Execute( retVal = *(*uint64)(unsafe.Pointer(&fret)) } - 
return i.handleReturn(cif, rvalue, retVal, uint64(r2)) + return i.handleReturn(cif, rvalue, retVal, uint64(r2), fret, fret2) } diff --git a/internal/arch/amd64/call_windows.go b/internal/arch/amd64/call_windows.go index 1d2c516..603e037 100644 --- a/internal/arch/amd64/call_windows.go +++ b/internal/arch/amd64/call_windows.go @@ -79,5 +79,7 @@ func (i *Implementation) Execute( // XMM0. Since this requires significant additional infrastructure and matches purego's // documented limitation, it is recorded as a known limitation for v0.4.1. // See: TASK-019, GAP-7. Workaround: use integer return type and reinterpret bits. - return i.handleReturn(cif, rvalue, uint64(ret), 0) + // fret and fret2 are zero: Windows syscall.SyscallN does not capture XMM returns. + // Float-returning functions on Windows require a custom assembly wrapper (known limitation). + return i.handleReturn(cif, rvalue, uint64(ret), 0, 0, 0) } diff --git a/internal/arch/amd64/implementation.go b/internal/arch/amd64/implementation.go index 1cc5a06..945d392 100644 --- a/internal/arch/amd64/implementation.go +++ b/internal/arch/amd64/implementation.go @@ -40,11 +40,15 @@ func (i *Implementation) ClassifyArgument( // Return value handling (common for both Unix and Windows AMD64). // retVal = RAX (first integer return register) // retVal2 = RDX (second integer return register, used for 9-16 byte struct returns) +// fret = XMM0 float return value (for float/double types and SSE eightbytes) +// fret2 = XMM1 second float return value (for {SSE, SSE} 9-16B struct returns, e.g. 
NSPoint) func (i *Implementation) handleReturn( cif *types.CallInterface, rvalue unsafe.Pointer, retVal uint64, retVal2 uint64, + fret float64, + fret2 float64, ) error { if rvalue == nil || cif.ReturnType.Kind == types.VoidType { return nil diff --git a/internal/syscall/syscall_unix_amd64.go b/internal/syscall/syscall_unix_amd64.go index 7fa2bf2..e197fe0 100644 --- a/internal/syscall/syscall_unix_amd64.go +++ b/internal/syscall/syscall_unix_amd64.go @@ -49,7 +49,8 @@ var syscallNABI0 uintptr // - r1: RAX integer return value // - r2: RDX second integer return value (9-16 byte struct returns) // - f1: XMM0 float return value (bit pattern) -func CallNFloat(fn uintptr, gpr [6]uintptr, sse [8]float64, stackArgs [9]uintptr, numStack int) (r1 uintptr, r2 uintptr, f1 float64) { +// - f2: XMM1 second float return value — for {SSE, SSE} 9-16B struct returns (e.g. NSPoint) +func CallNFloat(fn uintptr, gpr [6]uintptr, sse [8]float64, stackArgs [9]uintptr, numStack int) (r1 uintptr, r2 uintptr, f1 float64, f2 float64) { args := syscallArgs{ fn: fn, a1: gpr[0], a2: gpr[1], a3: gpr[2], @@ -79,5 +80,6 @@ func CallNFloat(fn uintptr, gpr [6]uintptr, sse [8]float64, stackArgs [9]uintptr r1 = args.r1 r2 = args.r2 f1 = *(*float64)(unsafe.Pointer(&args.f1)) + f2 = *(*float64)(unsafe.Pointer(&args.f2)) return } From b5bb75e480c9d6141f892b03bc1153fa36584968 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 22:23:20 +0300 Subject: [PATCH 3/8] types: add ReturnStRaxRdx/ReturnStRaxXmm0/ReturnStXmm0Rax/ReturnStXmm0Xmm1 Four new return-flag constants for SysV AMD64 9-16B struct returns. The two-eightbyte classification (INTEGER vs SSE) determines which register pair the callee uses; handleReturn will switch on these flags to reconstruct the struct from the correct register slots. Values 10-13 sit between the scalar flags (0-9) and the bit-field flags (1<<10+) so they cannot collide with either group. 
--- types/types.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/types/types.go b/types/types.go index ce39850..f9448fd 100644 --- a/types/types.go +++ b/types/types.go @@ -118,6 +118,13 @@ const ( ReturnInt64 = 7 ReturnInXMM32 = 8 ReturnInXMM64 = 9 + // AMD64 9-16B struct return modes (SysV ABI §3.2.3). + // Each eightbyte is classified independently as INTEGER (GP register) or SSE (XMM register). + // These flags drive handleReturn to reconstruct the struct from the correct registers. + ReturnStRaxRdx = 10 // {INTEGER, INTEGER} — eightbyte0 in RAX, eightbyte1 in RDX + ReturnStRaxXmm0 = 11 // {INTEGER, SSE} — eightbyte0 in RAX, eightbyte1 in XMM0 + ReturnStXmm0Rax = 12 // {SSE, INTEGER} — eightbyte0 in XMM0, eightbyte1 in RAX + ReturnStXmm0Xmm1 = 13 // {SSE, SSE} — eightbyte0 in XMM0, eightbyte1 in XMM1 (e.g. NSPoint/NSSize) ReturnViaPointer = 1 << 10 // ARM64 HFA (Homogeneous Floating-point Aggregate) return flags. // HFA structs with 2-4 float/double members are returned in D0-D3. From 4de4fa36759dba2f328111840d4229c4dbbd2ce1 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 22:24:37 +0300 Subject: [PATCH 4/8] amd64: classify 9-16B struct returns by eightbyte composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously all structs sized 9-16B fell through to ReturnViaPointer which is wrong — those sizes are returned in register pairs per SysV AMD64 ABI §3.2.3. classifyReturnAMD64 now calls classifyEightbyte() for each of the two eightbytes and selects one of four modes: ReturnStXmm0Xmm1 — {SSE, SSE} (e.g. NSPoint: double+double) ReturnStXmm0Rax — {SSE, INTEGER} ReturnStRaxXmm0 — {INTEGER, SSE} ReturnStRaxRdx — {INTEGER, INTEGER} Structs > 16B continue to use ReturnViaPointer (sret). TestClassifyReturnAMD64 extended with four 9-16B struct cases. 
--- internal/arch/amd64/amd64_test.go | 33 +++++++++++++++++++++++++++ internal/arch/amd64/classification.go | 22 ++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/internal/arch/amd64/amd64_test.go b/internal/arch/amd64/amd64_test.go index e611471..03cc225 100644 --- a/internal/arch/amd64/amd64_test.go +++ b/internal/arch/amd64/amd64_test.go @@ -60,6 +60,39 @@ func TestClassifyReturnAMD64(t *testing.T) { {"Struct2B", &types.TypeDescriptor{Size: 2, Kind: types.StructType}, types.ReturnSInt16}, {"Struct4B", &types.TypeDescriptor{Size: 4, Kind: types.StructType}, types.ReturnSInt32}, {"Struct8B", &types.TypeDescriptor{Size: 8, Kind: types.StructType}, types.ReturnInt64}, + // 9-16B: two-eightbyte classification + { + "Struct16B_TwoDoubles", + &types.TypeDescriptor{Size: 16, Kind: types.StructType, Members: []*types.TypeDescriptor{ + types.DoubleTypeDescriptor, + types.DoubleTypeDescriptor, + }}, + types.ReturnStXmm0Xmm1, + }, + { + "Struct16B_IntFloat", + &types.TypeDescriptor{Size: 16, Kind: types.StructType, Members: []*types.TypeDescriptor{ + types.SInt64TypeDescriptor, + types.DoubleTypeDescriptor, + }}, + types.ReturnStRaxXmm0, + }, + { + "Struct16B_FloatInt", + &types.TypeDescriptor{Size: 16, Kind: types.StructType, Members: []*types.TypeDescriptor{ + types.DoubleTypeDescriptor, + types.SInt64TypeDescriptor, + }}, + types.ReturnStXmm0Rax, + }, + { + "Struct16B_TwoInts", + &types.TypeDescriptor{Size: 16, Kind: types.StructType, Members: []*types.TypeDescriptor{ + types.SInt64TypeDescriptor, + types.SInt64TypeDescriptor, + }}, + types.ReturnStRaxRdx, + }, {"Struct24B", &types.TypeDescriptor{Size: 24, Kind: types.StructType}, types.ReturnViaPointer | types.ReturnVoid}, } diff --git a/internal/arch/amd64/classification.go b/internal/arch/amd64/classification.go index 64da36f..cbea968 100644 --- a/internal/arch/amd64/classification.go +++ b/internal/arch/amd64/classification.go @@ -13,7 +13,8 @@ type classification struct { SSECount 
int } -// classifyReturnAMD64 for x86_64 +// classifyReturnAMD64 for x86_64. +// Implements SysV AMD64 ABI §3.2.3 return-value classification. func classifyReturnAMD64(t *types.TypeDescriptor, abi types.CallingConvention) int { switch t.Kind { case types.VoidType: @@ -33,7 +34,24 @@ func classifyReturnAMD64(t *types.TypeDescriptor, abi types.CallingConvention) i case 8: return types.ReturnInt64 default: - return types.ReturnViaPointer | types.ReturnVoid + if t.Size > 16 { + // MEMORY class (>16B): returned via hidden first argument (sret pointer). + return types.ReturnViaPointer | types.ReturnVoid + } + // 9-16B: classify each eightbyte independently per ABI §3.2.3. + // INTEGER wins over SSE within an eightbyte. + eb0SSE := classifyEightbyte(t, 0, 8) + eb1SSE := classifyEightbyte(t, 8, t.Size) + switch { + case eb0SSE && eb1SSE: + return types.ReturnStXmm0Xmm1 // {SSE, SSE} — XMM0 : XMM1 + case eb0SSE: + return types.ReturnStXmm0Rax // {SSE, INTEGER} — XMM0 : RAX + case eb1SSE: + return types.ReturnStRaxXmm0 // {INTEGER, SSE} — RAX : XMM0 + default: + return types.ReturnStRaxRdx // {INTEGER, INTEGER} — RAX : RDX + } } default: if t.Size <= 8 { From 4cc3093a83587bc9d9a594889d3683d23c677d22 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 22:26:35 +0300 Subject: [PATCH 5/8] amd64: implement four 9-16B struct return modes in handleReturn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the single RAX:RDX path with a switch on cif.Flags that dispatches to the correct register pair: ReturnStXmm0Xmm1 — both eightbytes from XMM0:XMM1 (NSPoint fix) ReturnStXmm0Rax — first from XMM0, second from RAX ReturnStRaxXmm0 — first from RAX, second from XMM0 ReturnStRaxRdx — both from RAX:RDX (default / legacy) The default branch preserves backward compatibility for callers that do not set Flags (e.g. call_windows.go which passes 0,0 for floats). 
TestHandleReturnSSEStructs added: four sub-tests covering each mode with concrete double / int64 combinations. --- internal/arch/amd64/amd64_test.go | 120 ++++++++++++++++++++++++++ internal/arch/amd64/implementation.go | 45 +++++++--- 2 files changed, 152 insertions(+), 13 deletions(-) diff --git a/internal/arch/amd64/amd64_test.go b/internal/arch/amd64/amd64_test.go index 03cc225..ccdff27 100644 --- a/internal/arch/amd64/amd64_test.go +++ b/internal/arch/amd64/amd64_test.go @@ -401,6 +401,126 @@ func TestHandleReturn(t *testing.T) { }) } +func TestHandleReturnSSEStructs(t *testing.T) { + impl := &Implementation{} + + t.Run("ReturnStXmm0Xmm1_TwoDoubles", func(t *testing.T) { + // {double, double} returned in XMM0 : XMM1 — the NSPoint/NSSize case. + type PairF64 struct{ A, B float64 } + var result PairF64 + cif := &types.CallInterface{ + ReturnType: &types.TypeDescriptor{ + Size: 16, Kind: types.StructType, + Members: []*types.TypeDescriptor{ + types.DoubleTypeDescriptor, + types.DoubleTypeDescriptor, + }, + }, + Flags: types.ReturnStXmm0Xmm1, + } + a := 1.5 + b := 2.5 + fret := a + fret2 := b + err := impl.handleReturn(cif, unsafe.Pointer(&result), 0, 0, fret, fret2) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.A != a || result.B != b { + t.Errorf("got {%f, %f}, want {%f, %f}", result.A, result.B, a, b) + } + }) + + t.Run("ReturnStXmm0Rax_FloatInt", func(t *testing.T) { + // {double, int64} returned in XMM0 : RAX + type MixedFloatInt struct { + A float64 + B int64 + } + var result MixedFloatInt + cif := &types.CallInterface{ + ReturnType: &types.TypeDescriptor{ + Size: 16, Kind: types.StructType, + Members: []*types.TypeDescriptor{ + types.DoubleTypeDescriptor, + types.SInt64TypeDescriptor, + }, + }, + Flags: types.ReturnStXmm0Rax, + } + a := 3.14 + b := int64(42) + fret := a + // eightbyte1 (B) comes from RAX, which handleReturn receives as retVal; + // in ReturnStXmm0Rax mode retVal fills the second slot, not the first + bBits := 
*(*uint64)(unsafe.Pointer(&b)) + err := impl.handleReturn(cif, unsafe.Pointer(&result), bBits, 0, fret, 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.A != a || result.B != b { + t.Errorf("got {%f, %d}, want {%f, %d}", result.A, result.B, a, b) + } + }) + + t.Run("ReturnStRaxXmm0_IntFloat", func(t *testing.T) { + // {int64, double} returned in RAX : XMM0 + type MixedIntFloat struct { + A int64 + B float64 + } + var result MixedIntFloat + cif := &types.CallInterface{ + ReturnType: &types.TypeDescriptor{ + Size: 16, Kind: types.StructType, + Members: []*types.TypeDescriptor{ + types.SInt64TypeDescriptor, + types.DoubleTypeDescriptor, + }, + }, + Flags: types.ReturnStRaxXmm0, + } + a := int64(100) + b := 2.71828 + aBits := *(*uint64)(unsafe.Pointer(&a)) + fret := b + err := impl.handleReturn(cif, unsafe.Pointer(&result), aBits, 0, fret, 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.A != a || result.B != b { + t.Errorf("got {%d, %f}, want {%d, %f}", result.A, result.B, a, b) + } + }) + + t.Run("ReturnStRaxRdx_TwoInts", func(t *testing.T) { + // {int64, int64} returned in RAX : RDX + type PairI64 struct{ A, B int64 } + var result PairI64 + cif := &types.CallInterface{ + ReturnType: &types.TypeDescriptor{ + Size: 16, Kind: types.StructType, + Members: []*types.TypeDescriptor{ + types.SInt64TypeDescriptor, + types.SInt64TypeDescriptor, + }, + }, + Flags: types.ReturnStRaxRdx, + } + a := int64(0xDEAD) + b := int64(0xBEEF) + aBits := *(*uint64)(unsafe.Pointer(&a)) + bBits := *(*uint64)(unsafe.Pointer(&b)) + err := impl.handleReturn(cif, unsafe.Pointer(&result), aBits, bBits, 0, 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.A != a || result.B != b { + t.Errorf("got {%d, %d}, want {%d, %d}", result.A, result.B, a, b) + } + }) +} + func TestClassifyReturnViaInterface(t *testing.T) { impl := &Implementation{} got := impl.ClassifyReturn(types.FloatTypeDescriptor, 
types.UnixCallingConvention) diff --git a/internal/arch/amd64/implementation.go b/internal/arch/amd64/implementation.go index 945d392..d1a71f9 100644 --- a/internal/arch/amd64/implementation.go +++ b/internal/arch/amd64/implementation.go @@ -87,23 +87,42 @@ func (i *Implementation) handleReturn( *(*uint64)(rvalue) = retVal case types.StructType: // System V AMD64 ABI struct return rules: - // <= 8 bytes : returned in RAX - // 9-16 bytes : returned in RAX (low 8) + RDX (high 8) - // > 16 bytes : returned via hidden sret pointer (handled above) + // <= 8 bytes : single eightbyte, copied from retVal (RAX for INTEGER class; NOTE: the ABI returns an SSE-class eightbyte in XMM0, not RAX) + // 9-16 bytes : two eightbytes, each classified as INTEGER or SSE independently. + // The return flag encodes which register pair was used: + // ReturnStRaxRdx → {INTEGER, INTEGER} — RAX : RDX + // ReturnStRaxXmm0 → {INTEGER, SSE} — RAX : XMM0 + // ReturnStXmm0Rax → {SSE, INTEGER} — XMM0 : RAX + // ReturnStXmm0Xmm1 → {SSE, SSE} — XMM0 : XMM1 (e.g. NSPoint) + // > 16 bytes : returned via hidden sret pointer (handled above before the switch) size := cif.ReturnType.Size - switch { - case size <= 8: + if size <= 8 { *(*uint64)(rvalue) = retVal - case size <= 16: - // Copy RAX into first 8 bytes, RDX into remaining bytes + break + } + // 9-16B: reconstruct from the correct register pair. 
+ remaining := size - 8 + switch cif.Flags { + case types.ReturnStXmm0Xmm1: + // eightbyte0 from XMM0, eightbyte1 from XMM1 + fretBits := *(*uint64)(unsafe.Pointer(&fret)) + *(*uint64)(rvalue) = fretBits + fret2Bits := *(*uint64)(unsafe.Pointer(&fret2)) + copy((*[8]byte)(unsafe.Add(rvalue, 8))[:remaining], (*[8]byte)(unsafe.Pointer(&fret2Bits))[:remaining]) + case types.ReturnStXmm0Rax: + // eightbyte0 from XMM0, eightbyte1 from RAX + fretBits := *(*uint64)(unsafe.Pointer(&fret)) + *(*uint64)(rvalue) = fretBits + copy((*[8]byte)(unsafe.Add(rvalue, 8))[:remaining], (*[8]byte)(unsafe.Pointer(&retVal))[:remaining]) + case types.ReturnStRaxXmm0: + // eightbyte0 from RAX, eightbyte1 from XMM0 *(*uint64)(rvalue) = retVal - // Remaining bytes are in RDX; copy only what is needed - remaining := size - 8 - src := (*[8]byte)(unsafe.Pointer(&retVal2)) - dst := (*[8]byte)(unsafe.Add(rvalue, 8)) - copy(dst[:remaining], src[:remaining]) + fretBits := *(*uint64)(unsafe.Pointer(&fret)) + copy((*[8]byte)(unsafe.Add(rvalue, 8))[:remaining], (*[8]byte)(unsafe.Pointer(&fretBits))[:remaining]) default: - return types.ErrUnsupportedReturnType + // ReturnStRaxRdx (and legacy/unset Flags): {INTEGER, INTEGER} — RAX : RDX + *(*uint64)(rvalue) = retVal + copy((*[8]byte)(unsafe.Add(rvalue, 8))[:remaining], (*[8]byte)(unsafe.Pointer(&retVal2))[:remaining]) } default: return types.ErrUnsupportedReturnType From b7efa27da61457d9f8a424a34b6ceb4cc57f3014 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 22:34:02 +0300 Subject: [PATCH 6/8] ffi: add C struct-return test functions and E2E tests for 9-16B returns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add four C functions to testdata/structtest.c that return 16-byte structs covering all four SysV AMD64 eightbyte register combinations: - return_struct_2doubles: {double,double} → XMM0:XMM1 - return_struct_int_float: {int64,double} → RAX:XMM0 - return_struct_float_int: {double,int64} → XMM0:RAX - 
return_struct_2ints: {int64,int64} → RAX:RDX Add four E2E tests in struct_e2e_test.go that verify CIF flag assignment and correct field reconstruction from the assembled register pairs. XMM-return tests skip on Windows (syscall.SyscallN limitation). Also apply gofmt alignment to return flag constants in types/types.go. --- ffi/struct_e2e_test.go | 204 ++++++++++++++++++++++++++++++++++++++ ffi/testdata/structtest.c | 30 ++++++ types/types.go | 20 ++-- 3 files changed, 244 insertions(+), 10 deletions(-) diff --git a/ffi/struct_e2e_test.go b/ffi/struct_e2e_test.go index e8c0951..3df4acb 100644 --- a/ffi/struct_e2e_test.go +++ b/ffi/struct_e2e_test.go @@ -531,3 +531,207 @@ func TestCallbackStructArgWithScalar(t *testing.T) { t.Errorf("expected %#v %d, received %#v %d", expected, extra, receivedArg1, receivedArg2) } } + +// TestStructReturn16B_TwoDoubles verifies that {double, double} is returned in XMM0:XMM1. +// This is the NSPoint / NSSize case on macOS Intel — the primary motivation for TASK-045. +// SysV AMD64 ABI: both eightbytes are SSE class → ReturnStXmm0Xmm1. 
+func TestStructReturn16B_TwoDoubles(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Windows: XMM struct returns not captured by syscall.SyscallN") + } + requireStructLib(t) + + sym, err := GetSymbol(structTestLib, "return_struct_2doubles") + if err != nil { + t.Fatal(err) + } + + // {double, double} — both SSE → ReturnStXmm0Xmm1 + structType := &types.TypeDescriptor{ + Kind: types.StructType, + Size: 16, + Alignment: 8, + Members: []*types.TypeDescriptor{ + types.DoubleTypeDescriptor, + types.DoubleTypeDescriptor, + }, + } + + var cif types.CallInterface + if err := PrepareCallInterface(&cif, types.DefaultCall, structType, + []*types.TypeDescriptor{types.DoubleTypeDescriptor, types.DoubleTypeDescriptor}); err != nil { + t.Fatal(err) + } + + if cif.Flags != types.ReturnStXmm0Xmm1 { + t.Fatalf("expected cif.Flags = ReturnStXmm0Xmm1 (%d), got %d", types.ReturnStXmm0Xmm1, cif.Flags) + } + + type PairF64 struct{ A, B float64 } + + a := 1.5 + b := 2.5 + args := []unsafe.Pointer{unsafe.Pointer(&a), unsafe.Pointer(&b)} + var result PairF64 + if err := CallFunction(&cif, sym, unsafe.Pointer(&result), args); err != nil { + t.Fatal(err) + } + + if result.A != a || result.B != b { + t.Errorf("return_struct_2doubles(%f, %f) = {%f, %f}, want {%f, %f}", + a, b, result.A, result.B, a, b) + } +} + +// TestStructReturn16B_IntFloat verifies that {int64, double} returns in RAX:XMM0. +// SysV AMD64 ABI: eightbyte0 INTEGER (RAX), eightbyte1 SSE (XMM0) → ReturnStRaxXmm0. 
+func TestStructReturn16B_IntFloat(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Windows: XMM struct returns not captured by syscall.SyscallN") + } + requireStructLib(t) + + sym, err := GetSymbol(structTestLib, "return_struct_int_float") + if err != nil { + t.Fatal(err) + } + + // {int64, double} — INTEGER + SSE → ReturnStRaxXmm0 + structType := &types.TypeDescriptor{ + Kind: types.StructType, + Size: 16, + Alignment: 8, + Members: []*types.TypeDescriptor{ + types.SInt64TypeDescriptor, + types.DoubleTypeDescriptor, + }, + } + + var cif types.CallInterface + if err := PrepareCallInterface(&cif, types.DefaultCall, structType, + []*types.TypeDescriptor{types.SInt64TypeDescriptor, types.DoubleTypeDescriptor}); err != nil { + t.Fatal(err) + } + + if cif.Flags != types.ReturnStRaxXmm0 { + t.Fatalf("expected cif.Flags = ReturnStRaxXmm0 (%d), got %d", types.ReturnStRaxXmm0, cif.Flags) + } + + type MixedIntFloat struct { + A int64 + B float64 + } + + a := int64(42) + b := 3.14 + args := []unsafe.Pointer{unsafe.Pointer(&a), unsafe.Pointer(&b)} + var result MixedIntFloat + if err := CallFunction(&cif, sym, unsafe.Pointer(&result), args); err != nil { + t.Fatal(err) + } + + if result.A != a || result.B != b { + t.Errorf("return_struct_int_float(%d, %f) = {%d, %f}, want {%d, %f}", + a, b, result.A, result.B, a, b) + } +} + +// TestStructReturn16B_FloatInt verifies that {double, int64} returns in XMM0:RAX. +// SysV AMD64 ABI: eightbyte0 SSE (XMM0), eightbyte1 INTEGER (RAX) → ReturnStXmm0Rax. 
+func TestStructReturn16B_FloatInt(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Windows: XMM struct returns not captured by syscall.SyscallN") + } + requireStructLib(t) + + sym, err := GetSymbol(structTestLib, "return_struct_float_int") + if err != nil { + t.Fatal(err) + } + + // {double, int64} — SSE + INTEGER → ReturnStXmm0Rax + structType := &types.TypeDescriptor{ + Kind: types.StructType, + Size: 16, + Alignment: 8, + Members: []*types.TypeDescriptor{ + types.DoubleTypeDescriptor, + types.SInt64TypeDescriptor, + }, + } + + var cif types.CallInterface + if err := PrepareCallInterface(&cif, types.DefaultCall, structType, + []*types.TypeDescriptor{types.DoubleTypeDescriptor, types.SInt64TypeDescriptor}); err != nil { + t.Fatal(err) + } + + if cif.Flags != types.ReturnStXmm0Rax { + t.Fatalf("expected cif.Flags = ReturnStXmm0Rax (%d), got %d", types.ReturnStXmm0Rax, cif.Flags) + } + + type MixedFloatInt struct { + A float64 + B int64 + } + + a := 2.71828 + b := int64(100) + args := []unsafe.Pointer{unsafe.Pointer(&a), unsafe.Pointer(&b)} + var result MixedFloatInt + if err := CallFunction(&cif, sym, unsafe.Pointer(&result), args); err != nil { + t.Fatal(err) + } + + if result.A != a || result.B != b { + t.Errorf("return_struct_float_int(%f, %d) = {%f, %d}, want {%f, %d}", + a, b, result.A, result.B, a, b) + } +} + +// TestStructReturn16B_TwoInts verifies that {int64, int64} returns in RAX:RDX. +// SysV AMD64 ABI: both eightbytes INTEGER → ReturnStRaxRdx. 
+func TestStructReturn16B_TwoInts(t *testing.T) { + requireStructLib(t) + + sym, err := GetSymbol(structTestLib, "return_struct_2ints") + if err != nil { + t.Fatal(err) + } + + // {int64, int64} — both INTEGER → ReturnStRaxRdx + structType := &types.TypeDescriptor{ + Kind: types.StructType, + Size: 16, + Alignment: 8, + Members: []*types.TypeDescriptor{ + types.SInt64TypeDescriptor, + types.SInt64TypeDescriptor, + }, + } + + var cif types.CallInterface + if err := PrepareCallInterface(&cif, types.DefaultCall, structType, + []*types.TypeDescriptor{types.SInt64TypeDescriptor, types.SInt64TypeDescriptor}); err != nil { + t.Fatal(err) + } + + if cif.Flags != types.ReturnStRaxRdx { + t.Fatalf("expected cif.Flags = ReturnStRaxRdx (%d), got %d", types.ReturnStRaxRdx, cif.Flags) + } + + type PairI64 struct{ A, B int64 } + + a := int64(1000000) + b := int64(2000000) + args := []unsafe.Pointer{unsafe.Pointer(&a), unsafe.Pointer(&b)} + var result PairI64 + if err := CallFunction(&cif, sym, unsafe.Pointer(&result), args); err != nil { + t.Fatal(err) + } + + if result.A != a || result.B != b { + t.Errorf("return_struct_2ints(%d, %d) = {%d, %d}, want {%d, %d}", + a, b, result.A, result.B, a, b) + } +} diff --git a/ffi/testdata/structtest.c b/ffi/testdata/structtest.c index 5dc95e2..155ce57 100644 --- a/ffi/testdata/structtest.c +++ b/ffi/testdata/structtest.c @@ -54,3 +54,33 @@ void callback_struct_and_int(int32_t a, uint32_t b, int64_t extra, struct pair_i32_u32 s = {.a = a, .b = b}; callback(s, extra); } + +// Struct RETURN functions — test XMM0:XMM1 / RAX:RDX register pair selection. +// {double, double}: SysV AMD64 ABI returns this in XMM0:XMM1 (SSE, SSE). +// Models NSPoint / NSSize on macOS Intel. +struct pair_f64 { double a; double b; }; +struct pair_f64 return_struct_2doubles(double a, double b) { + struct pair_f64 s = {.a = a, .b = b}; + return s; +} + +// {int64, double}: eightbyte0 INTEGER (RAX), eightbyte1 SSE (XMM0). 
+struct mixed_int_float { int64_t a; double b; }; +struct mixed_int_float return_struct_int_float(int64_t a, double b) { + struct mixed_int_float s = {.a = a, .b = b}; + return s; +} + +// {double, int64}: eightbyte0 SSE (XMM0), eightbyte1 INTEGER (RAX). +struct mixed_float_int { double a; int64_t b; }; +struct mixed_float_int return_struct_float_int(double a, int64_t b) { + struct mixed_float_int s = {.a = a, .b = b}; + return s; +} + +// {int64, int64}: both INTEGER, returned in RAX:RDX. +struct return_pair_i64 { int64_t a; int64_t b; }; +struct return_pair_i64 return_struct_2ints(int64_t a, int64_t b) { + struct return_pair_i64 s = {.a = a, .b = b}; + return s; +} diff --git a/types/types.go b/types/types.go index f9448fd..21d2b58 100644 --- a/types/types.go +++ b/types/types.go @@ -108,16 +108,16 @@ type CallInterface struct { // Return flags constants const ( - ReturnVoid = 0 - ReturnUInt8 = 1 - ReturnUInt16 = 2 - ReturnUInt32 = 3 - ReturnSInt8 = 4 - ReturnSInt16 = 5 - ReturnSInt32 = 6 - ReturnInt64 = 7 - ReturnInXMM32 = 8 - ReturnInXMM64 = 9 + ReturnVoid = 0 + ReturnUInt8 = 1 + ReturnUInt16 = 2 + ReturnUInt32 = 3 + ReturnSInt8 = 4 + ReturnSInt16 = 5 + ReturnSInt32 = 6 + ReturnInt64 = 7 + ReturnInXMM32 = 8 + ReturnInXMM64 = 9 // AMD64 9-16B struct return modes (SysV ABI §3.2.3). // Each eightbyte is classified independently as INTEGER (GP register) or SSE (XMM register). // These flags drive handleReturn to reconstruct the struct from the correct registers. 
From a1465b4c1257935ad226556d4e19c4593c399cbd Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 22:44:07 +0300 Subject: [PATCH 7/8] docs: update CHANGELOG, README, ARCHITECTURE, PERFORMANCE, ROADMAP for SSE struct return (TASK-045) - CHANGELOG: 9-16B XMM return modes, 4-way classification - README: struct return feature table updated - ARCHITECTURE: 4-mode return table with flag names - PERFORMANCE: struct return + callback struct args comparison rows - ROADMAP: v0.5.0 released, v0.5.1 pending with all recent features --- CHANGELOG.md | 1 + README.md | 2 +- ROADMAP.md | 34 ++++++++++++++++++++++++++-------- docs/ARCHITECTURE.md | 13 +++++++++++-- docs/PERFORMANCE.md | 2 ++ 5 files changed, 41 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7e8396..5e62634 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Classification: >16B structs** — `classifyArgumentAMD64` now correctly returns zero register usage for MEMORY class structs (previously claimed GP registers) - **Classification: mixed eightbyte** — per-eightbyte SSE/INTEGER classification now walks all members with INTEGER-wins merge rule per System V ABI - **Deprecated `reflect.Ptr`** — replaced with `reflect.Pointer` in callback validation (PR [#38](https://github.com/go-webgpu/goffi/pull/38), flagged by golangci-lint v2.12.1) +- **AMD64: 9-16B struct return via XMM registers** — structs like `{float64, float64}` (NSPoint, NSSize, CGPoint, CGSize) now correctly return via XMM0:XMM1 per System V ABI. Previously misclassified as sret (hidden pointer), producing corrupted values on macOS Intel. 
Four return modes now supported: RAX:RDX, RAX:XMM0, XMM0:RAX, XMM0:XMM1 (TASK-045) ### Added - `CGO_ENABLED=1` support ([#13](https://github.com/go-webgpu/goffi/issues/13), PR [#37](https://github.com/go-webgpu/goffi/pull/37) by [@jiyeyuran](https://github.com/jiyeyuran)) — goffi now builds and tests under both `CGO_ENABLED=0` (fakecgo) and `CGO_ENABLED=1` (real `runtime/cgo`). Enables race detector, coexistence with CGO libraries (gocv, database drivers, etc.), and resolves [#22](https://github.com/go-webgpu/goffi/issues/22) duplicate symbol conflict as alternative workaround diff --git a/README.md b/README.md index 33c87b2..61bf650 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ ffi.CallFunction(cif, sym, unsafe.Pointer(&result), args) | **Cross-platform** | 7 targets | Windows, Linux, macOS, FreeBSD × AMD64 + ARM64 | | **Callbacks** | C→Go safe | `crosscall2` integration, struct args, works from any C thread | | **Type-safe** | Runtime validation | 5 typed error types with `errors.As()` support | -| **Struct pass/return** | Full ABI | Args: INTEGER/SSE classification. Returns: ≤8B (RAX), 9–16B (RAX+RDX), >16B (sret) | +| **Struct pass/return** | Full ABI | Args: INTEGER/SSE classification. 
Returns: ≤8B (RAX/XMM0), 9–16B (4 modes: RAX/XMM × RAX/XMM), >16B (sret) | | **Context** | Timeouts | `CallFunctionContext(ctx, ...)` cancellation | | **Race detector** | `-race` compatible | `CGO_ENABLED=1 go test -race` works cleanly | | **Tested** | 89% coverage | CI on Linux, Windows, macOS (CGO=0 and CGO=1) | diff --git a/ROADMAP.md b/ROADMAP.md index e00e6ba..d3c74e9 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -3,7 +3,7 @@ > **Strategic Approach**: Build production-ready Zero-CGO FFI with benchmarked performance > **Philosophy**: Performance first, usability second, platform coverage third -**Last Updated**: 2026-03-02 | **Current Version**: v0.4.1 | **Strategy**: Benchmarks → Callbacks → ARM64 → Runtime → API → v1.0 LTS | **Milestone**: v0.4.1 (ABI compliance) → v0.5.0 Usability → v1.0.0 LTS +**Last Updated**: 2026-05-13 | **Current Version**: v0.5.0 (v0.5.1 pending) | **Strategy**: Benchmarks → Callbacks → ARM64 → Runtime → ABI → v1.0 LTS | **Milestone**: v0.5.1 (struct ABI + CGO=1) → v0.6.0 Variadic/Builder → v1.0.0 LTS --- @@ -56,11 +56,15 @@ v0.3.9 (CALLBACK FIXES) ✅ RELEASED 2026-02-18 ↓ (runtime integration) v0.4.0 (CROSSCALL2 INTEGRATION) ✅ RELEASED 2026-02-27 ↓ (usability) -v0.5.0 (USABILITY + VARIADIC) → 2026 Q2-Q3 +v0.5.0 (PLATFORM COVERAGE) ✅ RELEASED 2026-03-29 + ↓ (struct ABI + CGO support) +v0.5.1 (STRUCT ABI + CGO_ENABLED=1) → 2026-05 (pending tag) + ↓ (variadic + builder API) +v0.6.0 (VARIADIC + BUILDER API) → 2026 Q3 ↓ (advanced features) v0.8.0 (ADVANCED FEATURES) → 2026 Q3-Q4 ↓ (community adoption + validation) -v1.0.0 LTS → Long-term support release (Q1 2026) +v1.0.0 LTS → Long-term support release (2027 Q1) ``` ### Critical Milestones @@ -119,12 +123,26 @@ v1.0.0 LTS → Long-term support release (Q1 2026) - Struct return 9-16 bytes, sret hidden pointer, HFA stack spill - Overflow detection, `runtime.KeepAlive` safety -**v0.5.0** = Usability + Variadic (2026 Q2-Q3) +**v0.5.0** = Platform coverage ✅ RELEASED (2026-03-29) +- **Windows 
ARM64** support (Snapdragon X Elite, tested by @SideFx) +- **FreeBSD amd64** support (cross-compile verified) +- 7 platform targets (Linux/Windows/macOS/FreeBSD × amd64 + ARM64) + +**v0.5.1** = Struct ABI + CGO_ENABLED=1 (2026-05, pending tag) +- **CGO_ENABLED=1 support** (PR #37 by @jiyeyuran) — dual-mode build, race detector compatible +- **Struct by-value argument passing** (PR #39, closes #33) — ≤8B/9-16B/>16B, INTEGER/SSE classification +- **Callback struct arguments** (PR #42 by @pekim, closes #41) — C→Go callbacks with struct args +- **9-16B struct return via XMM** (TASK-045) — 4 return modes: RAX:RDX, RAX:XMM0, XMM0:RAX, XMM0:XMM1 +- **Race detector** — checkptr double-indirection fix (Go #58625), `-race` clean +- **E2E test infrastructure** — gcc-compiled C test library for struct passing verification +- Contributors: @jiyeyuran (CGO path maintainer), @pekim (callback structs) + +**v0.6.0** = Variadic + Builder API (2026 Q3) - Builder pattern API -- Platform-specific struct handling - **Variadic function support** (printf, sprintf, etc.) 
+- RegisterFunc convenience API -**v1.0.0** = Long-term support release (Q1 2026) +**v1.0.0** = Long-term support release (2027 Q1) - API stability guarantee - Security audit - Published benchmarks vs CGO/purego @@ -132,9 +150,9 @@ v1.0.0 LTS → Long-term support release (Q1 2026) --- -## 📊 Current Status (v0.4.1) +## 📊 Current Status (v0.5.1) -**Phase**: ABI compliance audit complete, forward call path fully verified +**Phase**: Struct ABI complete, CGO_ENABLED=1 supported, SSE struct return fixed **What Works**: - ✅ Dynamic library loading (`LoadLibrary`, `GetSymbol`, `FreeLibrary`) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 3eed6fa..9d85350 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -191,8 +191,17 @@ Classification uses `reflect.Type` (not `types.TypeDescriptor`) since callback s ABI rules for returning structs depend on size: -- **≤ 8 bytes**: returned in RAX (AMD64) or X0 (ARM64) -- **9-16 bytes** (AMD64): split across RAX (low 8) + RDX (high 8) +- **≤ 8 bytes**: returned in RAX (INTEGER) or XMM0 (SSE) on AMD64, X0 or D0 on ARM64 +- **9-16 bytes** (AMD64): two eightbytes, each returned in GP or XMM per classification. Four modes: + +| Struct layout | Eightbyte 0 | Eightbyte 1 | Registers | Flag | +|---|---|---|---|---| +| `{int64, int64}` | INTEGER | INTEGER | RAX + RDX | `ReturnStRaxRdx` | +| `{int64, float64}` | INTEGER | SSE | RAX + XMM0 | `ReturnStRaxXmm0` | +| `{float64, int64}` | SSE | INTEGER | XMM0 + RAX | `ReturnStXmm0Rax` | +| `{float64, float64}` | SSE | SSE | XMM0 + XMM1 | `ReturnStXmm0Xmm1` | + +Classification is computed at CIF-prepare time (`classifyReturnAMD64` using `classifyEightbyte`), stored in `cif.Flags`, and dispatched in `handleReturn`. This matches libffi's `UNIX64_RET_ST_*` pattern. 
- **> 16 bytes**: caller passes a hidden pointer as the first argument (sret) Implementation in `internal/arch/amd64/implementation.go`: diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md index 497211b..2e8e465 100644 --- a/docs/PERFORMANCE.md +++ b/docs/PERFORMANCE.md @@ -262,6 +262,8 @@ FFI overhead: 0.0001ms = 0.001% ✅ | **Type Safety** | ✅ TypeDescriptor validation | Go reflect.Type | | **Error Handling** | ✅ 5 typed errors | Generic errors | | **Callback float returns** | ✅ XMM0 in asm | ❌ panic | +| **Struct return 9-16B** | ✅ 4 modes (RAX/XMM × RAX/XMM) | ✅ 4 modes (f1/f2 + a1/a2) | +| **Callback struct args** | ✅ ≤8B, 9-16B, >16B | ❌ panic | | **ARM64 HFA** | Recursive struct walk | Partial recursive (bug in nested path) | | **Context support** | ✅ Timeouts/cancellation | ❌ | | **Platforms** | 5 (quality focus) | 9+ (breadth focus) | From 9e0c1f8549449131061f7835d941236bce32b4f5 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 23:15:28 +0300 Subject: [PATCH 8/8] =?UTF-8?q?fix:=20Windows=20sret=20for=209-16B=20struc?= =?UTF-8?q?ts=20=E2=80=94=20Win64=20ABI=20uses=20sret=20for=20all=20>8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit classifyReturnAMD64 now checks runtime.GOOS == windows and returns ReturnViaPointer for all structs >8B on Windows. Unit tests and e2e tests updated with platform-aware expectations. --- ffi/struct_e2e_test.go | 3 +++ internal/arch/amd64/amd64_test.go | 20 +++++++++++++++----- internal/arch/amd64/classification.go | 8 +++++--- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ffi/struct_e2e_test.go b/ffi/struct_e2e_test.go index 3df4acb..e5cd9f7 100644 --- a/ffi/struct_e2e_test.go +++ b/ffi/struct_e2e_test.go @@ -692,6 +692,9 @@ func TestStructReturn16B_FloatInt(t *testing.T) { // TestStructReturn16B_TwoInts verifies that {int64, int64} returns in RAX:RDX. // SysV AMD64 ABI: both eightbytes INTEGER → ReturnStRaxRdx. 
func TestStructReturn16B_TwoInts(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Windows: 16B struct returns use sret, not RAX:RDX (Win64 ABI)") + } requireStructLib(t) sym, err := GetSymbol(structTestLib, "return_struct_2ints") diff --git a/internal/arch/amd64/amd64_test.go b/internal/arch/amd64/amd64_test.go index ccdff27..80908e7 100644 --- a/internal/arch/amd64/amd64_test.go +++ b/internal/arch/amd64/amd64_test.go @@ -4,12 +4,22 @@ package amd64 import ( "math" + "runtime" "testing" "unsafe" "github.com/go-webgpu/goffi/types" ) +// struct16BExpected returns the expected return flag for 9-16B structs. +// On Windows, all structs >8B use sret regardless of field types. +func struct16BExpected(unixFlag int) int { + if runtime.GOOS == "windows" { + return types.ReturnViaPointer | types.ReturnVoid + } + return unixFlag +} + func TestAlign(t *testing.T) { impl := &Implementation{} tests := []struct { @@ -60,14 +70,14 @@ func TestClassifyReturnAMD64(t *testing.T) { {"Struct2B", &types.TypeDescriptor{Size: 2, Kind: types.StructType}, types.ReturnSInt16}, {"Struct4B", &types.TypeDescriptor{Size: 4, Kind: types.StructType}, types.ReturnSInt32}, {"Struct8B", &types.TypeDescriptor{Size: 8, Kind: types.StructType}, types.ReturnInt64}, - // 9-16B: two-eightbyte classification + // 9-16B: two-eightbyte classification (Unix only; Windows uses sret for all >8B) { "Struct16B_TwoDoubles", &types.TypeDescriptor{Size: 16, Kind: types.StructType, Members: []*types.TypeDescriptor{ types.DoubleTypeDescriptor, types.DoubleTypeDescriptor, }}, - types.ReturnStXmm0Xmm1, + struct16BExpected(types.ReturnStXmm0Xmm1), }, { "Struct16B_IntFloat", @@ -75,7 +85,7 @@ func TestClassifyReturnAMD64(t *testing.T) { types.SInt64TypeDescriptor, types.DoubleTypeDescriptor, }}, - types.ReturnStRaxXmm0, + struct16BExpected(types.ReturnStRaxXmm0), }, { "Struct16B_FloatInt", @@ -83,7 +93,7 @@ func TestClassifyReturnAMD64(t *testing.T) { types.DoubleTypeDescriptor, types.SInt64TypeDescriptor, }}, - 
types.ReturnStXmm0Rax, + struct16BExpected(types.ReturnStXmm0Rax), }, { "Struct16B_TwoInts", @@ -91,7 +101,7 @@ func TestClassifyReturnAMD64(t *testing.T) { types.SInt64TypeDescriptor, types.SInt64TypeDescriptor, }}, - types.ReturnStRaxRdx, + struct16BExpected(types.ReturnStRaxRdx), }, {"Struct24B", &types.TypeDescriptor{Size: 24, Kind: types.StructType}, types.ReturnViaPointer | types.ReturnVoid}, } diff --git a/internal/arch/amd64/classification.go b/internal/arch/amd64/classification.go index cbea968..7a3430e 100644 --- a/internal/arch/amd64/classification.go +++ b/internal/arch/amd64/classification.go @@ -4,6 +4,7 @@ package amd64 import ( "math" + "runtime" "github.com/go-webgpu/goffi/types" ) @@ -34,11 +35,12 @@ func classifyReturnAMD64(t *types.TypeDescriptor, abi types.CallingConvention) i case 8: return types.ReturnInt64 default: - if t.Size > 16 { - // MEMORY class (>16B): returned via hidden first argument (sret pointer). + if t.Size > 16 || runtime.GOOS == "windows" { + // MEMORY class (>16B) or Windows (all structs >8B): sret pointer. + // Win64 ABI: structs not exactly 1/2/4/8 bytes are returned by reference. return types.ReturnViaPointer | types.ReturnVoid } - // 9-16B: classify each eightbyte independently per ABI §3.2.3. + // 9-16B on Unix: classify each eightbyte independently per SysV ABI §3.2.3. // INTEGER wins over SSE within an eightbyte. eb0SSE := classifyEightbyte(t, 0, 8) eb1SSE := classifyEightbyte(t, 8, t.Size)