From a7b04120d7460246e67144fcd4fff66e923051ec Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 08:45:04 +0300 Subject: [PATCH 1/2] fix: eliminate flaky TestThread_CallAsync on Windows CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace time.Sleep(10ms) with channel-based synchronization. Same pattern as f940eb7 (SnatchLock fix) — deterministic wait instead of timing-dependent sleep on slow CI runners. Verified: 100/100 passes with -count=100. --- internal/thread/thread_test.go | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/internal/thread/thread_test.go b/internal/thread/thread_test.go index c7089b9..fbd0e5b 100644 --- a/internal/thread/thread_test.go +++ b/internal/thread/thread_test.go @@ -6,21 +6,19 @@ package thread import ( - "sync/atomic" "testing" - "time" ) func TestThread_CallVoid(t *testing.T) { th := New() defer th.Stop() - var called atomic.Bool + var called bool th.CallVoid(func() { - called.Store(true) + called = true }) - if !called.Load() { + if !called { t.Error("CallVoid did not execute function") } } @@ -42,17 +40,12 @@ func TestThread_CallAsync(t *testing.T) { th := New() defer th.Stop() - var called atomic.Bool + done := make(chan struct{}) th.CallAsync(func() { - called.Store(true) + close(done) }) - // Wait for async call to complete - time.Sleep(10 * time.Millisecond) - - if !called.Load() { - t.Error("CallAsync did not execute function") - } + <-done } func TestThread_Stop(t *testing.T) { From e7e5f9072952ed98ddfe8de51c969c4d394d2028 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 13 May 2026 23:34:57 +0300 Subject: [PATCH 2/2] =?UTF-8?q?docs:=20v0.27.4=20release=20prep=20?= =?UTF-8?q?=E2=80=94=20goffi=20v0.5.1,=20x/sys=20v0.44.0,=20CGO=5FENABLED?= =?UTF-8?q?=3D1=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit goffi v0.5.1: struct by-value ABI, XMM return (Metal NSPoint/CGSize), callback 
struct args, CGO_ENABLED=1 (race detector). x/sys v0.43.0 → v0.44.0. --- CHANGELOG.md | 16 ++++++++++++++++ README.md | 2 +- ROADMAP.md | 21 +++++++++++++++------ go.mod | 4 ++-- go.sum | 4 ++++ 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b9f503..ccdc50c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.27.4] - 2026-05-13 + +### Fixed + +- **Flaky TestThread_CallAsync on Windows CI** — replaced `time.Sleep(10ms)` with + channel-based synchronization. Same deterministic pattern as SnatchLock fix (f940eb7). + Verified: 100/100 passes with `-count=100`. + +### Changed + +- **goffi v0.5.0 → v0.5.1** — struct by-value argument passing (System V AMD64 ABI), + 9-16B struct return via XMM registers (NSPoint, CGSize — critical for Metal backend), + callback struct arguments, CGO_ENABLED=1 support (race detector), E2E test infra. + Contributors: @jiyeyuran (CGO), @pekim (callbacks). +- **x/sys v0.43.0 → v0.44.0** — latest platform syscall definitions. + ## [0.27.3] - 2026-05-11 ### Added diff --git a/README.md b/README.md index bf5a19a..cb62700 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ go get github.com/gogpu/wgpu CGO_ENABLED=0 go build ``` -> **Note:** wgpu uses Pure Go FFI via `cgo_import_dynamic`, which requires `CGO_ENABLED=0`. This enables zero C compiler dependency and easy cross-compilation. +> **Note:** wgpu uses Pure Go FFI via [goffi](https://github.com/go-webgpu/goffi). Both `CGO_ENABLED=0` (recommended default, zero C compiler dependency) and `CGO_ENABLED=1` (for race detector or coexistence with CGO libraries) are supported. 
--- diff --git a/ROADMAP.md b/ROADMAP.md index 0df3c83..a93ea62 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -19,7 +19,7 @@ --- -## Current State: v0.27.0 +## Current State: v0.27.4 ✅ **All 5 HAL backends complete** (~127K LOC) ✅ **Three-layer WebGPU stack** — wgpu API → wgpu/core → wgpu/hal @@ -53,12 +53,16 @@ ✅ **Zero-init workgroup memory** — WebGPU spec default, plumbed through all layers ✅ **CopyTextureToTexture public API** — DMA hardware copy with sub-region support ✅ **Vulkan relay semaphores** — GPU-side submission ordering (Mesa ANV workaround) -✅ **Software SPIR-V interpreter** — CPU shader execution for vertex/fragment (Phase 1: triangle) ✅ **WASM platform split** — root package _native.go/_browser.go, core/hal excluded from WASM build ✅ **Vulkan command buffer free list** — batch alloc 16 CBs, pool reset (Khronos/NVIDIA/ARM/Mesa/Rust parity) ✅ **Damage-aware surface presentation** — `PresentWithDamage()` with compositor dirty rects. First WebGPU implementation. Software + Vulkan + DX12 + GLES backends. ✅ **Automatic resource lifecycle** — `runtime.AddCleanup` for Buffer/BindGroup (ADR-018, Rust Arc+Drop pattern). GC safety net prevents per-frame resource leaks. ✅ **Zero-allocation WriteBuffer batching** — pre-allocated BufferCopy + stack barrier arrays. All PendingWrites hot paths 0 allocs/op. +✅ **Full SPIR-V interpreter** — 7 phases (~10K LOC): vertex/fragment/compute on CPU, texture sampling, GLSL.std.450 intrinsics, control flow, atomics, workgroup shared memory. Shader debugger with breakpoints and JSON trace. For debugging/CI, not production. 
+✅ **DX12 timestamp queries** — CreateQuerySet, EndQuery, ResolveQueryData (Rust wgpu-hal parity) +✅ **Queue thread safety** — Submit/WriteBuffer/WriteTexture serialized via sync.Mutex (Rust wgpu-core parity) +✅ **GLES compute memory barriers** — glMemoryBarrier for storage→draw/dispatch transitions (Rust parity) +✅ **Software render pass instrumentation** — slog debug events + RenderPassStats for CI e2e assertions ### Remaining validation (planned) - **Phase C** (P2): Spec compliance edge cases, feature gates @@ -70,7 +74,7 @@ | Metal | macOS | ✅ Stable — naga MSL 91/91 | | DX12 | Windows | ✅ Stable — TDR fixed, PendingWrites, deferred destruction | | GLES | Windows, Linux | ✅ Stable — text rendering, SamplerBindMap, texture completeness | -| Software | Windows, Linux | ✅ Stable — windowed presentation (GDI/X11), macOS planned | +| Software | Windows, Linux | ✅ Stable — windowed presentation (GDI/X11), SPIR-V interpreter, macOS planned (#163) | → **See [CHANGELOG.md](CHANGELOG.md) for detailed per-version notes** @@ -78,11 +82,12 @@ ## Upcoming -### Next: v0.26.0 +### Next +- [ ] GLES Phase 1 — CopyBufferToTexture, CopyTextureToTexture, glFenceSync +- [ ] macOS software presentation — CGImage + CALayer (#163, contributor @k-chimi) - [ ] DX12 DeviceTextureTracker for proper barrier state tracking - [ ] GLES global UNPACK_ALIGNMENT=1 (Rust pattern — set once at device open) -- [ ] Vulkan relay semaphores for multi-submission ordering (VK-SYNC-001) - [ ] GetSurfaceCapabilities on all backends (currently Vulkan-only) - [ ] DXIL as default DX12 shader path (currently opt-in via `GOGPU_DX12_DXIL=1`) @@ -98,7 +103,7 @@ - [x] Text rendering on all GPU backends - [x] Blend constant tracking + resource usage conflict detection - [ ] Full render/compute pass validation (resource transitions) -- [ ] Late buffer binding size validation (SPIR-V reflection → min binding size) +- [x] Late buffer binding size validation (VAL-006, draw/dispatch-time checks) - [ ] Comprehensive 
documentation - [ ] Conformance test suite @@ -144,6 +149,10 @@ | Version | Date | Highlights | |---------|------|------------| +| **v0.27.4** | 2026-05 | goffi v0.5.1 (struct ABI, XMM return, CGO_ENABLED=1), x/sys v0.44.0, flaky TestThread_CallAsync fix | +| **v0.27.3** | 2026-05 | Software render pass instrumentation (slog + RenderPassStats), Metal MsgSend docs | +| **v0.27.2** | 2026-05 | DX12 timestamp queries, Queue mutex, GLES compute barriers, Vulkan timestampPeriod fix | +| **v0.27.1** | 2026-05 | MSAA resolve LoadOp=CLEAR, Vulkan offscreen ImageLayoutGeneral, persistent stencil, naga v0.17.13 | | **v0.27.0** | 2026-05 | **Full SPIR-V interpreter** (7 phases, ~10K LOC), shader debugger, compute HAL, particles rendering, tagged union optimization, naga v0.17.11, flaky test fix | | **v0.26.12** | 2026-05 | **Test coverage** (core 85.5%, root 78.4%), Metal entry point fix (#168 by @k-chimi), naga v0.17.10 | | **v0.26.11** | 2026-04 | **DX12 indirect dispatch/draw** — ExecuteIndirect + CommandSignature (was last GPU backend with stubs) | diff --git a/go.mod b/go.mod index 5c1c411..9eb11e4 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/gogpu/wgpu go 1.25.0 require ( - github.com/go-webgpu/goffi v0.5.0 + github.com/go-webgpu/goffi v0.5.1 github.com/gogpu/gputypes v0.5.0 github.com/gogpu/naga v0.17.13 - golang.org/x/sys v0.43.0 + golang.org/x/sys v0.44.0 ) diff --git a/go.sum b/go.sum index 66442b8..87273a9 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,12 @@ github.com/go-webgpu/goffi v0.5.0 h1:EuvVRiRn9qAfCkYYXbHs9gz8NY+zv2/OA1N7gi56UVE= github.com/go-webgpu/goffi v0.5.0/go.mod h1:wfoxNsJkU+5RFbV1kNN1kunhc1lFHuJKK3zpgx08/uM= +github.com/go-webgpu/goffi v0.5.1 h1:RSPR+YKT0tmbp5Uon+xwhN1veC9cehmqMptMkQuopok= +github.com/go-webgpu/goffi v0.5.1/go.mod h1:wfoxNsJkU+5RFbV1kNN1kunhc1lFHuJKK3zpgx08/uM= github.com/gogpu/gputypes v0.5.0 h1:i2ED/9w6m6yLxf8XJT69/NIMSNTLO2y5F1LqvugCKIE= github.com/gogpu/gputypes v0.5.0/go.mod 
h1:cnXrDMwTpWTvJLW1Vreop3PcT6a2YP/i3s91rPaOavw= github.com/gogpu/naga v0.17.13 h1:VlponVgD1fEfNotx0874M4n7tnfum8YlMEB3pBdd2Ps= github.com/gogpu/naga v0.17.13/go.mod h1:15sQaHKkbqXcwTN+hHYGLsA0WBBnkmYzne/eF5p5WEg= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= +golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=