diff --git a/include/core/detail/var_shape_descriptor_table.hpp b/include/core/detail/var_shape_descriptor_table.hpp new file mode 100644 index 00000000..10bae71d --- /dev/null +++ b/include/core/detail/var_shape_descriptor_table.hpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include +#include + +#include "core/detail/allocators/i_allocator.hpp" +#include "core/image_buffer.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" + +namespace roccv::detail { + +/** + * @brief Owns the per-image descriptor table (strided buffer + format arrays) + * for an ImageBatchVarShape, along with its residency-specific lifecycle. + * + * GPU: a device-resident table mirrored by pinned host buffers, brought up to + * date by a lazy H2D copy guarded by a hipEvent. CPU: a single host-resident + * table handed straight to host kernels — no device buffers, no fence, no sync. + * + * All device dispatch lives here so ImageBatchVarShape stays device-agnostic and + * gets a defaulted destructor and trivial move. Move-only: the moved-from table + * is left empty (every pointer null) so its destructor is a no-op. + */ +class VarShapeDescriptorTable { + public: + /** + * @brief The kernel-facing pointer set for one exported snapshot. `imageList` + * and `formatList` are device pointers for a GPU table and host pointers for a + * CPU table; `hostFormatList` is always host-resident (it aliases `formatList` + * for a CPU table). + */ + struct Snapshot { + ImageBufferStrided* imageList; + ImageFormat* formatList; + const ImageFormat* hostFormatList; + }; + + /** + * @brief Allocate a table sized for `capacity` images on `device`. Throws + * INVALID_VALUE if capacity is not positive. + */ + VarShapeDescriptorTable(int32_t capacity, eDeviceType device, const IAllocator& alloc); + ~VarShapeDescriptorTable(); + + VarShapeDescriptorTable(const VarShapeDescriptorTable&) = delete; + VarShapeDescriptorTable& operator=(const VarShapeDescriptorTable&) = delete; + VarShapeDescriptorTable(VarShapeDescriptorTable&&) noexcept; + VarShapeDescriptorTable& operator=(VarShapeDescriptorTable&&) = delete; + + eDeviceType device() const noexcept { return m_device; } + + /** Host-resident mirrors, always valid (both devices) for cache rebuilds. */ + const ImageBufferStrided* hostImages() const noexcept { return m_hostImages; } + const ImageFormat* hostFormats() const noexcept { return m_hostFormats; } + + /** + * @brief Write descriptor slot `index` from already-validated image data. For + * a GPU table, drains any in-flight H2D copy first so the host mirror a + * consumer is reading never tears. + */ + void writeSlot(int32_t index, const ImageBufferStrided& slot, ImageFormat format); + + /** + * @brief Adjust dirty tracking after the live image count shrinks (popBack / + * clear). Pass the new image count (0 for clear). + */ + void onShrink(int32_t newNumImages) noexcept; + + /** + * @brief Flush the dirty suffix [dirtyStart, numImages) to the device on + * `stream` (GPU) or do nothing (CPU), then return the kernel-facing pointers. + */ + Snapshot sync(hipStream_t stream, int32_t numImages); + + private: + void freeAll() noexcept; + + eDeviceType m_device; + const IAllocator& m_allocator; + int32_t m_dirtyStartingFromIndex = 0; + bool m_fencePending = false; + + ImageBufferStrided* m_devImages = nullptr; + ImageFormat* m_devFormats = nullptr; + ImageBufferStrided* m_hostImages = nullptr; + ImageFormat* m_hostFormats = nullptr; + hipEvent_t m_fence = nullptr; +}; + +} // namespace roccv::detail diff --git a/include/core/image.hpp b/include/core/image.hpp new file mode 100644 index 00000000..ef7707a8 --- /dev/null +++ b/include/core/image.hpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "core/detail/allocators/i_allocator.hpp" +#include "core/image_buffer.hpp" +#include "core/image_data.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" +#include "operator_types.h" + +namespace roccv { + +class ImageStorage; + +/** + * @brief Cleanup callback signature for ImageWrapData. Invoked when the last + * Image handle referencing the wrapped buffer is destroyed. Receives the + * ImageData snapshot that was originally wrapped, so callbacks can free + * multi-plane buffers or dispatch on format. + */ +using ImageDataCleanupFunc = std::function; + +/** + * @brief Per-image allocation spec describing what to allocate for a single + * variable-sized image. Mirrors NVCVImageRequirements: size, format, per-plane + * row strides, and base-address alignment. Used as the input to Image's + * allocating constructors and as the output of CalcRequirements; also + * preserved on the Image itself as the source of truth from which exportData() + * rebuilds an ImageData snapshot on demand. + * + * Per-plane row strides are populated only for planes 0..numPlanes(format)-1; + * remaining slots are unused. Today's interleaved-only ImageFormat means only + * planeRowStride[0] is populated in practice. + */ +struct ImageRequirements { + Size2D size; // Width and height in pixels. + ImageFormat format; // Pixel format (dtype + channel count + swizzle). + int64_t planeRowStride[ROCCV_MAX_IMAGE_PLANES]; // Per-plane row stride in bytes. + int32_t alignBytes; // Required base-address alignment, in bytes. +}; + +/** + * @brief A single variable-sized image with device-resident pixel data. + * + * Image is the per-element type held by ImageBatchVarShape. It is a handle + * over a refcounted ImageStorage: copying an Image bumps the refcount and + * leaves both handles pointing at the same underlying buffer. The buffer is + * freed when the last handle is destroyed (for owning Images) or when the + * cleanup callback fires (for ImageWrapData with a callback). + * + * Storage shape: Image holds the buffer pointer (via ImageStorage) plus the + * "ingredients" describing it (size, format, device, per-plane row strides). + * It does NOT hold a precomputed ImageData snapshot — exportData() rebuilds + * one on demand from the ingredients. This keeps a single source of truth for + * the buffer pointer and aligns with how ImageBatchVarShape produces its + * own snapshots. + */ +class Image { + public: + using Requirements = ImageRequirements; + + /** + * @brief Compute the requirements (row stride, etc.) for an image of the + * given dimensions and format. + */ + static Requirements CalcRequirements(Size2D size, ImageFormat format); + + /** + * @brief Allocate a new device buffer for an image of the given dimensions + * and format using the global default allocator. + */ + explicit Image(Size2D size, ImageFormat format, eDeviceType device = eDeviceType::GPU); + + /** + * @brief Allocate a new device buffer using a caller-supplied allocator. + */ + explicit Image(Size2D size, ImageFormat format, const IAllocator& alloc, eDeviceType device = eDeviceType::GPU); + + /** + * @brief Allocate a new device buffer from precomputed requirements. + */ + explicit Image(const Requirements& reqs, eDeviceType device = eDeviceType::GPU); + explicit Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType device = eDeviceType::GPU); + + Image(const Image&) = default; // refcount bump + Image(Image&&) noexcept = default; + Image& operator=(const Image&) = default; // refcount bump + Image& operator=(Image&&) noexcept = default; + ~Image() = default; + + Size2D size() const noexcept { return m_size; } + ImageFormat format() const noexcept { return m_format; } + eDeviceType device() const noexcept { return m_device; } + + /** + * @brief Build and return an ImageData snapshot describing this image. + * + * Returned by value (not by reference) — Image stores ingredients, not a + * precomputed snapshot, so each call constructs a fresh ImageData. The + * snapshot's plane descriptors point into this Image's buffer; it remains + * valid as long as any handle to this storage is alive. + */ + ImageData exportData() const; + + /** + * @brief Build a snapshot and down-cast it to a specific subclass. Throws + * std::bad_cast if the underlying buffer kind doesn't match Derived. + */ + template + Derived exportData() const; + + private: + Image(const Requirements& reqs, eDeviceType device, std::shared_ptr storage); + + friend Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup); + + std::shared_ptr m_data; + Size2D m_size; + ImageFormat m_format; + eDeviceType m_device; + std::array m_planeRowStride; +}; + +template +Derived Image::exportData() const { + ImageData data = exportData(); + auto derived = data.cast(); + if (!derived.has_value()) { + throw std::bad_cast(); + } + return derived.value(); +} + +/** + * @brief Wrap an externally-owned buffer as an Image without allocating. + * + * View-only by default: the wrapped buffer is NOT freed when the returned + * Image (and any copies) go out of scope. The caller is responsible for + * keeping the underlying memory alive for as long as any handle survives. + * + * Pass a non-null cleanup callback to opt into ownership transfer; the + * callback runs exactly once, when the last handle is destroyed. + * + * @param[in] data Pre-existing image data (pointer, layout, device). + * @param[in] cleanup Optional callback to free the buffer on last destruction. + * @return An Image referencing the wrapped buffer. + */ +extern Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup = nullptr); + +} // namespace roccv diff --git a/include/core/image_batch_buffer.hpp b/include/core/image_batch_buffer.hpp new file mode 100644 index 00000000..06f0c4b3 --- /dev/null +++ b/include/core/image_batch_buffer.hpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include + +#include "core/image_buffer.hpp" +#include "core/image_format.hpp" + +namespace roccv { + +/** + * @brief Pitch-linear descriptor table for a variable-shape image batch. + * + * Each entry of `imageList` is a full per-image strided buffer descriptor — + * reusing `ImageBufferStrided` keeps the per-image shape (multi-plane-capable, + * one base pointer per plane, per-plane row stride) identical to what a single + * `Image` carries today. + * + * Pointer residency: + * - `imageList` is the descriptor table read by GPU kernels. For a GPU-resident + * batch this points into device memory; for a hypothetical CPU-resident + * batch it would point into host memory. The producing batch class owns the + * allocation and decides residency. + * - `formatList` mirrors `imageList`'s residency and holds one ImageFormat per + * image (so kernels can branch on per-image format without dereferencing the + * descriptor table). + * - `hostFormatList` is always host-resident. It exists so host-side validation + * code can read per-image formats without paying a D->H copy. For a + * CPU-resident batch this MAY alias `formatList`; for a GPU-resident batch + * it is a separate host mirror kept in sync by the producer. + * + * `uniqueFormat` is the common ImageFormat across all images, or FMT_NONE if + * formats are heterogeneous or the batch is empty. Cached to fast-path the + * homogeneous case. + * + * `maxWidth` / `maxHeight` are the bounding box across all images. Used by + * operators to size launch grids. Both are 0 when the batch is empty. + * + * The struct is intentionally trivially copyable so it can ride inside + * `ImageBatchBuffer` without an allocation, mirroring `ImageBufferStrided`'s + * relationship to `ImageBuffer`. + */ +struct ImageBatchVarShapeBufferStrided { + /** Common format across all images in the batch, or a default-constructed + * ImageFormat if formats are heterogeneous or the batch is empty. */ + ImageFormat uniqueFormat; + + /** Bounding box across all images, in pixels. Both 0 when empty. */ + int32_t maxWidth; + int32_t maxHeight; + + /** Per-image format array, length == numImages. Residency matches + * `imageList` (device for GPU batches, host for CPU batches). */ + ImageFormat* formatList; + + /** Host-resident mirror of `formatList`. May alias `formatList` for + * CPU-resident batches. Length == numImages. */ + const ImageFormat* hostFormatList; + + /** Per-image descriptor table, length == numImages. The kernel-facing + * pointer; residency determines which device the batch lives on. */ + ImageBufferStrided* imageList; +}; + +/** + * @brief An image-batch buffer. Currently only the variable-shape strided + * variant is supported. Shaped as a tagged-union-style aggregate so additional + * batch buffer kinds can be added later (e.g. tensor-backed batches) without + * changing the public type. + */ +struct ImageBatchBuffer { + ImageBatchVarShapeBufferStrided varShapeStrided; +}; + +} // namespace roccv diff --git a/include/core/image_batch_data.hpp b/include/core/image_batch_data.hpp new file mode 100644 index 00000000..c50de010 --- /dev/null +++ b/include/core/image_batch_data.hpp @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include + +#include +#include + +#include "core/image_batch_buffer.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" +#include "operator_types.h" + +namespace roccv { + +/** + * @brief Discriminator for the kind of buffer an ImageBatchData carries. Used + * by IsCompatibleKind() / cast<>() to perform safe runtime down-casting through + * the ImageBatchData hierarchy. + * + * The hierarchy currently exposes only one concrete buffer kind + * (variable-shape, strided, GPU-resident); the enum is shaped to grow into + * additional kinds (e.g. tensor-backed batches, host-resident varshape) without + * breaking the existing buffer kind values. + */ +enum class ImageBatchBufferType { + IMAGE_BATCH_BUFFER_NONE, // Default/invalid buffer type. + IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP, // GPU-accessible varshape descriptor table. + IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST, // Host-accessible varshape descriptor table. +}; + +/** + * @brief Holds the underlying image-batch data alongside metadata + * (numImages, buffer kind). Non-strided batch data is not supported for use + * right now; use ImageBatchVarShapeDataStrided to access strided varshape data + * instead. + * + * ImageBatchData is the interchange type for a batch of variable-sized images. + * It does not own any of the underlying buffers (the descriptor table, the + * format arrays, or the per-image pixel buffers) — it is a metadata snapshot, + * valid only as long as the producing batch outlives it. + * + * Lazy-sync note: for a GPU-resident batch the producer (ImageBatchVarShape) + * is responsible for ensuring the device-side descriptor table is up to date + * with any pushBack/popBack edits before handing out an ImageBatchData. The + * snapshot itself carries no synchronization state. + */ +class ImageBatchData { + public: + ImageBatchData() = delete; + virtual ~ImageBatchData() = default; + + /** + * @brief Returns the number of images currently in the batch. + */ + virtual int32_t numImages() const; + + /** + * @brief Returns the device the descriptor table (and per-image pixel + * buffers) reside on. + */ + virtual eDeviceType device() const; + + /** + * @brief Attempts to down-cast this ImageBatchData to a more specific + * subclass. Returns the casted value if the underlying buffer kind matches + * what Derived expects, or std::nullopt otherwise. + * + * @tparam Derived The target subclass to cast to. + */ + template + std::optional cast() const { + static_assert(std::is_base_of::value, + "Cannot cast ImageBatchData to an unrelated type."); + static_assert(sizeof(Derived) == sizeof(ImageBatchData), + "Derived type must not add any additional data members."); + + if (!Derived::IsCompatibleKind(m_bufferType)) { + return std::nullopt; + } + + return std::make_optional(m_numImages, m_buffer); + } + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); + + protected: + ImageBatchData(int32_t numImages, const ImageBatchBuffer& buffer); + + int32_t m_numImages; + eDeviceType m_deviceType; + ImageBatchBufferType m_bufferType; + ImageBatchBuffer m_buffer; +}; + +/** + * @brief Image-batch data backed by a variable-shape descriptor table. Adds + * typed accessors for the per-image format arrays and the bounding box across + * the batch. Sub-classed by ImageBatchVarShapeDataStrided to discriminate + * pitch-linear storage; further sub-classed by ImageBatchVarShapeDataStridedHip + * to tag device residency. + */ +class ImageBatchVarShapeData : public ImageBatchData { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeData(int32_t numImages, const ImageBatchBuffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); + + /** + * @brief Bounding box across all images in the batch, in pixels. Both + * dimensions are 0 when the batch is empty. Used by operators to size + * launch grids without iterating the descriptor table. + */ + Size2D maxSize() const; + + /** + * @brief Returns the common ImageFormat across all images, or FMT_NONE if + * formats are heterogeneous or the batch is empty. + */ + ImageFormat uniqueFormat() const; + + /** + * @brief Per-image format array. Residency matches the descriptor table + * (device for GPU batches). Length == numImages(). + * + * Prefer hostFormatList() for host-side validation paths to avoid a D->H + * copy. + */ + const ImageFormat* formatList() const; + + /** + * @brief Host-resident mirror of formatList(). Always safe to dereference + * from host code. Length == numImages(). + */ + const ImageFormat* hostFormatList() const; +}; + +/** + * @brief Variable-shape image-batch data backed by a pitch-linear descriptor + * table. Adds the per-image descriptor accessor on top of + * ImageBatchVarShapeData. + */ +class ImageBatchVarShapeDataStrided : public ImageBatchVarShapeData { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeDataStrided(int32_t numImages, const ImageBatchBuffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); + + /** + * @brief Per-image descriptor table. Length == numImages(). Residency + * matches the enclosing data type — for ImageBatchVarShapeDataStridedHip + * this is a device pointer; kernels read it directly. + * + * Each entry is a full ImageBufferStrided so the per-image shape + * (multi-plane-capable, per-plane stride and base pointer) matches what a + * single Image carries. + */ + const ImageBufferStrided* imageList() const; +}; + +/** + * @brief GPU-accessible variable-shape image-batch data. + */ +class ImageBatchVarShapeDataStridedHip : public ImageBatchVarShapeDataStrided { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeDataStridedHip(int32_t numImages, const ImageBatchBuffer& buffer); + + /** + * @brief Constructs GPU-accessible varshape image-batch data from the + * concrete strided buffer directly. + * + * @param[in] numImages Number of images currently in the batch. + * @param[in] buffer Descriptor table + per-image format arrays. The + * descriptor table and `formatList` must point to GPU + * memory; `hostFormatList` to host memory. + */ + ImageBatchVarShapeDataStridedHip(int32_t numImages, const Buffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); +}; + +/** + * @brief Host-accessible variable-shape image-batch data. + * + * The host-resident counterpart to ImageBatchVarShapeDataStridedHip. The + * descriptor table, `formatList`, and `hostFormatList` all point to host + * memory; `formatList` and `hostFormatList` MAY alias the same allocation + * since no D->H sync is required. + * + * The lazy host->device descriptor sync that the GPU producer needs is not + * applicable here — host-only varshape batches can edit the descriptor table + * in place and hand it straight to host kernels. The matching producer-side + * design (whether host batches are a separate type, a runtime-tagged variant + * of ImageBatchVarShape, or skipped entirely in favor of CPU-side per-image + * loops) is still open. + */ +class ImageBatchVarShapeDataStridedHost : public ImageBatchVarShapeDataStrided { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeDataStridedHost(int32_t numImages, const ImageBatchBuffer& buffer); + + /** + * @brief Constructs host-accessible varshape image-batch data from the + * concrete strided buffer directly. + * + * @param[in] numImages Number of images currently in the batch. + * @param[in] buffer Descriptor table + per-image format arrays. All + * pointers must reference host memory; `formatList` + * and `hostFormatList` may alias. + */ + ImageBatchVarShapeDataStridedHost(int32_t numImages, const Buffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); +}; + +} // namespace roccv diff --git a/include/core/image_batch_var_shape.hpp b/include/core/image_batch_var_shape.hpp new file mode 100644 index 00000000..3c56f0a4 --- /dev/null +++ b/include/core/image_batch_var_shape.hpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include +#include + +#include +#include +#include + +#include "core/detail/allocators/i_allocator.hpp" +#include "core/detail/var_shape_descriptor_table.hpp" +#include "core/image.hpp" +#include "core/image_batch_data.hpp" +#include "core/image_format.hpp" +#include "exception.hpp" +#include "operator_types.h" + +namespace roccv { + +/** + * @brief Producer-side container for a batch of variable-sized images that + * share a single GPU-resident descriptor table. + * + * Holds up to `capacity()` Image handles and maintains a parallel descriptor + * table that operators can dispatch over without iterating Image-by-Image. + * Capacity is fixed at construction; pushBack/popBack move within it. + * + * The host descriptor mirrors are pinned so the H2D copy in exportData() is a + * true DMA (no runtime bounce buffer) and so the snapshot can expose the same + * pinned pointer as both `formatList`'s host shadow and `hostFormatList`. + * + * Sync model: pushBack/popBack mutate the host mirrors only; the device + * descriptor table is brought up to date lazily inside exportData(stream), + * which copies just the dirty suffix `[dirtyStart, numImages)`. A hipEvent + * (`m_postFence`) guards the host buffers — if a previous exportData's H2D + * is still in flight, pushBack hipEventSynchronize's on the CPU before + * mutating, so the snapshot a consumer is reading never tears. + * + * Residency is fixed at construction (defaults to GPU). A GPU batch keeps a + * device descriptor table mirrored by pinned host buffers and the lazy H2D sync + * above; a CPU batch holds a single host-resident descriptor table with no + * device buffers, no fence, and no sync (exportData hands the host table + * straight to host kernels). pushBack rejects images whose device doesn't match + * the batch's. + */ +class ImageBatchVarShape { + public: + using const_iterator = std::vector::const_iterator; + + /** + * @brief Construct an empty batch with `capacity` slots on `device`, using + * the global default allocator. + */ + explicit ImageBatchVarShape(int32_t capacity, eDeviceType device = eDeviceType::GPU); + + /** + * @brief Construct an empty batch with `capacity` slots on `device`, using + * the supplied allocator. The allocator must outlive the batch. + */ + explicit ImageBatchVarShape(int32_t capacity, const IAllocator &alloc, eDeviceType device = eDeviceType::GPU); + + ~ImageBatchVarShape() = default; + + ImageBatchVarShape(const ImageBatchVarShape &) = delete; + ImageBatchVarShape &operator=(const ImageBatchVarShape &) = delete; + ImageBatchVarShape(ImageBatchVarShape &&) noexcept; + ImageBatchVarShape &operator=(ImageBatchVarShape &&) = delete; + + int32_t capacity() const noexcept { return m_capacity; } + int32_t numImages() const noexcept { return static_cast(m_images.size()); } + + /** + * @brief The device the batch (and every image it accepts) resides on. + */ + eDeviceType device() const noexcept { return m_table.device(); } + + /** + * @brief Append an image to the batch. Throws if capacity would be + * exceeded, the image is CPU-resident, or the image has more than one + * plane (rocCV is single-plane today). + */ + void pushBack(const Image &img); + + /** + * @brief Append a range of images. Strong exception guarantee — if any + * image fails validation, the batch is rolled back to its pre-call state + * and the exception is rethrown. + */ + template + void pushBack(It begin, It end); + + /** + * @brief Remove the trailing `count` images. Throws if `count` exceeds + * numImages(). + */ + void popBack(int32_t count = 1); + + /** + * @brief Drop all images. Buffers are kept; the batch is reusable. + */ + void clear(); + + const Image &operator[](int32_t i) const { return m_images[i]; } + + const_iterator begin() const noexcept { return m_images.cbegin(); } + const_iterator end() const noexcept { return m_images.cend(); } + + /** + * @brief Bounding box across all images, in pixels. Returns Size2D{0, 0} + * for an empty batch. + */ + Size2D maxSize() const; + + /** + * @brief The common ImageFormat across all images, or FMT_NONE if formats + * are heterogeneous or the batch is empty. popBack invalidates the cache + * so the next call rescans and may return an exact format again. + */ + ImageFormat uniqueFormat() const; + + /** + * @brief Build (and return by value) a snapshot of the batch, residency + * matching the batch's device. + * + * The concrete returned object is an ImageBatchVarShapeDataStridedHip for a + * GPU batch or an ImageBatchVarShapeDataStridedHost for a CPU batch; both are + * returned through the common ImageBatchVarShapeDataStrided base, which + * carries the device/buffer-kind tag so callers can recover the leaf via + * cast<>() (see the templated overload). The snapshot is a metadata view + * valid as long as this batch outlives it. + * + * GPU: synchronizes the dirty suffix of the host mirrors to the device + * descriptor table on `stream` first; `imageList`/`formatList` are device + * pointers safe for kernels enqueued on the same stream, and `hostFormatList` + * aliases the pinned host format mirror. CPU: `stream` is unused, no sync + * occurs, and `imageList`/`formatList`/`hostFormatList` are all host pointers + * (`formatList` and `hostFormatList` alias). + */ + ImageBatchVarShapeDataStrided exportData(hipStream_t stream); + + /** + * @brief Build a snapshot and down-cast it to a specific subclass. Throws + * std::bad_cast if the underlying buffer kind doesn't match Derived. + */ + template + Derived exportData(hipStream_t stream); + + private: + void doUpdateCache() const; + + int32_t m_capacity; + detail::VarShapeDescriptorTable m_table; // owns the descriptor buffers, fence, and sync. + std::vector m_images; + + mutable std::optional m_cacheMaxSize; + mutable std::optional m_cacheUniqueFormat; +}; + +template +void ImageBatchVarShape::pushBack(It begin, It end) { + const int32_t incoming = static_cast(std::distance(begin, end)); + if (incoming + numImages() > m_capacity) { + throw Exception("ImageBatchVarShape::pushBack range would exceed capacity", eStatusType::OUT_OF_BOUNDS); + } + + const int32_t oldNumImages = numImages(); + const auto oldMaxSize = m_cacheMaxSize; + const auto oldUniqueFormat = m_cacheUniqueFormat; + + try { + for (auto it = begin; it != end; ++it) { + pushBack(*it); + } + } catch (...) { + m_images.erase(m_images.begin() + oldNumImages, m_images.end()); + m_cacheMaxSize = oldMaxSize; + m_cacheUniqueFormat = oldUniqueFormat; + throw; + } +} + +template +Derived ImageBatchVarShape::exportData(hipStream_t stream) { + ImageBatchVarShapeDataStrided data = exportData(stream); + auto derived = data.cast(); + if (!derived.has_value()) { + throw std::bad_cast(); + } + return derived.value(); +} + +} // namespace roccv diff --git a/include/core/image_buffer.hpp b/include/core/image_buffer.hpp new file mode 100644 index 00000000..aea93c2d --- /dev/null +++ b/include/core/image_buffer.hpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include + +/** Maximum number of data planes an image can have. */ +#define ROCCV_MAX_IMAGE_PLANES (6) + +namespace roccv { + +/** + * @brief Describes a single pitch-linear image plane. + * + * For interleaved-channel formats there is exactly one plane covering the whole + * image. For planar formats (e.g. NV12, YUV420) each channel/plane carries its + * own width, height, and row stride and lives in its own buffer. + */ +struct ImagePlaneStrided { + /** Width of this plane in pixels. Must be >= 1. */ + int32_t width; + + /** Height of this plane in pixels. Must be >= 1. */ + int32_t height; + + /** Distance in bytes between the start of consecutive rows. Must be at + * least `(width * bits-per-pixel + 7) / 8`. */ + int64_t rowStride; + + /** Pointer to the first byte of plane data. Validity (device vs host) is + * determined by the enclosing data type. */ + void* basePtr; +}; + +/** + * @brief A pitch-linear image buffer: one or more `ImagePlaneStrided` entries. + * + * Only the first `numPlanes` entries carry valid data; the remainder of the + * fixed-size `planes` array is unused. Capping the array size keeps the buffer + * trivially copyable so it can ride inside `ImageBuffer` without an + * allocation. + */ +struct ImageBufferStrided { + /** Number of valid planes. Must be >= 1. */ + int32_t numPlanes; + + /** Per-plane descriptors. Only the first `numPlanes` are valid. */ + ImagePlaneStrided planes[ROCCV_MAX_IMAGE_PLANES]; +}; + +/** + * @brief An image buffer. Currently only the strided variant is supported. + * Mirrors the role `TensorBuffer` plays for tensors and is intentionally + * shaped as a tagged-union-style aggregate so additional buffer kinds can be + * added later (e.g. HIP textures) without changing the public type. + */ +struct ImageBuffer { + ImageBufferStrided strided; +}; + +} // namespace roccv diff --git a/include/core/image_data.hpp b/include/core/image_data.hpp new file mode 100644 index 00000000..cf45e71c --- /dev/null +++ b/include/core/image_data.hpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include + +#include +#include + +#include "core/image_buffer.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" +#include "operator_types.h" + +namespace roccv { + +/** + * @brief Discriminator for the kind of buffer an ImageData carries. Used by + * IsCompatibleKind() / cast<>() to perform safe runtime down-casting through + * the ImageData hierarchy. + */ +enum class ImageBufferType { + IMAGE_BUFFER_NONE, // Default/invalid buffer type. Used when no buffer type is specified. + IMAGE_BUFFER_STRIDED_HIP, // GPU-accessible buffer with strided access. + IMAGE_BUFFER_STRIDED_HOST // Host-accessible buffer with strided access. +}; + +/** + * @brief Holds the underlying image data alongside metadata (format, buffer + * kind). Non-strided image data is not supported for use right now; use + * ImageDataStrided to access strided image data instead. + * + * ImageData is the interchange type for a single variable-sized image. It + * does not own the underlying pixel buffer — it is a metadata snapshot, valid + * only as long as the producing buffer outlives it. + */ +class ImageData { + public: + ImageData() = delete; + virtual ~ImageData() = default; + + /** + * @brief Returns the pixel format of the image. + */ + virtual const ImageFormat &format() const; + + /** + * @brief Returns the device the image data resides on. + */ + virtual eDeviceType device() const; + + /** + * @brief Attempts to down-cast this ImageData to a more specific subclass. + * Returns the casted value if the underlying buffer kind matches what + * Derived expects, or std::nullopt otherwise. + * + * @tparam Derived The target subclass to cast to. + */ + template + std::optional cast() const { + static_assert(std::is_base_of::value, "Cannot cast ImageData to an unrelated type."); + static_assert(sizeof(Derived) == sizeof(ImageData), "Derived type must not add any additional data members."); + + if (!Derived::IsCompatibleKind(m_bufferType)) { + return std::nullopt; + } + + return std::make_optional(m_format, m_buffer); + } + + static bool IsCompatibleKind(ImageBufferType bufferType); + + protected: + ImageData(const ImageFormat &format, const ImageBuffer &buffer); + + ImageFormat m_format; + eDeviceType m_deviceType; + ImageBufferType m_bufferType; + ImageBuffer m_buffer; +}; + +/** + * @brief Image data backed by one or more pitch-linear planes. Adds typed + * accessors for plane descriptors on top of the base ImageData. Sub-classed + * by ImageDataStridedHip and ImageDataStridedHost to discriminate device vs + * host residency. + */ +class ImageDataStrided : public ImageData { + public: + using Buffer = ImageBufferStrided; + + ImageDataStrided(const ImageFormat &format, const ImageBuffer &buffer); + + static bool IsCompatibleKind(ImageBufferType bufferType); + + /** + * @brief Returns the logical image dimensions, taken from plane 0. For + * planar formats, individual planes may have smaller dimensions (e.g. + * chroma sub-sampling); use plane(p) to inspect each plane directly. + */ + Size2D size() const; + + /** + * @brief Returns the number of valid planes in the buffer. + */ + int32_t numPlanes() const; + + /** + * @brief Returns the descriptor for the requested plane. + * + * @param[in] p The plane index. Must satisfy `0 <= p < numPlanes()`. + */ + const ImagePlaneStrided &plane(int32_t p) const; +}; + +/** + * @brief GPU-accessible strided image data. + */ +class ImageDataStridedHip : public ImageDataStrided { + public: + using Buffer = ImageBufferStrided; + + ImageDataStridedHip(const ImageFormat &format, const ImageBuffer &buffer); + + /** + * @brief Constructs GPU-accessible strided image data from a strided + * image buffer directly. + * + * @param[in] format The pixel format. + * @param[in] buffer A strided image buffer with planes allocated on the GPU. + */ + ImageDataStridedHip(const ImageFormat &format, const Buffer &buffer); + + static bool IsCompatibleKind(ImageBufferType bufferType); +}; + +/** + * @brief Host-accessible strided image data. + */ +class ImageDataStridedHost : public ImageDataStrided { + public: + using Buffer = ImageBufferStrided; + + ImageDataStridedHost(const ImageFormat &format, const ImageBuffer &buffer); + + /** + * @brief Constructs host-accessible strided image data from a strided + * image buffer directly. + * + * @param[in] format The pixel format. + * @param[in] buffer A strided image buffer with planes allocated on the host. + */ + ImageDataStridedHost(const ImageFormat &format, const Buffer &buffer); + + static bool IsCompatibleKind(ImageBufferType bufferType); +}; + +} // namespace roccv diff --git a/include/core/image_format.hpp b/include/core/image_format.hpp index 7dd891f3..ddb1d100 100644 --- a/include/core/image_format.hpp +++ b/include/core/image_format.hpp @@ -40,7 +40,10 @@ enum class eSwizzle { */ class ImageFormat { public: - explicit ImageFormat() {} + /** + * @brief Default-constructs to FMT_NONE. + */ + constexpr ImageFormat() : m_dtype(eDataType::DATA_TYPE_U8), m_numChannels(0), m_swizzle(eSwizzle::XYZW) {} explicit constexpr ImageFormat(eDataType dtype, int32_t numChannels, eSwizzle swizzle = eSwizzle::XYZW) : m_dtype(dtype), m_numChannels(numChannels), m_swizzle(swizzle) {} @@ -48,12 +51,20 @@ class ImageFormat { int32_t channels() const noexcept; eSwizzle swizzle() const noexcept; + constexpr bool operator==(const ImageFormat& other) const noexcept { + return m_dtype == other.m_dtype && m_numChannels == other.m_numChannels && m_swizzle == other.m_swizzle; + } + constexpr bool operator!=(const ImageFormat& other) const noexcept { return !(*this == other); } + private: eDataType m_dtype; int32_t m_numChannels; eSwizzle m_swizzle; }; +// Undefined format. Used to represent an uninitialized or invalid format. +constexpr ImageFormat FMT_NONE{eDataType::DATA_TYPE_U8, 0, eSwizzle::XYZW}; + // Single plane with one 8-bit unsigned integer channel. constexpr ImageFormat FMT_U8(eDataType::DATA_TYPE_U8, 1, eSwizzle::XYZW); diff --git a/include/core/image_storage.hpp b/include/core/image_storage.hpp new file mode 100644 index 00000000..70984742 --- /dev/null +++ b/include/core/image_storage.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +namespace roccv { + +/** + * @brief Holds the raw data pointer for a single Image and serves as the + * refcount target shared between Image handles. + * + * ImageStorage carries no lifecycle logic of its own: freeing the underlying + * buffer is the responsibility of the shared_ptr deleter + * installed at the Image construction site. The allocating Image ctor + * captures the allocator + device into its deleter; ImageWrapData captures + * the user's cleanup callback (or installs none for the view-only case). + * + * As a result, ImageStorage is held only by shared_ptr — never by value, never + * copied. Move/copy are deleted to enforce that. + */ +class ImageStorage { + public: + explicit ImageStorage(void* data) : m_data(data) {} + + ImageStorage(const ImageStorage&) = delete; + ImageStorage& operator=(const ImageStorage&) = delete; + + void* data() const noexcept { return m_data; } + + private: + void* m_data; +}; + +} // namespace roccv diff --git a/include/core/wrappers/border_wrapper.hpp b/include/core/wrappers/border_wrapper.hpp index f55493a5..36128b37 100644 --- a/include/core/wrappers/border_wrapper.hpp +++ b/include/core/wrappers/border_wrapper.hpp @@ -23,38 +23,39 @@ #include -#include "core/wrappers/image_wrapper.hpp" +#include +#include + +#include "core/wrappers/tensor_wrapper.hpp" #include "operator_types.h" namespace roccv { /** - * @brief Wrapper class for ImageWrapper. This extends the descriptors by defining behaviors for when tensor - * coordinates go out of scope. + * @brief Wrapper class which adds border-handling behavior on top of an underlying image wrapper. + * + * Templated on the wrapper type W (e.g. TensorWrapper, ImageBatchVarShapeWrapper) so the same border math + * serves both uniform-shape and variable-shape image batches. The pixel value type T is recovered from + * W::ValueType. W must expose: ValueType, at(n,h,w,c), width(n), height(n), batches(), channels(). * - * @tparam T The underlying data type of the tensor. * @tparam BorderType The border type to use when coordinates are out of bounds. + * @tparam W The underlying image wrapper type. */ -template +template class BorderWrapper { public: - /** - * @brief Wraps an ImageWrapper and extends its capabilities to handle out of bounds coordinates. - * - * @param tensor The tensor to wrap. - * @param border_value The fallback border color to use when using a constant border mode. - */ - BorderWrapper(const Tensor& tensor, T border_value) : m_desc(tensor), m_border_value(border_value) {} + using ValueType = typename W::ValueType; + using WrapperType = W; + static constexpr eBorderType kBorderType = BorderType; /** - * @brief Constructs a BorderWrapper from an existing ImageWrapper. Extends its capabilities to handle out of bound - * coordinates. + * @brief Constructs a BorderWrapper from an existing image wrapper. Extends its capabilities to handle out of + * bound coordinates. * - * @param image_wrapper The ImageWrapper to wrap around the BorderWrapper. - * @param border_value The fallback border color to use when using a constant border mode. + * @param image_wrapper The image wrapper to wrap. + * @param border_value The fallback border color to use when using a constant border mode. */ - BorderWrapper(ImageWrapper image_wrapper, T border_value) - : m_desc(image_wrapper), m_border_value(border_value) {} + BorderWrapper(W image_wrapper, ValueType border_value) : m_desc(image_wrapper), m_border_value(border_value) {} /** * @brief Returns a reference to the underlying data given image coordinates. If the coordinates fall out of bounds, @@ -66,11 +67,14 @@ class BorderWrapper { * @param c The channel index. * @return A reference to the underlying data or a fallback border value of type T. */ - __device__ __host__ const T at(int64_t n, int64_t h, int64_t w, int64_t c) const { + __device__ __host__ const ValueType at(int64_t n, int64_t h, int64_t w, int64_t c) const { + const int64_t imgWidth = width(n); + const int64_t imgHeight = height(n); + // Constant border type implementation. This is a special case which doesn't remap values, but rather returns // the provided constant value. if constexpr (BorderType == eBorderType::BORDER_TYPE_CONSTANT) { - if (w < 0 || w >= width() || h < 0 || h >= height()) + if (w < 0 || w >= imgWidth || h < 0 || h >= imgHeight) return m_border_value; else return m_desc.at(n, h, w, c); @@ -80,13 +84,12 @@ class BorderWrapper { // required at image borders. While this may cause branch divergence, a good bulk of the pixels should fall // within image bounds and will take the same branch. This is preferred over having to do expensive calculations // for EVERY pixel in the image (most of which do not require said calculations). - if (w >= 0 && w < width() && h >= 0 && h < height()) { + if (w >= 0 && w < imgWidth && h >= 0 && h < imgHeight) { return m_desc.at(n, h, w, c); } // Otherwise, do some additional calculations to map the provided x and y coordinates to be within bounds. int64_t x = w, y = h; - int64_t imgWidth = width(), imgHeight = height(); // Reflect border type implementation. (Note: This is NOT REFLECT101, pixels at the border will be duplicated as // is the intended behavior for this border mode.) @@ -139,18 +142,20 @@ class BorderWrapper { } /** - * @brief Retrives the height of the images. + * @brief Retrives the height of the image at batch index n. * + * @param n Batch index. Ignored when W is a uniform-shape wrapper. * @return Image height. */ - __device__ __host__ inline int64_t height() const { return m_desc.height(); } + __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); } /** - * @brief Retrieves the width of the image. + * @brief Retrieves the width of the image at batch index n. * + * @param n Batch index. Ignored when W is a uniform-shape wrapper. * @return Image width. */ - __device__ __host__ inline int64_t width() const { return m_desc.width(); } + __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); } /** * @brief Retrieves the number of batches in the image tensor. @@ -167,7 +172,21 @@ class BorderWrapper { __device__ __host__ inline int64_t channels() const { return m_desc.channels(); } private: - ImageWrapper m_desc; - T m_border_value; + W m_desc; + ValueType m_border_value; }; -} // namespace roccv \ No newline at end of file + +/** + * @brief Factory for BorderWrapper. Deduces the underlying wrapper type W from the argument so callers + * only need to spell the border-mode policy explicitly. + * + * @tparam B The border mode to apply. + * @param wrap The underlying image wrapper. + * @param borderValue Fallback value used when B is BORDER_TYPE_CONSTANT. + */ +template +auto MakeBorderWrapper(W wrap, typename W::ValueType borderValue) { + return BorderWrapper(wrap, borderValue); +} + +} // namespace roccv diff --git a/include/core/wrappers/image_batch_var_shape_wrapper.hpp b/include/core/wrappers/image_batch_var_shape_wrapper.hpp new file mode 100644 index 00000000..b32bbdbb --- /dev/null +++ b/include/core/wrappers/image_batch_var_shape_wrapper.hpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include + +#include +#include + +#include "core/detail/type_traits.hpp" +#include "core/image_batch_data.hpp" +#include "core/image_buffer.hpp" + +namespace roccv { + +/** + * @brief ImageBatchVarShapeWrapper is a non-owning, kernel-friendly view over an ImageBatchVarShape's + * descriptor table. It satisfies the same wrapper concept as TensorWrapper + * (ValueType, at(n,h,w,c), width(n), height(n), batches(), channels()) so it composes with + * BorderWrapper / InterpolationWrapper unchanged. + * + * Single-plane interleaved (NHWC-style) only — ImageBatchVarShape rejects multi-plane images + * at pushBack, and channel count is derived from T via detail::NumElements. + * + * Pointer residency follows the snapshot it is built from: for a GPU batch + * (ImageBatchVarShapeDataStridedHip) m_imageList is a device pointer, so the wrapper is usable from + * device code; for a CPU batch (ImageBatchVarShapeDataStridedHost) it is a host pointer, usable from + * host code. Every at()/width()/height() call dereferences it, so the caller must run the wrapper on + * the side matching the snapshot's residency. + * + * @tparam T The datatype of an individual pixel (e.g. uchar1, uchar3, uchar4, float1, float4). + */ +template +class ImageBatchVarShapeWrapper { + public: + using ValueType = T; + using BaseType = detail::BaseType; + + ImageBatchVarShapeWrapper() = default; + + /** + * @brief Creates a ImageBatchVarShapeWrapper from a varshape batch data snapshot. + * + * Accepts either residency through the common ImageBatchVarShapeDataStrided base — a GPU + * (...Hip) snapshot yields a device-usable wrapper, a CPU (...Host) snapshot a host-usable one. + * + * @param data The exported descriptor table from ImageBatchVarShape::exportData(stream). + */ + __host__ ImageBatchVarShapeWrapper(const ImageBatchVarShapeDataStrided& data) + : m_imageList(data.imageList()), m_numImages(data.numImages()) {} + + /** + * @brief Returns a reference to data at given image-batch coordinates. + * + * @param n Batch index. + * @param h Row index within image n. + * @param w Column index within image n. + * @param c Channel index within the pixel. + * @return A reference to the underlying pixel-channel value. + */ + __device__ __host__ T& at(int64_t n, int64_t h, int64_t w, int64_t c) { return *doGetPtr(n, h, w, c); } + + __device__ __host__ const T at(int64_t n, int64_t h, int64_t w, int64_t c) const { return *doGetPtr(n, h, w, c); } + + /** + * @brief Width of the image at batch index n. + */ + __device__ __host__ inline int64_t width(int64_t n) const { return m_imageList[n].planes[0].width; } + + /** + * @brief Height of the image at batch index n. + */ + __device__ __host__ inline int64_t height(int64_t n) const { return m_imageList[n].planes[0].height; } + + /** + * @brief Number of images in the batch. + */ + __device__ __host__ inline int64_t batches() const { return m_numImages; } + + /** + * @brief Number of channels per pixel. Derived from T, identical across all images in v1. + */ + __device__ __host__ inline int64_t channels() const { return detail::NumElements; } + + private: + __device__ __host__ inline T* doGetPtr(int64_t n, int64_t h, int64_t w, int64_t c) const { + // Single-plane interleaved NHWC layout: pixel stride is sizeof(T), channel stride is + // sizeof(BaseType). Match TensorWrapper::at semantics — returns a T* offset to (h, w) + // and additionally shifted by c channels. + const ImagePlaneStrided& p = m_imageList[n].planes[0]; + unsigned char* addr = + reinterpret_cast(p.basePtr) + h * p.rowStride + w * sizeof(T) + c * sizeof(BaseType); + return reinterpret_cast(addr); + } + + const ImageBufferStrided* m_imageList = nullptr; + int32_t m_numImages = 0; +}; + +} // namespace roccv diff --git a/include/core/wrappers/interpolation_wrapper.hpp b/include/core/wrappers/interpolation_wrapper.hpp index 7adb8cb6..e4e8c037 100644 --- a/include/core/wrappers/interpolation_wrapper.hpp +++ b/include/core/wrappers/interpolation_wrapper.hpp @@ -23,41 +23,38 @@ #include "core/detail/casting.hpp" #include "core/detail/math/vectorized_type_math.hpp" -#include "core/wrappers/border_wrapper.hpp" #include "core/detail/vector_utils.hpp" +#include "core/wrappers/border_wrapper.hpp" #include "operator_types.h" namespace roccv { /** - * @brief A kernel-friendly wrapper which provides interpolation logic based on the given - * coordinates. This tensor wrapper is typically only used for input tensors and does not provide write access to its - * underlying data. + * @brief A kernel-friendly wrapper which provides interpolation logic on top of a BorderWrapper. + * + * Templated directly on the BorderWrapper type so the redundant border-mode and underlying-wrapper template + * parameters need only be spelled once (in the BorderWrapper type). Recover the border mode via + * BW::kBorderType and the underlying wrapper type via BW::WrapperType. * - * @tparam T Underlying data type of the tensor data. - * @tparam C Number of channels in data type. - * @tparam B Border type to use for interpolation. - * @tparam I Interpolation type to use. + * Read-only access; do not use for output tensors. + * + * @tparam I Interpolation type to use. + * @tparam BW The BorderWrapper type to wrap. Must expose ValueType plus at(n,h,w,c), width(n), height(n). */ -template +template class InterpolationWrapper { public: - /** - * @brief Wraps a roccv::Tensor in an InterpolationWrapper to provide pixel interpolation when accessing - * non-integer coordinate mappings. - * - * @param tensor The tensor to wrap. - * @param border_value A fallback border value to use in the case of a constant border mode. - */ - InterpolationWrapper(const Tensor& tensor, T border_value) : m_desc(tensor, border_value) {} + using ValueType = typename BW::ValueType; + using BorderType = BW; + static constexpr eInterpolationType kInterpolationType = I; /** - * @brief Wraps a BorderWrapper in an Interpolation wrapper. Extends capabilities to interpolate pixel values when - * given non-integer coordinates. + * @brief Wraps a BorderWrapper in an InterpolationWrapper. Extends capabilities to interpolate pixel values + * when given non-integer coordinates. * * @param borderWrapper The BorderWrapper to wrap. */ - InterpolationWrapper(BorderWrapper borderWrapper) : m_desc(borderWrapper) {} + InterpolationWrapper(BW borderWrapper) : m_desc(borderWrapper) {} /** * @brief This function calculates the weighting coefficients for the Catmull-Rom cubic interpolation. @@ -92,7 +89,7 @@ class InterpolationWrapper { * @param w Width coordinates. * @return An interpolated value. */ - inline __device__ __host__ const T at(int64_t n, float h, float w, int64_t c) const { + inline __device__ __host__ ValueType at(int64_t n, float h, float w, int64_t c) const { if constexpr (I == eInterpolationType::INTERP_TYPE_NEAREST) { // Nearest neighbor interpolation implementation return m_desc.at(n, lroundf(h), lroundf(w), c); @@ -102,7 +99,7 @@ class InterpolationWrapper { // - - // v3 -- v4 - using WorkType = detail::MakeType>; + using WorkType = detail::MakeType>; int64_t x0 = static_cast(floorf(w)); int64_t x1 = x0 + 1; @@ -118,10 +115,10 @@ class InterpolationWrapper { auto q2 = v3 * (x1 - w) + v4 * (w - x0); auto q = q1 * (y1 - h) + q2 * (h - y0); - return detail::RangeCast(q); + return detail::RangeCast(q); } else if constexpr (I == eInterpolationType::INTERP_TYPE_CUBIC) { using namespace roccv::detail; - using WorkType = detail::MakeType>; + using WorkType = detail::MakeType>; // Integer coordinates for pixel (x, y) int64_t int_x = static_cast(floorf(w)); @@ -136,20 +133,34 @@ class InterpolationWrapper { WorkType sum = SetAll(0.0f); for (int index_y = -1; index_y <= 2; index_y++) { for (int index_x = -1; index_x <= 2; index_x++) { - sum += detail::RangeCast(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) * (weight_x[index_x + 1] * weight_y[index_y + 1]); + sum += detail::RangeCast(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) * + (weight_x[index_x + 1] * weight_y[index_y + 1]); } } - return detail::RangeCast(sum); + return detail::RangeCast(sum); } } - __device__ __host__ inline int64_t height() const { return m_desc.height(); } - __device__ __host__ inline int64_t width() const { return m_desc.width(); } + __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); } + __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); } __device__ __host__ inline int64_t batches() const { return m_desc.batches(); } __device__ __host__ inline int64_t channels() const { return m_desc.channels(); } private: - BorderWrapper m_desc; + BW m_desc; }; + +/** + * @brief Factory for InterpolationWrapper. Deduces the BorderWrapper type BW (and its border mode + + * underlying wrapper) from the argument; callers only need to spell the interpolation policy. + * + * @tparam I The interpolation type. + * @param borderWrap An already-constructed BorderWrapper (typically via MakeBorderWrapper(...)). + */ +template +auto MakeInterpolationWrapper(BW borderWrap) { + return InterpolationWrapper(borderWrap); +} + } // namespace roccv \ No newline at end of file diff --git a/include/core/wrappers/image_wrapper.hpp b/include/core/wrappers/tensor_wrapper.hpp similarity index 81% rename from include/core/wrappers/image_wrapper.hpp rename to include/core/wrappers/tensor_wrapper.hpp index e174c64a..be776576 100644 --- a/include/core/wrappers/image_wrapper.hpp +++ b/include/core/wrappers/tensor_wrapper.hpp @@ -31,28 +31,30 @@ namespace roccv { /** - * @brief ImageWrapper is a non-owning wrapper for roccv::Tensors with a NHWC/NCHW/HWC layout. It provides + * @brief TensorWrapper is a non-owning wrapper for roccv::Tensors with a NHWC/NCHW/HWC layout. It provides * methods for accessing the underlying data within HIP kernels. * * @tparam T The datatype of the underlying tensor data. */ template -class ImageWrapper { +class TensorWrapper { public: using ValueType = T; using BaseType = detail::BaseType; + TensorWrapper() = default; + /** - * @brief Creates an ImageWrapper from a Tensor. + * @brief Creates an TensorWrapper from a Tensor. * - * @param tensor The Tensor to be represented by the ImageWrapper. + * @param tensor The Tensor to be represented by the TensorWrapper. */ - ImageWrapper(const Tensor& tensor) { + TensorWrapper(const Tensor& tensor) { if (tensor.layout() != eTensorLayout::TENSOR_LAYOUT_NHWC && tensor.layout() != eTensorLayout::TENSOR_LAYOUT_NCHW && tensor.layout() != eTensorLayout::TENSOR_LAYOUT_HWC && tensor.layout() != eTensorLayout::TENSOR_LAYOUT_CHW) { - throw Exception("The given tensor layout is not supported for ImageWrapper", eStatusType::NOT_IMPLEMENTED); + throw Exception("The given tensor layout is not supported for TensorWrapper", eStatusType::NOT_IMPLEMENTED); } // Copy tensor data into image tensor descriptor @@ -71,14 +73,14 @@ class ImageWrapper { } /** - * @brief Creates an ImageWrapper from a vector. + * @brief Creates an TensorWrapper from a vector. * * @param input The input vector to wrap. * @param batchSize The number of images within the batch. * @param width The width of each image within the batch. * @param height The height of each image within the batch. */ - ImageWrapper(std::vector& input, int32_t batchSize, int32_t width, int32_t height) { + TensorWrapper(std::vector& input, int32_t batchSize, int32_t width, int32_t height) { // Calculate strides based on input (byte-wise strides) stride.c = sizeof(BaseType); stride.w = stride.c * detail::NumElements; @@ -96,14 +98,14 @@ class ImageWrapper { } /** - * @brief Creates an ImageWrapper from a pointer. + * @brief Creates an TensorWrapper from a pointer. * * @param input The input pointer to wrap. * @param batchSize The number of images within the batch. * @param width The width of each image within the batch. * @param height The height of each image within the batch. */ - ImageWrapper(void* input, int32_t batchSize, int32_t width, int32_t height) { + TensorWrapper(void* input, int32_t batchSize, int32_t width, int32_t height) { // Calculate strides based on input (byte-wise strides) stride.c = sizeof(BaseType); stride.w = stride.c * detail::NumElements; @@ -139,16 +141,22 @@ class ImageWrapper { /** * @brief Retrives the height of the images. * + * @param n Batch index. Ignored for uniform-shape TensorWrapper; included so the signature matches + * ImageBatchVarShapeWrapper, allowing both to satisfy the wrapper concept consumed by + * BorderWrapper / InterpolationWrapper. * @return Image height. */ - __device__ __host__ inline int64_t height() const { return shape.h; } + __device__ __host__ inline int64_t height(int64_t /*n*/ = 0) const { return shape.h; } /** * @brief Retrieves the width of the image. * + * @param n Batch index. Ignored for uniform-shape TensorWrapper; included so the signature matches + * ImageBatchVarShapeWrapper, allowing both to satisfy the wrapper concept consumed by + * BorderWrapper / InterpolationWrapper. * @return Image width. */ - __device__ __host__ inline int64_t width() const { return shape.w; } + __device__ __host__ inline int64_t width(int64_t /*n*/ = 0) const { return shape.w; } /** * @brief Retrieves the number of batches in the image tensor. diff --git a/include/kernels/device/convert_to_device.hpp b/include/kernels/device/convert_to_device.hpp index 67596f36..4e20be42 100644 --- a/include/kernels/device/convert_to_device.hpp +++ b/include/kernels/device/convert_to_device.hpp @@ -26,7 +26,7 @@ THE SOFTWARE. #include "core/detail/casting.hpp" #include "core/detail/type_traits.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" namespace Kernels { namespace Device { diff --git a/include/kernels/device/copy_make_border_device.hpp b/include/kernels/device/copy_make_border_device.hpp index aeae2d38..7fd16461 100644 --- a/include/kernels/device/copy_make_border_device.hpp +++ b/include/kernels/device/copy_make_border_device.hpp @@ -29,9 +29,9 @@ namespace Device { * @brief GPU kernel for CopyMakeBorder operator. * * @tparam SrcDesc Must be a BorderWrapper. - * @tparam DstDesc Must be a ImageWrapper. + * @tparam DstDesc Must be a TensorWrapper. * @param src A BorderWrapper containing information for the input tensor. - * @param dst A ImageWrapper containing information for the output tensor. + * @param dst A TensorWrapper containing information for the output tensor. * @param top The top pixel coordinate on the y-axis where the border should start. * @param left The left-most pixel coordinate on the x-axis where the border should start. * @return __global__ diff --git a/include/kernels/device/reformat_device.hpp b/include/kernels/device/reformat_device.hpp index 35054752..95d6c32a 100644 --- a/include/kernels/device/reformat_device.hpp +++ b/include/kernels/device/reformat_device.hpp @@ -21,7 +21,7 @@ #include -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" namespace Kernels::Device { @@ -34,7 +34,7 @@ namespace Kernels::Device { * @param[out] output The output tensor. */ template -__global__ void reformat(roccv::ImageWrapper input, roccv::ImageWrapper output) { +__global__ void reformat(roccv::TensorWrapper input, roccv::TensorWrapper output) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; const int b = blockIdx.z; diff --git a/include/kernels/host/convert_to_host.hpp b/include/kernels/host/convert_to_host.hpp index 93c19521..13b8381c 100644 --- a/include/kernels/host/convert_to_host.hpp +++ b/include/kernels/host/convert_to_host.hpp @@ -25,7 +25,7 @@ THE SOFTWARE. #include #include "core/detail/casting.hpp" #include "core/detail/type_traits.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" namespace Kernels { namespace Host { diff --git a/include/kernels/host/reformat_host.hpp b/include/kernels/host/reformat_host.hpp index 69e21b67..00980104 100644 --- a/include/kernels/host/reformat_host.hpp +++ b/include/kernels/host/reformat_host.hpp @@ -23,7 +23,7 @@ #include -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" namespace Kernels::Host { @@ -36,7 +36,7 @@ namespace Kernels::Host { * @param[out] output The output tensor. */ template -void reformat(roccv::ImageWrapper input, roccv::ImageWrapper output) { +void reformat(roccv::TensorWrapper input, roccv::TensorWrapper output) { #pragma omp parallel for for (int b = 0; b < output.batches(); b++) { for (int y = 0; y < output.height(); y++) { diff --git a/src/core/detail/var_shape_descriptor_table.cpp b/src/core/detail/var_shape_descriptor_table.cpp new file mode 100644 index 00000000..d22f6c4c --- /dev/null +++ b/src/core/detail/var_shape_descriptor_table.cpp @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "core/detail/var_shape_descriptor_table.hpp" + +#include + +#include "core/exception.hpp" +#include "core/hip_assert.h" + +namespace roccv::detail { + +VarShapeDescriptorTable::VarShapeDescriptorTable(int32_t capacity, eDeviceType device, const IAllocator& alloc) + : m_device(device), m_allocator(alloc) { + if (capacity <= 0) { + throw Exception("ImageBatchVarShape capacity must be positive", eStatusType::INVALID_VALUE); + } + + const size_t imagesBytes = sizeof(ImageBufferStrided) * capacity; + const size_t formatsBytes = sizeof(ImageFormat) * capacity; + + try { + if (m_device == eDeviceType::GPU) { + // Device descriptor table + pinned host mirrors, kept in sync by the + // lazy H2D copy in sync() and guarded by m_fence. + m_devImages = static_cast(m_allocator.allocHipMem(imagesBytes)); + m_devFormats = static_cast(m_allocator.allocHipMem(formatsBytes)); + m_hostImages = static_cast(m_allocator.allocHostPinnedMem(imagesBytes)); + m_hostFormats = static_cast(m_allocator.allocHostPinnedMem(formatsBytes)); + + HIP_VALIDATE_NO_ERRORS(hipEventCreateWithFlags(&m_fence, hipEventDisableTiming)); + } else { + // A single host-resident table handed straight to host kernels: no + // device buffers, no pinned memory, no fence. + m_hostImages = static_cast(m_allocator.allocHostMem(imagesBytes)); + m_hostFormats = static_cast(m_allocator.allocHostMem(formatsBytes)); + } + } catch (...) { + freeAll(); + throw; + } +} + +VarShapeDescriptorTable::~VarShapeDescriptorTable() { + if (m_fencePending && m_fence != nullptr) { + // Drain any in-flight H2D copy before freeing the host mirrors it reads + // from. (void) — destructors must not throw. + (void)hipEventSynchronize(m_fence); + } + if (m_fence != nullptr) { + (void)hipEventDestroy(m_fence); + } + freeAll(); +} + +VarShapeDescriptorTable::VarShapeDescriptorTable(VarShapeDescriptorTable&& other) noexcept + : m_device(other.m_device), + m_allocator(other.m_allocator), + m_dirtyStartingFromIndex(other.m_dirtyStartingFromIndex), + m_fencePending(other.m_fencePending), + m_devImages(other.m_devImages), + m_devFormats(other.m_devFormats), + m_hostImages(other.m_hostImages), + m_hostFormats(other.m_hostFormats), + m_fence(other.m_fence) { + other.m_dirtyStartingFromIndex = 0; + other.m_fencePending = false; + other.m_devImages = nullptr; + other.m_devFormats = nullptr; + other.m_hostImages = nullptr; + other.m_hostFormats = nullptr; + other.m_fence = nullptr; +} + +void VarShapeDescriptorTable::freeAll() noexcept { + // The host mirrors are pinned for a GPU table and plain host memory for a CPU + // table, so only their free path differs. The device frees are null-guarded, + // so a CPU table (whose device pointers are null) skips them. + if (m_device == eDeviceType::GPU) { + if (m_hostFormats != nullptr) m_allocator.freeHostPinnedMem(m_hostFormats); + if (m_hostImages != nullptr) m_allocator.freeHostPinnedMem(m_hostImages); + } else { + if (m_hostFormats != nullptr) m_allocator.freeHostMem(m_hostFormats); + if (m_hostImages != nullptr) m_allocator.freeHostMem(m_hostImages); + } + if (m_devFormats != nullptr) m_allocator.freeHipMem(m_devFormats); + if (m_devImages != nullptr) m_allocator.freeHipMem(m_devImages); +} + +void VarShapeDescriptorTable::writeSlot(int32_t index, const ImageBufferStrided& slot, ImageFormat format) { + if (m_fencePending) { + HIP_VALIDATE_NO_ERRORS(hipEventSynchronize(m_fence)); + m_fencePending = false; + } + m_hostImages[index] = slot; + m_hostFormats[index] = format; +} + +void VarShapeDescriptorTable::onShrink(int32_t newNumImages) noexcept { + m_dirtyStartingFromIndex = std::min(m_dirtyStartingFromIndex, newNumImages); +} + +VarShapeDescriptorTable::Snapshot VarShapeDescriptorTable::sync(hipStream_t stream, int32_t numImages) { + // CPU tables have a single host table — nothing to copy. Only a GPU table with + // a dirty suffix issues an H2D copy and records the fence. + if (m_device == eDeviceType::GPU && m_dirtyStartingFromIndex < numImages) { + const int32_t dirtyCount = numImages - m_dirtyStartingFromIndex; + + if (m_fencePending) { + HIP_VALIDATE_NO_ERRORS(hipStreamWaitEvent(stream, m_fence, /*flags=*/0)); + } + + HIP_VALIDATE_NO_ERRORS(hipMemcpyAsync(m_devImages + m_dirtyStartingFromIndex, + m_hostImages + m_dirtyStartingFromIndex, + sizeof(ImageBufferStrided) * dirtyCount, hipMemcpyHostToDevice, stream)); + HIP_VALIDATE_NO_ERRORS(hipMemcpyAsync(m_devFormats + m_dirtyStartingFromIndex, + m_hostFormats + m_dirtyStartingFromIndex, + sizeof(ImageFormat) * dirtyCount, hipMemcpyHostToDevice, stream)); + + HIP_VALIDATE_NO_ERRORS(hipEventRecord(m_fence, stream)); + m_fencePending = true; + } + m_dirtyStartingFromIndex = numImages; + + if (m_device == eDeviceType::GPU) { + return Snapshot{m_devImages, m_devFormats, m_hostFormats}; + } + // CPU: imageList/formatList are the host table; hostFormatList aliases it. + return Snapshot{m_hostImages, m_hostFormats, m_hostFormats}; +} + +} // namespace roccv::detail diff --git a/src/core/image.cpp b/src/core/image.cpp new file mode 100644 index 00000000..d6077dcb --- /dev/null +++ b/src/core/image.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "core/image.hpp" + +#include +#include + +#include "core/data_type.hpp" +#include "core/detail/context.hpp" +#include "core/exception.hpp" +#include "core/image_storage.hpp" + +namespace roccv { + +namespace { + +// Allocates a buffer through `alloc` for the requested device and wraps it +// in an ImageStorage whose shared_ptr deleter frees through the same allocator. +// The allocator reference is captured by reference; callers must ensure it +// outlives every Image (and any handle copied from it) it creates. +std::shared_ptr makeStorage(const ImageRequirements& reqs, const IAllocator& alloc, eDeviceType device) { + const size_t bytes = static_cast(reqs.planeRowStride[0]) * reqs.size.h; + + void* buf = nullptr; + switch (device) { + case eDeviceType::GPU: + buf = alloc.allocHipMem(bytes); + break; + case eDeviceType::CPU: + buf = alloc.allocHostMem(bytes); + break; + } + + return std::shared_ptr(new ImageStorage(buf), [&alloc, device](ImageStorage* s) { + switch (device) { + case eDeviceType::GPU: + alloc.freeHipMem(s->data()); + break; + case eDeviceType::CPU: + alloc.freeHostMem(s->data()); + break; + } + delete s; + }); +} + +} // namespace + +// ----------------------------------------------------------------------------- +// CalcRequirements +// ----------------------------------------------------------------------------- + +Image::Requirements Image::CalcRequirements(Size2D size, ImageFormat format) { + if (size.w < 1 || size.h < 1) { + throw Exception("Image dimensions must be >= 1.", eStatusType::INVALID_VALUE); + } + + const int64_t bytesPerPixel = static_cast(DataType(format.dtype()).size()) * format.channels(); + + // Guard signed-overflow in the rowStride = bytesPerPixel * width product + // (UB on overflow). Realistic image sizes don't approach INT64_MAX, but + // pathological callers shouldn't silently propagate garbage into strides. + int64_t rowStride = 0; + if (__builtin_mul_overflow(bytesPerPixel, static_cast(size.w), &rowStride)) { + throw Exception("Image row stride overflows int64.", eStatusType::INVALID_VALUE); + } + + // TODO: derive a sensible default base/row alignment from device attributes. + return ImageRequirements{ + .size = size, + .format = format, + .planeRowStride = {rowStride}, + .alignBytes = 0, + }; +} + +// ----------------------------------------------------------------------------- +// Constructors +// ----------------------------------------------------------------------------- + +Image::Image(Size2D size, ImageFormat format, eDeviceType device) + : Image(size, format, GlobalContext().getDefaultAllocator(), device) {} + +Image::Image(Size2D size, ImageFormat format, const IAllocator& alloc, eDeviceType device) + : Image(CalcRequirements(size, format), alloc, device) {} + +Image::Image(const Requirements& reqs, eDeviceType device) + : Image(reqs, GlobalContext().getDefaultAllocator(), device) {} + +Image::Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType device) + : Image(reqs, device, makeStorage(reqs, alloc, device)) {} + +Image::Image(const Requirements& reqs, eDeviceType device, std::shared_ptr storage) + : m_data(std::move(storage)), m_size(reqs.size), m_format(reqs.format), m_device(device), m_planeRowStride{} { + std::copy(std::begin(reqs.planeRowStride), std::end(reqs.planeRowStride), m_planeRowStride.begin()); +} + +// ----------------------------------------------------------------------------- +// exportData +// ----------------------------------------------------------------------------- + +ImageData Image::exportData() const { + // TODO: derive numPlanes from m_format when planar formats land. Today's + // ImageFormat is interleaved-only, so plane 0 covers the whole image and + // its dimensions match m_size verbatim. + ImageBufferStrided strided{}; + strided.numPlanes = 1; + strided.planes[0].width = m_size.w; + strided.planes[0].height = m_size.h; + strided.planes[0].rowStride = m_planeRowStride[0]; + strided.planes[0].basePtr = m_data->data(); + + switch (m_device) { + case eDeviceType::GPU: + return ImageDataStridedHip(m_format, strided); + case eDeviceType::CPU: + return ImageDataStridedHost(m_format, strided); + } + + throw Exception("Unsupported device type in Image::exportData.", eStatusType::INVALID_VALUE); +} + +// ----------------------------------------------------------------------------- +// ImageWrapData +// ----------------------------------------------------------------------------- + +Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup) { + auto strided = data.cast(); + if (!strided.has_value()) { + throw Exception("ImageWrapData requires strided image data.", eStatusType::INVALID_VALUE); + } + + // Single-plane assumption: storage tracks plane(0) and Requirements only + // populates planeRowStride[0]. Multi-plane wraps will need to copy each + // plane's stride and either store per-plane base pointers or derive them + // from a single owning allocation. + const ImagePlaneStrided& plane0 = strided->plane(0); + + // Designated initializers to avoid value-initializing ImageFormat through + // its explicit default ctor (which copy-list-init refuses). + Image::Requirements reqs{ + .size = Size2D{plane0.width, plane0.height}, + .format = data.format(), + .planeRowStride = {plane0.rowStride}, + .alignBytes = 0, + }; + + // The deleter captures `data` by value so the original snapshot survives + // long enough to be passed to the cleanup callback on last-handle drop. + // Swallow exceptions from `cleanup` — shared_ptr deleters run during + // destruction, and a throw would propagate into std::terminate. + auto storage = + std::shared_ptr(new ImageStorage(plane0.basePtr), [data, cleanup](ImageStorage* s) noexcept { + if (cleanup) { + try { + cleanup(data); + } catch (...) { + } + } + delete s; + }); + + return Image(reqs, data.device(), std::move(storage)); +} + +} // namespace roccv diff --git a/src/core/image_batch_data.cpp b/src/core/image_batch_data.cpp new file mode 100644 index 00000000..a8ce07ba --- /dev/null +++ b/src/core/image_batch_data.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "core/image_batch_data.hpp" + +#include "core/image_batch_buffer.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" + +namespace roccv { + +int32_t ImageBatchData::numImages() const { return m_numImages; } + +eDeviceType ImageBatchData::device() const { return m_deviceType; } + +ImageBatchData::ImageBatchData(int32_t numImages, const ImageBatchBuffer& buffer) + : m_numImages(numImages), + m_deviceType(eDeviceType::GPU), + m_bufferType(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE), + m_buffer(buffer) {} + +bool ImageBatchData::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType != ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE; +} + +ImageBatchVarShapeData::ImageBatchVarShapeData(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchData(numImages, buffer) {} + +bool ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP || + bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; +} + +Size2D ImageBatchVarShapeData::maxSize() const { + return Size2D{m_buffer.varShapeStrided.maxWidth, m_buffer.varShapeStrided.maxHeight}; +} + +ImageFormat ImageBatchVarShapeData::uniqueFormat() const { return m_buffer.varShapeStrided.uniqueFormat; } + +const ImageFormat* ImageBatchVarShapeData::formatList() const { return m_buffer.varShapeStrided.formatList; } + +const ImageFormat* ImageBatchVarShapeData::hostFormatList() const { return m_buffer.varShapeStrided.hostFormatList; } + +ImageBatchVarShapeDataStrided::ImageBatchVarShapeDataStrided(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchVarShapeData(numImages, buffer) {} + +bool ImageBatchVarShapeDataStrided::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP || + bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; +} + +const ImageBufferStrided* ImageBatchVarShapeDataStrided::imageList() const { + return m_buffer.varShapeStrided.imageList; +} + +ImageBatchVarShapeDataStridedHip::ImageBatchVarShapeDataStridedHip(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchVarShapeDataStrided(numImages, buffer) { + m_bufferType = ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP; + m_deviceType = eDeviceType::GPU; +} + +ImageBatchVarShapeDataStridedHip::ImageBatchVarShapeDataStridedHip( + int32_t numImages, const ImageBatchVarShapeDataStridedHip::Buffer& buffer) + : ImageBatchVarShapeDataStridedHip(numImages, ImageBatchBuffer{.varShapeStrided = buffer}) {} + +bool ImageBatchVarShapeDataStridedHip::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP; +} + +ImageBatchVarShapeDataStridedHost::ImageBatchVarShapeDataStridedHost(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchVarShapeDataStrided(numImages, buffer) { + m_bufferType = ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; + m_deviceType = eDeviceType::CPU; +} + +ImageBatchVarShapeDataStridedHost::ImageBatchVarShapeDataStridedHost( + int32_t numImages, const ImageBatchVarShapeDataStridedHost::Buffer& buffer) + : ImageBatchVarShapeDataStridedHost(numImages, ImageBatchBuffer{.varShapeStrided = buffer}) {} + +bool ImageBatchVarShapeDataStridedHost::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; +} + +} // namespace roccv diff --git a/src/core/image_batch_var_shape.cpp b/src/core/image_batch_var_shape.cpp new file mode 100644 index 00000000..aeeaa6b3 --- /dev/null +++ b/src/core/image_batch_var_shape.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "core/image_batch_var_shape.hpp" + +#include + +#include "core/detail/context.hpp" +#include "core/exception.hpp" +#include "core/image_batch_buffer.hpp" +#include "core/image_buffer.hpp" + +namespace roccv { + +ImageBatchVarShape::ImageBatchVarShape(int32_t capacity, eDeviceType device) + : ImageBatchVarShape(capacity, GlobalContext().getDefaultAllocator(), device) {} + +ImageBatchVarShape::ImageBatchVarShape(int32_t capacity, const IAllocator& alloc, eDeviceType device) + : m_capacity(capacity), m_table(capacity, device, alloc) { + m_images.reserve(capacity); +} + +ImageBatchVarShape::ImageBatchVarShape(ImageBatchVarShape&& other) noexcept + : m_capacity(other.m_capacity), + m_table(std::move(other.m_table)), + m_images(std::move(other.m_images)), + m_cacheMaxSize(other.m_cacheMaxSize), + m_cacheUniqueFormat(other.m_cacheUniqueFormat) { + other.m_capacity = 0; + other.m_cacheMaxSize.reset(); + other.m_cacheUniqueFormat.reset(); +} + +void ImageBatchVarShape::pushBack(const Image& img) { + const int32_t n = numImages(); + if (n >= m_capacity) { + throw Exception("ImageBatchVarShape::pushBack would exceed capacity", eStatusType::OUT_OF_BOUNDS); + } + if (img.device() != m_table.device()) { + throw Exception("ImageBatchVarShape only accepts images matching its device", eStatusType::INVALID_VALUE); + } + + // Export through the strided base so this works for both GPU- and + // CPU-resident images (a typed cast<...Hip> would reject host images). + auto strided = img.exportData().cast(); + if (!strided.has_value()) { + throw Exception("ImageBatchVarShape requires strided image data", eStatusType::INVALID_VALUE); + } + const ImageDataStrided& data = strided.value(); + if (data.numPlanes() != 1) { + throw Exception("ImageBatchVarShape only supports single-plane images", eStatusType::INVALID_VALUE); + } + + ImageBufferStrided slot{}; + slot.numPlanes = 1; + slot.planes[0] = data.plane(0); + m_table.writeSlot(n, slot, img.format()); + + const Size2D imgSize = img.size(); + if (n == 0) { + // Seed from scratch: an empty-batch query may have populated the + // cache with sentinels (FMT_NONE, 0×0); replacing avoids merging the + // first real image into them. + m_cacheMaxSize = imgSize; + m_cacheUniqueFormat = img.format(); + } else { + // popBack invalidates m_cacheMaxSize without rescanning, so make sure + // both halves of the cache are populated before merging in. + doUpdateCache(); + m_cacheMaxSize->w = std::max(m_cacheMaxSize->w, imgSize.w); + m_cacheMaxSize->h = std::max(m_cacheMaxSize->h, imgSize.h); + if (*m_cacheUniqueFormat != img.format()) { + m_cacheUniqueFormat = FMT_NONE; + } + } + + m_images.push_back(img); +} + +void ImageBatchVarShape::popBack(int32_t count) { + if (count < 0) { + throw Exception("ImageBatchVarShape::popBack count must be non-negative", eStatusType::INVALID_VALUE); + } + if (count > numImages()) { + throw Exception("ImageBatchVarShape::popBack count exceeds numImages", eStatusType::OUT_OF_BOUNDS); + } + + m_images.erase(m_images.end() - count, m_images.end()); + m_table.onShrink(numImages()); + + // maxSize can only shrink on pop; force a rescan on next query. uniqueFormat + // stays — it may now be conservatively FMT_NONE, but never wrong. + m_cacheMaxSize.reset(); + if (numImages() == 0) { + m_cacheUniqueFormat.reset(); + } +} + +void ImageBatchVarShape::clear() { + m_images.clear(); + m_table.onShrink(0); + m_cacheMaxSize.reset(); + m_cacheUniqueFormat.reset(); +} + +Size2D ImageBatchVarShape::maxSize() const { + doUpdateCache(); + return m_cacheMaxSize.value_or(Size2D{0, 0}); +} + +ImageFormat ImageBatchVarShape::uniqueFormat() const { + doUpdateCache(); + return m_cacheUniqueFormat.value_or(FMT_NONE); +} + +void ImageBatchVarShape::doUpdateCache() const { + if (m_cacheMaxSize.has_value() && m_cacheUniqueFormat.has_value()) { + return; + } + const int32_t n = static_cast(m_images.size()); + if (n == 0) { + m_cacheMaxSize = Size2D{0, 0}; + m_cacheUniqueFormat = FMT_NONE; + return; + } + + const ImageBufferStrided* hostImages = m_table.hostImages(); + const ImageFormat* hostFormats = m_table.hostFormats(); + + Size2D maxSz{0, 0}; + ImageFormat unique = hostFormats[0]; + bool heterogeneous = false; + for (int32_t i = 0; i < n; ++i) { + const ImagePlaneStrided& p0 = hostImages[i].planes[0]; + maxSz.w = std::max(maxSz.w, p0.width); + maxSz.h = std::max(maxSz.h, p0.height); + if (!heterogeneous && hostFormats[i] != unique) { + heterogeneous = true; + } + } + m_cacheMaxSize = maxSz; + m_cacheUniqueFormat = heterogeneous ? FMT_NONE : unique; +} + +ImageBatchVarShapeDataStrided ImageBatchVarShape::exportData(hipStream_t stream) { + const auto snap = m_table.sync(stream, numImages()); + doUpdateCache(); + + const Size2D maxSz = m_cacheMaxSize.value(); + ImageBatchVarShapeBufferStrided buffer{}; + buffer.uniqueFormat = m_cacheUniqueFormat.value(); + buffer.maxWidth = maxSz.w; + buffer.maxHeight = maxSz.h; + buffer.imageList = snap.imageList; + buffer.formatList = snap.formatList; + buffer.hostFormatList = snap.hostFormatList; + + if (m_table.device() == eDeviceType::GPU) { + return ImageBatchVarShapeDataStridedHip(numImages(), buffer); + } + return ImageBatchVarShapeDataStridedHost(numImages(), buffer); +} + +} // namespace roccv diff --git a/src/core/image_data.cpp b/src/core/image_data.cpp new file mode 100644 index 00000000..6fb0fc83 --- /dev/null +++ b/src/core/image_data.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "core/image_data.hpp" + +#include "core/image_buffer.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" + +namespace roccv { + +const ImageFormat& ImageData::format() const { return m_format; } + +eDeviceType ImageData::device() const { return m_deviceType; } + +ImageData::ImageData(const ImageFormat& format, const ImageBuffer& buffer) + : m_format(format), + m_deviceType(eDeviceType::GPU), + m_bufferType(ImageBufferType::IMAGE_BUFFER_NONE), + m_buffer(buffer) {} + +bool ImageData::IsCompatibleKind(ImageBufferType bufferType) { + return bufferType != ImageBufferType::IMAGE_BUFFER_NONE; +} + +ImageDataStrided::ImageDataStrided(const ImageFormat& format, const ImageBuffer& buffer) + : ImageData(format, buffer) {} + +bool ImageDataStrided::IsCompatibleKind(ImageBufferType bufferType) { + return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HIP || + bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HOST; +} + +Size2D ImageDataStrided::size() const { + const ImagePlaneStrided& p0 = m_buffer.strided.planes[0]; + return Size2D{p0.width, p0.height}; +} + +int32_t ImageDataStrided::numPlanes() const { return m_buffer.strided.numPlanes; } + +const ImagePlaneStrided& ImageDataStrided::plane(int32_t p) const { return m_buffer.strided.planes[p]; } + +ImageDataStridedHip::ImageDataStridedHip(const ImageFormat& format, const ImageBuffer& buffer) + : ImageDataStrided(format, buffer) { + m_bufferType = ImageBufferType::IMAGE_BUFFER_STRIDED_HIP; + m_deviceType = eDeviceType::GPU; +} + +ImageDataStridedHip::ImageDataStridedHip(const ImageFormat& format, const ImageDataStridedHip::Buffer& buffer) + : ImageDataStridedHip(format, ImageBuffer{.strided = buffer}) {} + +bool ImageDataStridedHip::IsCompatibleKind(ImageBufferType bufferType) { + return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HIP; +} + +ImageDataStridedHost::ImageDataStridedHost(const ImageFormat& format, const ImageBuffer& buffer) + : ImageDataStrided(format, buffer) { + m_bufferType = ImageBufferType::IMAGE_BUFFER_STRIDED_HOST; + m_deviceType = eDeviceType::CPU; +} + +ImageDataStridedHost::ImageDataStridedHost(const ImageFormat& format, const ImageDataStridedHost::Buffer& buffer) + : ImageDataStridedHost(format, ImageBuffer{.strided = buffer}) {} + +bool ImageDataStridedHost::IsCompatibleKind(ImageBufferType bufferType) { + return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HOST; +} + +} // namespace roccv diff --git a/src/op_adv_cvt_color.cpp b/src/op_adv_cvt_color.cpp index 7f035507..c2c69175 100644 --- a/src/op_adv_cvt_color.cpp +++ b/src/op_adv_cvt_color.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. #include "common/validation_helpers.hpp" #include "core/tensor.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/common/adv_cvt_color_coefficients.hpp" #include "kernels/device/adv_cvt_color_device.hpp" #include "kernels/host/adv_cvt_color_host.hpp" @@ -189,22 +189,22 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou switch (conversionCode) { case COLOR_BGR2YUV: Kernels::Device::rgb_or_bgr_to_yuv_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta); break; case COLOR_RGB2YUV: Kernels::Device::rgb_or_bgr_to_yuv_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta); break; case COLOR_YUV2BGR: Kernels::Device::yuv_to_rgb_or_bgr_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta); break; case COLOR_YUV2RGB: Kernels::Device::yuv_to_rgb_or_bgr_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta); break; default: throw Exception("Unsupported conversion code.", eStatusType::INVALID_COMBINATION); @@ -215,26 +215,26 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou if (outChannels == 3) { if (bgr) { - Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar3> - <<>>(ImageWrapper(input), ImageWrapper(output), + Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar3> + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { - Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar3> - <<>>(ImageWrapper(input), ImageWrapper(output), + Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar3> + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } else { if (bgr) { - Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar4> - <<>>(ImageWrapper(input), ImageWrapper(output), + Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar4> + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { - Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar4> - <<>>(ImageWrapper(input), ImageWrapper(output), + Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar4> + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } @@ -245,21 +245,21 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou if (inChannels == 3) { if (bgr) { Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } else { if (bgr) { Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv - <<>>(ImageWrapper(input), ImageWrapper(output), + <<>>(TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } @@ -268,23 +268,23 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou if (IsInterleaved444(conversionCode)) { switch (conversionCode) { case COLOR_BGR2YUV: - Kernels::Host::rgb_or_bgr_to_yuv_adv(ImageWrapper(input), - ImageWrapper(output), coeff, + Kernels::Host::rgb_or_bgr_to_yuv_adv(TensorWrapper(input), + TensorWrapper(output), coeff, kDelta); break; case COLOR_RGB2YUV: - Kernels::Host::rgb_or_bgr_to_yuv_adv(ImageWrapper(input), - ImageWrapper(output), coeff, + Kernels::Host::rgb_or_bgr_to_yuv_adv(TensorWrapper(input), + TensorWrapper(output), coeff, kDelta); break; case COLOR_YUV2BGR: - Kernels::Host::yuv_to_rgb_or_bgr_adv(ImageWrapper(input), - ImageWrapper(output), coeff, + Kernels::Host::yuv_to_rgb_or_bgr_adv(TensorWrapper(input), + TensorWrapper(output), coeff, kDelta); break; case COLOR_YUV2RGB: - Kernels::Host::yuv_to_rgb_or_bgr_adv(ImageWrapper(input), - ImageWrapper(output), coeff, + Kernels::Host::yuv_to_rgb_or_bgr_adv(TensorWrapper(input), + TensorWrapper(output), coeff, kDelta); break; default: throw Exception("Unsupported conversion code.", eStatusType::INVALID_COMBINATION); @@ -292,41 +292,41 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou } else if (IsSemiPlanarToInterleaved(conversionCode)) { if (outChannels == 3) { if (bgr) { - Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar3>( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar3>( + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { - Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar3>( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar3>( + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } else { if (bgr) { - Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar4>( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar4>( + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { - Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, - ImageWrapper, uchar4>( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv, + TensorWrapper, uchar4>( + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } } else { if (inChannels == 3) { if (bgr) { Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } else { if (bgr) { Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } else { Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv( - ImageWrapper(input), ImageWrapper(output), coeff, kDelta, uidx); + TensorWrapper(input), TensorWrapper(output), coeff, kDelta, uidx); } } } diff --git a/src/op_bilateral_filter.cpp b/src/op_bilateral_filter.cpp index dffba8ae..5c079b31 100644 --- a/src/op_bilateral_filter.cpp +++ b/src/op_bilateral_filter.cpp @@ -31,7 +31,7 @@ THE SOFTWARE. #include "common/validation_helpers.hpp" #include "core/detail/casting.hpp" #include "core/wrappers/border_wrapper.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/bilateral_filter_device.hpp" #include "kernels/host/bilateral_filter_host.hpp" @@ -43,8 +43,8 @@ BilateralFilter::~BilateralFilter() {} template void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &input, const Tensor &output, int diameter, float sigmaColor, float sigmaSpace, T borderValue, eDeviceType device) { - BorderWrapper inputWrapper(input, borderValue); - ImageWrapper outputWrapper(output); + auto inputWrapper = MakeBorderWrapper(TensorWrapper(input), borderValue); + TensorWrapper outputWrapper(output); if (outputWrapper.channels() > 4 || outputWrapper.channels() < 1) { throw Exception("Invalid channel size: cannot be greater than 4 or less than 1.", eStatusType::OUT_OF_BOUNDS); @@ -61,8 +61,7 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp sigmaSpace = 1.0f; } - const int radius = - (diameter <= 0) ? static_cast(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1); + const int radius = (diameter <= 0) ? static_cast(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1); float spaceCoeff = -1 / (2 * sigmaSpace * sigmaSpace); float colorCoeff = -1 / (2 * sigmaColor * sigmaColor); @@ -89,9 +88,9 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp for (int j = 0; j < divisor; j++) { for (int i = 0; i < dividend; i++) { - threads.push_back(std::thread(Kernels::Host::bilateral_filter, ImageWrapper>, - inputWrapper, outputWrapper, radius, rollingHeight, rollingWidth, - prevHeight, prevWidth, spaceCoeff, colorCoeff)); + threads.push_back(std::thread( + Kernels::Host::bilateral_filter>, inputWrapper, + outputWrapper, radius, rollingHeight, rollingWidth, prevHeight, prevWidth, spaceCoeff, colorCoeff)); prevWidth = rollingWidth; rollingWidth += factorW; } diff --git a/src/op_bnd_box.cpp b/src/op_bnd_box.cpp index 3b2443e0..abaff32d 100644 --- a/src/op_bnd_box.cpp +++ b/src/op_bnd_box.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. #include "common/validation_helpers.hpp" #include "core/detail/hip_utils.hpp" #include "core/tensor.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/bnd_box_device.hpp" #include "kernels/host/bnd_box_host.hpp" @@ -44,8 +44,8 @@ BndBox::~BndBox() {} template void dispatch_bnd_box_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, std::shared_ptr> rects, eDeviceType device) { - ImageWrapper inputWrapper(input); - ImageWrapper outputWrapper(output); + TensorWrapper inputWrapper(input); + TensorWrapper outputWrapper(output); auto width = inputWrapper.width(); auto height = inputWrapper.height(); diff --git a/src/op_composite.cpp b/src/op_composite.cpp index 650a7bf1..c73ac51f 100644 --- a/src/op_composite.cpp +++ b/src/op_composite.cpp @@ -24,7 +24,7 @@ #include #include "common/validation_helpers.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/composite_device.hpp" #include "kernels/host/composite_host.hpp" @@ -33,10 +33,10 @@ namespace roccv { template void dispatch_composite_masktype(hipStream_t stream, const Tensor& foreground, const Tensor& background, const Tensor& mask, const Tensor& output, eDeviceType device) { - ImageWrapper fgWrapper(foreground); - ImageWrapper bgWrapper(background); - ImageWrapper maskWrapper(mask); - ImageWrapper outputWrapper(output); + TensorWrapper fgWrapper(foreground); + TensorWrapper bgWrapper(background); + TensorWrapper maskWrapper(mask); + TensorWrapper outputWrapper(output); switch (device) { case eDeviceType::GPU: { diff --git a/src/op_convert_to.cpp b/src/op_convert_to.cpp index 3baa4577..2265a73e 100644 --- a/src/op_convert_to.cpp +++ b/src/op_convert_to.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. #include #include -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "common/validation_helpers.hpp" #include "core/detail/casting.hpp" #include "core/detail/type_traits.hpp" @@ -40,8 +40,8 @@ void dispatch_convert_to_channels(hipStream_t stream, const Tensor &input, const using SRC_DT_NC = detail::MakeType; using DST_DT_NC = detail::MakeType; - ImageWrapper inputWrapper(input); - ImageWrapper outputWrapper(output); + TensorWrapper inputWrapper(input); + TensorWrapper outputWrapper(output); using SRC_BT = detail::BaseType; using DST_BT = detail::BaseType; diff --git a/src/op_copy_make_border.cpp b/src/op_copy_make_border.cpp index feacfbd9..ffc3e0a7 100644 --- a/src/op_copy_make_border.cpp +++ b/src/op_copy_make_border.cpp @@ -25,7 +25,7 @@ #include "common/validation_helpers.hpp" #include "core/wrappers/border_wrapper.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "core/wrappers/interpolation_wrapper.hpp" #include "kernels/device/copy_make_border_device.hpp" #include "kernels/host/copy_make_border_host.hpp" @@ -38,8 +38,8 @@ namespace roccv { template void dispatch_copy_make_border_border_mode(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top, int32_t left, T border_value, eDeviceType device) { - BorderWrapper in_desc(input, border_value); - ImageWrapper out_desc(output); + auto in_desc = MakeBorderWrapper(TensorWrapper(input), border_value); + TensorWrapper out_desc(output); switch (device) { case eDeviceType::GPU: { @@ -83,8 +83,7 @@ void dispatch_copy_make_border(hipStream_t stream, const Tensor& input, const Te } void CopyMakeBorder::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top, - int32_t left, eBorderType border_mode, float4 border_value, - eDeviceType device) const { + int32_t left, eBorderType border_mode, float4 border_value, eDeviceType device) const { CHECK_TENSOR_DEVICE(input, device); CHECK_TENSOR_LAYOUT(input, eTensorLayout::TENSOR_LAYOUT_NHWC, eTensorLayout::TENSOR_LAYOUT_HWC); CHECK_TENSOR_DATATYPES(input, eDataType::DATA_TYPE_U8, eDataType::DATA_TYPE_S8, eDataType::DATA_TYPE_U16, diff --git a/src/op_custom_crop.cpp b/src/op_custom_crop.cpp index 3d1a7056..13e5017c 100644 --- a/src/op_custom_crop.cpp +++ b/src/op_custom_crop.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. #include #include "common/validation_helpers.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/custom_crop_device.hpp" #include "kernels/host/custom_crop_host.hpp" @@ -35,8 +35,8 @@ namespace roccv { template void dispatch_custom_crop_dtype(hipStream_t stream, const Tensor& input, const Tensor& output, Box_t cropRect, eDeviceType device) { - ImageWrapper inputWrapper(input); - ImageWrapper outputWrapper(output); + TensorWrapper inputWrapper(input); + TensorWrapper outputWrapper(output); switch (device) { case eDeviceType::GPU: { diff --git a/src/op_cvt_color.cpp b/src/op_cvt_color.cpp index 31abd103..7785b465 100644 --- a/src/op_cvt_color.cpp +++ b/src/op_cvt_color.cpp @@ -27,7 +27,7 @@ THE SOFTWARE. #include "common/validation_helpers.hpp" #include "core/tensor.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/cvt_color_device.hpp" #include "kernels/host/cvt_color_host.hpp" @@ -87,38 +87,38 @@ void CvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &outpu switch (conversionCode) { case eColorConversionCode::COLOR_BGR2GRAY: Kernels::Device::rgb_or_bgr_to_grayscale - <<>>(ImageWrapper(input), ImageWrapper(output)); + <<>>(TensorWrapper(input), TensorWrapper(output)); break; case eColorConversionCode::COLOR_RGB2GRAY: Kernels::Device::rgb_or_bgr_to_grayscale - <<>>(ImageWrapper(input), ImageWrapper(output)); + <<>>(TensorWrapper(input), TensorWrapper(output)); break; case eColorConversionCode::COLOR_BGR2RGB: case eColorConversionCode::COLOR_RGB2BGR: Kernels::Device::reorder - <<>>(ImageWrapper(input), ImageWrapper(output)); + <<>>(TensorWrapper(input), TensorWrapper(output)); break; case eColorConversionCode::COLOR_BGR2YUV: Kernels::Device::rgb_or_bgr_to_yuv<<>>( - ImageWrapper(input), ImageWrapper(output), 128.0f); + TensorWrapper(input), TensorWrapper(output), 128.0f); break; case eColorConversionCode::COLOR_RGB2YUV: Kernels::Device::rgb_or_bgr_to_yuv<<>>( - ImageWrapper(input), ImageWrapper(output), 128.0f); + TensorWrapper(input), TensorWrapper(output), 128.0f); break; case eColorConversionCode::COLOR_YUV2BGR: Kernels::Device::yuv_to_rgb_or_bgr<<>>( - ImageWrapper(input), ImageWrapper(output), 128.0f); + TensorWrapper(input), TensorWrapper(output), 128.0f); break; case eColorConversionCode::COLOR_YUV2RGB: Kernels::Device::yuv_to_rgb_or_bgr<<>>( - ImageWrapper(input), ImageWrapper(output), 128.0f); + TensorWrapper(input), TensorWrapper(output), 128.0f); break; default: @@ -129,39 +129,39 @@ void CvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &outpu switch (conversionCode) { case eColorConversionCode::COLOR_BGR2GRAY: - Kernels::Host::rgb_or_bgr_to_grayscale(ImageWrapper(input), - ImageWrapper(output)); + Kernels::Host::rgb_or_bgr_to_grayscale(TensorWrapper(input), + TensorWrapper(output)); break; case eColorConversionCode::COLOR_RGB2GRAY: - Kernels::Host::rgb_or_bgr_to_grayscale(ImageWrapper(input), - ImageWrapper(output)); + Kernels::Host::rgb_or_bgr_to_grayscale(TensorWrapper(input), + TensorWrapper(output)); break; case eColorConversionCode::COLOR_BGR2RGB: case eColorConversionCode::COLOR_RGB2BGR: - Kernels::Host::reorder(ImageWrapper(input), - ImageWrapper(output)); + Kernels::Host::reorder(TensorWrapper(input), + TensorWrapper(output)); break; case eColorConversionCode::COLOR_BGR2YUV: - Kernels::Host::rgb_or_bgr_to_yuv(ImageWrapper(input), - ImageWrapper(output), 128.0f); + Kernels::Host::rgb_or_bgr_to_yuv(TensorWrapper(input), + TensorWrapper(output), 128.0f); break; case eColorConversionCode::COLOR_RGB2YUV: - Kernels::Host::rgb_or_bgr_to_yuv(ImageWrapper(input), - ImageWrapper(output), 128.0f); + Kernels::Host::rgb_or_bgr_to_yuv(TensorWrapper(input), + TensorWrapper(output), 128.0f); break; case eColorConversionCode::COLOR_YUV2BGR: - Kernels::Host::yuv_to_rgb_or_bgr(ImageWrapper(input), - ImageWrapper(output), 128.0f); + Kernels::Host::yuv_to_rgb_or_bgr(TensorWrapper(input), + TensorWrapper(output), 128.0f); break; case eColorConversionCode::COLOR_YUV2RGB: - Kernels::Host::yuv_to_rgb_or_bgr(ImageWrapper(input), - ImageWrapper(output), 128.0f); + Kernels::Host::yuv_to_rgb_or_bgr(TensorWrapper(input), + TensorWrapper(output), 128.0f); break; default: diff --git a/src/op_flip.cpp b/src/op_flip.cpp index 5566a0b6..d28ab8fa 100644 --- a/src/op_flip.cpp +++ b/src/op_flip.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. #include "common/validation_helpers.hpp" #include "core/exception.hpp" #include "core/status_type.h" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/flip_device.hpp" #include "kernels/host/flip_host.hpp" @@ -37,8 +37,8 @@ namespace roccv { template void dispatch_flip_axis(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) { - ImageWrapper inputWrapper(input); - ImageWrapper outputWrapper(output); + TensorWrapper inputWrapper(input); + TensorWrapper outputWrapper(output); switch (device) { case eDeviceType::GPU: { diff --git a/src/op_gamma_contrast.cpp b/src/op_gamma_contrast.cpp index d6c78690..e4ca3520 100644 --- a/src/op_gamma_contrast.cpp +++ b/src/op_gamma_contrast.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. #include "common/math_vector.hpp" #include "common/validation_helpers.hpp" #include "core/tensor.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/gamma_contrast_device.hpp" #include "kernels/host/gamma_contrast_host.hpp" @@ -43,8 +43,8 @@ namespace roccv { template void dispatch_gamma_contrast_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, float gamma, eDeviceType device) { - ImageWrapper inputWrapper(input); - ImageWrapper outputWrapper(output); + TensorWrapper inputWrapper(input); + TensorWrapper outputWrapper(output); if (device == eDeviceType::GPU) { dim3 block(64, 16); diff --git a/src/op_histogram.cpp b/src/op_histogram.cpp index 24a2ef79..0627a732 100644 --- a/src/op_histogram.cpp +++ b/src/op_histogram.cpp @@ -31,7 +31,7 @@ THE SOFTWARE. #include "common/array_wrapper.hpp" #include "common/validation_helpers.hpp" #include "core/wrappers/generic_tensor_wrapper.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/histogram_device.hpp" #include "kernels/host/histogram_host.hpp" @@ -44,7 +44,7 @@ template void dispatch_histogram_dtype(hipStream_t stream, const Tensor& input, std::optional> mask, const Tensor& histogram, eDeviceType device) { - ImageWrapper inputWrapper(input); + TensorWrapper inputWrapper(input); const auto o_height = histogram.shape()[histogram.shape().layout().height_index()]; const auto o_width = histogram.shape()[histogram.shape().layout().width_index()]; @@ -91,7 +91,7 @@ void dispatch_histogram_dtype(hipStream_t stream, const Tensor& input, std::reference_wrapper mask_ref = mask.value(); const Tensor& actual_mask = mask_ref.get(); CHECK_TENSOR_COMPARISON(input.shape() == actual_mask.shape()); - ImageWrapper maskWrapper(actual_mask); + TensorWrapper maskWrapper(actual_mask); Kernels::Device::histogram_kernel<<>>( inputWrapper, maskWrapper, GenericTensorWrapper(histogram)); } else { @@ -115,7 +115,7 @@ void dispatch_histogram_dtype(hipStream_t stream, const Tensor& input, std::reference_wrapper mask_ref = mask.value(); const Tensor& actual_mask = mask_ref.get(); CHECK_TENSOR_COMPARISON(input.shape() == actual_mask.shape()); - ImageWrapper maskWrapper(actual_mask); + TensorWrapper maskWrapper(actual_mask); Kernels::Host::histogram_kernel(inputWrapper, maskWrapper, GenericTensorWrapper(histogram)); } else { Kernels::Host::histogram_kernel(inputWrapper, GenericTensorWrapper(histogram)); diff --git a/src/op_normalize.cpp b/src/op_normalize.cpp index 529638b8..ca2fde96 100644 --- a/src/op_normalize.cpp +++ b/src/op_normalize.cpp @@ -28,7 +28,7 @@ THE SOFTWARE. #include "common/validation_helpers.hpp" #include "core/detail/type_traits.hpp" #include "core/tensor.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/normalize_device.hpp" #include "kernels/host/normalize_host.hpp" @@ -42,10 +42,10 @@ void dispatch_normalize_stddev(hipStream_t stream, const Tensor& input, const Te // tensors. using work_type = detail::MakeType>; - ImageWrapper inputWrap(input); - ImageWrapper outputWrap(output); - ImageWrapper scaleWrap(scale); - ImageWrapper baseWrap(base); + TensorWrapper inputWrap(input); + TensorWrapper outputWrap(output); + TensorWrapper scaleWrap(scale); + TensorWrapper baseWrap(base); switch (device) { case eDeviceType::GPU: { diff --git a/src/op_reformat.cpp b/src/op_reformat.cpp index 4a79145b..cda3c809 100644 --- a/src/op_reformat.cpp +++ b/src/op_reformat.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. #include #include "common/validation_helpers.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "kernels/device/reformat_device.hpp" #include "kernels/host/reformat_host.hpp" @@ -35,8 +35,8 @@ namespace roccv { namespace { template void DispatchReformatChannels(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) { - ImageWrapper inputWrap(input); - ImageWrapper outputWrap(output); + TensorWrapper inputWrap(input); + TensorWrapper outputWrap(output); switch (device) { case eDeviceType::GPU: { diff --git a/src/op_remap.cpp b/src/op_remap.cpp index 0992cf44..40d14427 100644 --- a/src/op_remap.cpp +++ b/src/op_remap.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. #include "core/detail/internal_structs.hpp" #include "core/detail/math/math.hpp" #include "core/detail/type_traits.hpp" -#include "core/wrappers/image_wrapper.hpp" +#include "core/wrappers/tensor_wrapper.hpp" #include "core/wrappers/interpolation_wrapper.hpp" #include "kernels/device/remap_device.hpp" #include "kernels/host/remap_host.hpp" @@ -76,9 +76,10 @@ template void dispatch_remap_mapInterp(hipStream_t stream, const Tensor &input, const Tensor &output, const Tensor &map, const eRemapType mapValueType, const bool alignCorners, const T borderValue, const eDeviceType device) { - ImageWrapper outputWrapper(output); - InterpolationWrapper wrappedMapTensor(map, make_float2(0, 0)); - InterpolationWrapper inputWrapper(input, borderValue); + TensorWrapper outputWrapper(output); + auto wrappedMapTensor = + MakeInterpolationWrapper(MakeBorderWrapper(TensorWrapper(map), make_float2(0, 0))); + auto inputWrapper = MakeInterpolationWrapper(MakeBorderWrapper(TensorWrapper(input), borderValue)); int mapBatchSize = wrappedMapTensor.batches(); diff --git a/src/op_resize.cpp b/src/op_resize.cpp index d7cd0b61..7275727f 100644 --- a/src/op_resize.cpp +++ b/src/op_resize.cpp @@ -36,9 +36,10 @@ namespace roccv { template void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) { - ImageWrapper outputWrapper(output); + TensorWrapper outputWrapper(output); // Resize operation should clamp values at the border (REPLICATE border mode) - InterpolationWrapper inputWrapper(input, T{}); + auto inputWrapper = + MakeInterpolationWrapper(MakeBorderWrapper(TensorWrapper(input), T{})); float scaleX = inputWrapper.width() / static_cast(outputWrapper.width()); float scaleY = inputWrapper.height() / static_cast(outputWrapper.height()); @@ -62,13 +63,13 @@ void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tenso template void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor& output, eInterpolationType interpolation, eDeviceType device) { - static const std::unordered_map< - eInterpolationType, - std::function> - funcs = {{eInterpolationType::INTERP_TYPE_NEAREST, dispatch_resize_interp}, - {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp}, - {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp} - }; + static const std::unordered_map> + funcs = { + {eInterpolationType::INTERP_TYPE_NEAREST, + dispatch_resize_interp}, + {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp}, + {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp}}; if (!funcs.contains(interpolation)) { throw Exception("Operation does not support the given interpolation mode.", eStatusType::NOT_IMPLEMENTED); @@ -78,8 +79,8 @@ void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor func(stream, input, output, device); } -void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, - eInterpolationType interpolation, eDeviceType device) const { +void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, eInterpolationType interpolation, + eDeviceType device) const { CHECK_TENSOR_DEVICE(input, device); CHECK_TENSOR_DEVICE(output, device); diff --git a/src/op_rotate.cpp b/src/op_rotate.cpp index 28806779..508de16c 100644 --- a/src/op_rotate.cpp +++ b/src/op_rotate.cpp @@ -54,8 +54,9 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso T borderVal = detail::SaturateCast(make_float4(0.0f, 0.0f, 0.0f, 0.0f)); - ImageWrapper outputWrap(output); - InterpolationWrapper inputWrap(input, borderVal); + TensorWrapper outputWrap(output); + auto inputWrap = MakeInterpolationWrapper( + MakeBorderWrapper(TensorWrapper(input), borderVal)); switch (device) { case eDeviceType::GPU: { @@ -74,8 +75,8 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso } template -void dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, - double2 shift, eInterpolationType interpolation, eDeviceType device) { +void dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, double2 shift, + eInterpolationType interpolation, eDeviceType device) { // clang-format off static const std::unordered_map void dispatch_threshold_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, const Tensor &thresh, const Tensor &maxVal, eThresholdType m_threshType, eDeviceType device) { - ImageWrapper inputWrapper(input); - ImageWrapper outputWrapper(output); + TensorWrapper inputWrapper(input); + TensorWrapper outputWrapper(output); const auto height = input.shape()[input.shape().layout().height_index()]; const auto width = input.shape()[input.shape().layout().width_index()]; diff --git a/src/op_warp_perspective.cpp b/src/op_warp_perspective.cpp index ca77fc8c..1a7e12f2 100644 --- a/src/op_warp_perspective.cpp +++ b/src/op_warp_perspective.cpp @@ -36,8 +36,8 @@ template void dispatch_warp_perspective_interp(hipStream_t stream, const Tensor &input, const Tensor &output, const PerspectiveTransform transMatrix, T borderValue, eDeviceType device) { ArrayWrapper transform(transMatrix); - ImageWrapper outputWrapper(output); - InterpolationWrapper inputWrapper(input, borderValue); + TensorWrapper outputWrapper(output); + auto inputWrapper = MakeInterpolationWrapper(MakeBorderWrapper(TensorWrapper(input), borderValue)); // Launch CPU/GPU kernel depending on requested device type. switch (device) { diff --git a/tests/roccv/cpp/include/image_test_helpers.hpp b/tests/roccv/cpp/include/image_test_helpers.hpp new file mode 100644 index 00000000..c4613367 --- /dev/null +++ b/tests/roccv/cpp/include/image_test_helpers.hpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace roccv { +namespace tests { + +// Opaque sentinel pointers used by image-layer tests. ImageData / ImageBatchData +// carry pointers but never dereference them — the buffer is a metadata snapshot +// only — so tests use these to verify values flow through without needing real +// allocations. +inline void* const FAKE_PTR_A = reinterpret_cast(0xAAAAAAAAull); +inline void* const FAKE_PTR_B = reinterpret_cast(0xBBBBBBBBull); +inline void* const FAKE_PTR_C = reinterpret_cast(0xCCCCCCCCull); + +/** + * @brief Test allocator that backs every allocation kind with malloc and tallies + * how many times each entry point is invoked. Pure host-backed; no actual GPU + * dependency on the returned pointers — callers that exercise the Hip/pinned + * paths must only inspect metadata, never dereference device memory. + * + * `lastAllocBytes` is updated from every alloc path (hip, host, pinned), so + * callers may assert on the most recent allocation regardless of kind. + */ +class CountingAllocator : public IAllocator { + public: + mutable int hipAllocs = 0; + mutable int hipFrees = 0; + mutable int hostAllocs = 0; + mutable int hostFrees = 0; + mutable int pinnedAllocs = 0; + mutable int pinnedFrees = 0; + mutable size_t lastAllocBytes = 0; + + void* allocHipMem(size_t size) const override { + ++hipAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHipMem(void* ptr) const noexcept override { + ++hipFrees; + std::free(ptr); + } + + void* allocHostMem(size_t size, int32_t /*alignment*/ = 0) const override { + ++hostAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHostMem(void* ptr) const noexcept override { + ++hostFrees; + std::free(ptr); + } + + void* allocHostPinnedMem(size_t size) const override { + ++pinnedAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHostPinnedMem(void* ptr) const noexcept override { + ++pinnedFrees; + std::free(ptr); + } +}; + +// Single-plane packed-row buffer descriptor around `basePtr`. The pointer is +// never dereferenced by the consumers (ImageData / ImageBatchVarShape). +inline ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, int64_t rowStride, void* basePtr) { + ImageBufferStrided buf{}; + buf.numPlanes = 1; + buf.planes[0] = {width, height, rowStride, basePtr}; + return buf; +} + +// Single-plane GPU-resident ImageData snapshot with packed-row stride implied +// by `fmt`. For tests that need an ImageData but won't touch the pixels. +inline ImageDataStridedHip MakeFakeHipData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + const int64_t rowStride = static_cast(width) * fmt.channels() * DataType(fmt.dtype()).size(); + return ImageDataStridedHip(fmt, MakeSinglePlaneBuffer(width, height, rowStride, basePtr)); +} + +// Host counterpart of MakeFakeHipData. +inline ImageDataStridedHost MakeFakeHostData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + const int64_t rowStride = static_cast(width) * fmt.channels() * DataType(fmt.dtype()).size(); + return ImageDataStridedHost(fmt, MakeSinglePlaneBuffer(width, height, rowStride, basePtr)); +} + +// Single-plane GPU-resident Image wrapping a sentinel pointer via ImageWrapData. +// Use for batch tests where pushBack only reads the descriptor. +inline Image MakeFakeGpuImage(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + return ImageWrapData(MakeFakeHipData(width, height, basePtr, fmt)); +} + +// Host counterpart of MakeFakeGpuImage. +inline Image MakeFakeHostImage(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + return ImageWrapData(MakeFakeHostData(width, height, basePtr, fmt)); +} + +} // namespace tests +} // namespace roccv diff --git a/tests/roccv/cpp/include/test_helpers.hpp b/tests/roccv/cpp/include/test_helpers.hpp index 6c43053b..7ed56309 100644 --- a/tests/roccv/cpp/include/test_helpers.hpp +++ b/tests/roccv/cpp/include/test_helpers.hpp @@ -198,6 +198,12 @@ namespace tests { ". Expected no exceptions, but received the following exception: " + e.what()); \ } +// EXPECT_EQ pipes through std::to_string, so wrap enums/pointers/bools through +// these casts before comparing. +inline auto AsInt = [](auto v) { return static_cast(v); }; +inline auto AsAddr = [](const void* p) { return reinterpret_cast(p); }; +inline auto AsSize = [](auto v) { return static_cast(v); }; + /** * @brief Creates a NHWC tensor which contains data loaded from an image. * diff --git a/tests/roccv/cpp/src/tests/core/image/test_image.cpp b/tests/roccv/cpp/src/tests/core/image/test_image.cpp new file mode 100644 index 00000000..ce6ef69b --- /dev/null +++ b/tests/roccv/cpp/src/tests/core/image/test_image.cpp @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include + +#include "image_test_helpers.hpp" +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +// ============================================================================= +// CalcRequirements +// ============================================================================= + +/** + * @brief Packed-row stride for a typical 3-channel uint8 image. Other fields + * propagate unchanged; remaining plane slots stay zeroed. + */ +void TestCalcRequirementsRgb8() { + auto reqs = Image::CalcRequirements({320, 240}, FMT_RGB8); + + EXPECT_EQ(reqs.size.w, 320); + EXPECT_EQ(reqs.size.h, 240); + EXPECT_EQ(reqs.format.channels(), 3); + EXPECT_EQ(reqs.planeRowStride[0], static_cast(320 * 3)); + EXPECT_EQ(reqs.planeRowStride[1], 0); + EXPECT_EQ(reqs.planeRowStride[5], 0); + EXPECT_EQ(reqs.alignBytes, 0); +} + +/** + * @brief Multi-byte dtype is reflected in the per-pixel byte count. + */ +void TestCalcRequirementsF32() { + auto reqs = Image::CalcRequirements({64, 64}, FMT_F32); + EXPECT_EQ(reqs.planeRowStride[0], static_cast(64 * 4)); +} + +/** + * @brief Single-channel U8 → row stride equals width. + */ +void TestCalcRequirementsU8() { + auto reqs = Image::CalcRequirements({100, 50}, FMT_U8); + EXPECT_EQ(reqs.planeRowStride[0], 100); +} + +/** + * @brief Width or height < 1 must throw INVALID_VALUE. + */ +void TestCalcRequirementsRejectsInvalidDims() { + EXPECT_EXCEPTION(Image::CalcRequirements({0, 100}, FMT_RGB8), eStatusType::INVALID_VALUE); + EXPECT_EXCEPTION(Image::CalcRequirements({100, 0}, FMT_RGB8), eStatusType::INVALID_VALUE); + EXPECT_EXCEPTION(Image::CalcRequirements({-5, 100}, FMT_RGB8), eStatusType::INVALID_VALUE); + EXPECT_EXCEPTION(Image::CalcRequirements({100, -5}, FMT_RGB8), eStatusType::INVALID_VALUE); +} + +/** + * @brief Large widths must not overflow during stride math; row stride must + * fit in int64. + */ +void TestCalcRequirementsLargeDims() { + // 8K image, RGBA8 (4 channels * 1 byte = 4 B/pixel) → 8192 * 4 = 32768 B/row. + auto reqs = Image::CalcRequirements({8192, 4320}, FMT_RGBA8); + EXPECT_EQ(reqs.planeRowStride[0], static_cast(8192 * 4)); +} + +// ============================================================================= +// Allocating constructors +// ============================================================================= + +/** + * @brief GPU-device ctor routes allocation through allocHipMem with the + * computed byte count. + */ +void TestImageHipAllocation() { + CountingAllocator alloc; + { + Image img({320, 240}, FMT_RGB8, alloc, eDeviceType::GPU); + + EXPECT_EQ(alloc.hipAllocs, 1); + EXPECT_EQ(alloc.hostAllocs, 0); + EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(320 * 3 * 240)); + + EXPECT_EQ(img.size().w, 320); + EXPECT_EQ(img.size().h, 240); + EXPECT_EQ(AsInt(img.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(img.format().channels(), 3); + + // Image is still alive — buffer not yet freed. + EXPECT_EQ(alloc.hipFrees, 0); + } + // Image dropped — buffer freed exactly once via the matching allocator. + EXPECT_EQ(alloc.hipFrees, 1); +} + +/** + * @brief Same shape as the Hip test but for CPU residency. + */ +void TestImageHostAllocation() { + CountingAllocator alloc; + { + Image img({100, 50}, FMT_U8, alloc, eDeviceType::CPU); + + EXPECT_EQ(alloc.hostAllocs, 1); + EXPECT_EQ(alloc.hipAllocs, 0); + EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(100 * 50)); + EXPECT_EQ(AsInt(img.device()), AsInt(eDeviceType::CPU)); + } + EXPECT_EQ(alloc.hostFrees, 1); +} + +/** + * @brief Constructing from precomputed Requirements yields observably + * identical state to the (Size2D, ImageFormat) sugar form. + */ +void TestImageRequirementsCtor() { + CountingAllocator alloc; + auto reqs = Image::CalcRequirements({64, 32}, FMT_RGBA8); + + Image img(reqs, alloc, eDeviceType::GPU); + + EXPECT_EQ(img.size().w, 64); + EXPECT_EQ(img.size().h, 32); + EXPECT_EQ(img.format().channels(), 4); + EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(64 * 4 * 32)); +} + +// ============================================================================= +// Refcount / lifecycle +// ============================================================================= + +/** + * @brief Copying an Image bumps the refcount: both handles see the same + * underlying buffer, and free is deferred until the LAST handle drops. + */ +void TestImageCopySharesBuffer() { + CountingAllocator alloc; + void* buf = nullptr; + { + Image first({16, 16}, FMT_U8, alloc, eDeviceType::GPU); + buf = first.exportData().cast()->plane(0).basePtr; + + Image second = first; // refcount bump + EXPECT_EQ(alloc.hipAllocs, 1); // No new allocation. + EXPECT_EQ(AsAddr(second.exportData().cast()->plane(0).basePtr), AsAddr(buf)); + + // Drop `first`; buffer must NOT be freed yet — `second` still holds it. + { + Image sink = std::move(first); + } + EXPECT_EQ(alloc.hipFrees, 0); + } + // All handles dropped — exactly one free. + EXPECT_EQ(alloc.hipFrees, 1); +} + +/** + * @brief Move-construction transfers the buffer; the source is left empty. + * The buffer must still free exactly once (when the destination drops). + */ +void TestImageMoveSemantics() { + CountingAllocator alloc; + { + Image src({8, 8}, FMT_U8, alloc, eDeviceType::CPU); + void* srcBuf = src.exportData().cast()->plane(0).basePtr; + + Image dst = std::move(src); + EXPECT_EQ(AsAddr(dst.exportData().cast()->plane(0).basePtr), AsAddr(srcBuf)); + EXPECT_EQ(alloc.hostFrees, 0); + } + EXPECT_EQ(alloc.hostFrees, 1); +} + +// ============================================================================= +// exportData / exportData() +// ============================================================================= + +/** + * @brief exportData() returns an ImageData snapshot that mirrors the Image's + * size, format, device, and base pointer. + */ +void TestImageExportData() { + CountingAllocator alloc; + Image img({80, 60}, FMT_RGBA8, alloc, eDeviceType::GPU); + ImageData data = img.exportData(); + + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(data.format().channels(), 4); + + auto strided = data.cast(); + EXPECT_EQ(AsInt(strided.has_value()), 1); + EXPECT_EQ(strided->plane(0).width, 80); + EXPECT_EQ(strided->plane(0).height, 60); + EXPECT_EQ(strided->plane(0).rowStride, static_cast(80 * 4)); +} + +/** + * @brief Templated exportData() returns the matching subclass directly. + */ +void TestImageExportDataTypedSuccess() { + CountingAllocator alloc; + Image img({4, 4}, FMT_U8, alloc, eDeviceType::GPU); + + auto hip = img.exportData(); + EXPECT_EQ(AsInt(hip.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(hip.plane(0).width, 4); +} + +/** + * @brief Templated exportData() throws std::bad_cast when the requested + * subclass does not match the underlying buffer kind. + */ +void TestImageExportDataTypedMismatch() { + CountingAllocator alloc; + Image img({4, 4}, FMT_U8, alloc, eDeviceType::GPU); + + bool threw = false; + try { + (void)img.exportData(); + } catch (const std::bad_cast&) { + threw = true; + } + EXPECT_EQ(AsInt(threw), 1); +} + +// ============================================================================= +// ImageWrapData +// ============================================================================= + +/** + * @brief View-only wrap (no cleanup callback) round-trips metadata and must + * not crash when the Image is destroyed (no free attempt on the sentinel ptr). + */ +void TestImageWrapDataViewOnly() { + Image wrapped = ImageWrapData(MakeFakeHipData(640, 480, FAKE_PTR_A)); + EXPECT_EQ(wrapped.size().w, 640); + EXPECT_EQ(wrapped.size().h, 480); + EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(AsAddr(wrapped.exportData().cast()->plane(0).basePtr), AsAddr(FAKE_PTR_A)); +} + +/** + * @brief Wrap with a cleanup callback: the callback fires exactly once when + * the last Image handle goes out of scope. + */ +void TestImageWrapDataCleanupFires() { + int callbackInvocations = 0; + { + Image wrapped = + ImageWrapData(MakeFakeHipData(100, 100, FAKE_PTR_A), [&](const ImageData&) { ++callbackInvocations; }); + EXPECT_EQ(callbackInvocations, 0); // Not fired during normal use. + } + EXPECT_EQ(callbackInvocations, 1); +} + +/** + * @brief Cleanup callback receives the original wrapped ImageData snapshot — + * the captured base pointer must match what was passed to ImageWrapData. + */ +void TestImageWrapDataCleanupReceivesData() { + void* receivedBasePtr = nullptr; + { + Image wrapped = ImageWrapData(MakeFakeHipData(50, 50, FAKE_PTR_A), [&](const ImageData& d) { + receivedBasePtr = d.cast()->plane(0).basePtr; + }); + } + EXPECT_EQ(AsAddr(receivedBasePtr), AsAddr(FAKE_PTR_A)); +} + +/** + * @brief Cleanup must fire only on LAST handle drop — copies bump the + * refcount, intermediate drops do nothing. + */ +void TestImageWrapDataCleanupFiresOnce() { + int callbackInvocations = 0; + { + Image first = + ImageWrapData(MakeFakeHipData(10, 10, FAKE_PTR_A), [&](const ImageData&) { ++callbackInvocations; }); + Image second = first; // refcount = 2 + Image third = first; // refcount = 3 + { + Image fourth = third; + (void)fourth; + } // dropped → refcount = 3 + EXPECT_EQ(callbackInvocations, 0); + // first, second, third still alive at scope exit + } + EXPECT_EQ(callbackInvocations, 1); +} + +/** + * @brief Wrapped Image's accessors mirror the wrapped ImageData verbatim — + * size, format, device, and base pointer all round-trip unchanged. + */ +void TestImageWrapDataAccessors() { + auto fake = MakeFakeHipData(123, 45, FAKE_PTR_A, FMT_RGBA8); + Image wrapped = ImageWrapData(fake); + + EXPECT_EQ(wrapped.size().w, 123); + EXPECT_EQ(wrapped.size().h, 45); + EXPECT_EQ(wrapped.format().channels(), 4); + EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU)); + + auto strided = wrapped.exportData().cast(); + EXPECT_EQ(AsInt(strided.has_value()), 1); + EXPECT_EQ(strided->plane(0).width, 123); + EXPECT_EQ(strided->plane(0).height, 45); + EXPECT_EQ(AsAddr(strided->plane(0).basePtr), AsAddr(FAKE_PTR_A)); +} + +} // namespace + +int main(int argc, char** argv) { + (void)argc; + (void)argv; + TEST_CASES_BEGIN(); + + // CalcRequirements + TEST_CASE(TestCalcRequirementsRgb8()); + TEST_CASE(TestCalcRequirementsF32()); + TEST_CASE(TestCalcRequirementsU8()); + TEST_CASE(TestCalcRequirementsRejectsInvalidDims()); + TEST_CASE(TestCalcRequirementsLargeDims()); + + // Allocating constructors + TEST_CASE(TestImageHipAllocation()); + TEST_CASE(TestImageHostAllocation()); + TEST_CASE(TestImageRequirementsCtor()); + + // Refcount / lifecycle + TEST_CASE(TestImageCopySharesBuffer()); + TEST_CASE(TestImageMoveSemantics()); + + // exportData + TEST_CASE(TestImageExportData()); + TEST_CASE(TestImageExportDataTypedSuccess()); + TEST_CASE(TestImageExportDataTypedMismatch()); + + // ImageWrapData + TEST_CASE(TestImageWrapDataViewOnly()); + TEST_CASE(TestImageWrapDataCleanupFires()); + TEST_CASE(TestImageWrapDataCleanupReceivesData()); + TEST_CASE(TestImageWrapDataCleanupFiresOnce()); + TEST_CASE(TestImageWrapDataAccessors()); + + TEST_CASES_END(); +} diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp new file mode 100644 index 00000000..31e449e6 --- /dev/null +++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include + +#include "image_test_helpers.hpp" +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +// Static descriptor/format storage for the batch buffer. These are real host +// allocations (so the pointers are valid) but the batch tests only read +// metadata back out of them; nothing dereferences the per-image basePtr fields. +ImageBufferStrided g_imageList[2]; +ImageFormat g_formatList[2] = {FMT_RGB8, FMT_RGB8}; +ImageFormat g_hostFormatList[2] = {FMT_RGB8, FMT_RGB8}; + +// Builds a homogeneous two-image varshape descriptor with a known bounding box +// and uniqueFormat. The returned struct's pointers reference module-static +// arrays so addresses remain stable across calls within a test. +ImageBatchVarShapeBufferStrided MakeHomogeneousBuffer() { + g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A); + g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 3, FAKE_PTR_B); + g_formatList[0] = FMT_RGB8; + g_formatList[1] = FMT_RGB8; + g_hostFormatList[0] = FMT_RGB8; + g_hostFormatList[1] = FMT_RGB8; + + ImageBatchVarShapeBufferStrided buf{}; + buf.uniqueFormat = FMT_RGB8; + buf.maxWidth = 640; + buf.maxHeight = 480; + buf.formatList = g_formatList; + buf.hostFormatList = g_hostFormatList; + buf.imageList = g_imageList; + return buf; +} + +/** + * @brief Verifies HIP-strided varshape construction populates all observable + * state and tags itself as GPU-resident. + */ +void TestImageBatchVarShapeDataStridedHipConstruction() { + auto buf = MakeHomogeneousBuffer(); + ImageBatchVarShapeDataStridedHip data(2, buf); + + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(data.numImages(), 2); + EXPECT_EQ(data.maxSize().w, 640); + EXPECT_EQ(data.maxSize().h, 480); + EXPECT_EQ(data.uniqueFormat().channels(), 3); + EXPECT_EQ(AsAddr(data.formatList()), AsAddr(g_formatList)); + EXPECT_EQ(AsAddr(data.hostFormatList()), AsAddr(g_hostFormatList)); + EXPECT_EQ(AsAddr(data.imageList()), AsAddr(g_imageList)); + EXPECT_EQ(data.imageList()[0].planes[0].width, 640); + EXPECT_EQ(data.imageList()[1].planes[0].width, 320); +} + +/** + * @brief Same shape as the Hip test but for Host-resident varshape data. + */ +void TestImageBatchVarShapeDataStridedHostConstruction() { + auto buf = MakeHomogeneousBuffer(); + ImageBatchVarShapeDataStridedHost data(2, buf); + + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU)); + EXPECT_EQ(data.numImages(), 2); + EXPECT_EQ(data.maxSize().w, 640); + EXPECT_EQ(data.maxSize().h, 480); + EXPECT_EQ(data.uniqueFormat().channels(), 3); + EXPECT_EQ(AsAddr(data.imageList()), AsAddr(g_imageList)); +} + +/** + * @brief Empty batch: maxSize collapses to 0x0 and uniqueFormat is FMT_NONE. + * Producers signal "no images" via numImages == 0; the buffer fields stay + * valid pointers but get ignored. + */ +void TestImageBatchVarShapeDataEmpty() { + ImageBatchVarShapeBufferStrided buf{}; + buf.uniqueFormat = FMT_NONE; + buf.maxWidth = 0; + buf.maxHeight = 0; + buf.formatList = g_formatList; + buf.hostFormatList = g_hostFormatList; + buf.imageList = g_imageList; + + ImageBatchVarShapeDataStridedHip data(0, buf); + + EXPECT_EQ(data.numImages(), 0); + EXPECT_EQ(data.maxSize().w, 0); + EXPECT_EQ(data.maxSize().h, 0); + EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_NONE), 1); +} + +/** + * @brief Heterogeneous formats: per-image formatList carries each entry + * verbatim; uniqueFormat is FMT_NONE since no single format spans the batch. + */ +void TestImageBatchVarShapeDataHeterogeneousFormats() { + g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A); + g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 4, FAKE_PTR_B); + g_formatList[0] = FMT_RGB8; + g_formatList[1] = FMT_RGBA8; + g_hostFormatList[0] = FMT_RGB8; + g_hostFormatList[1] = FMT_RGBA8; + + ImageBatchVarShapeBufferStrided buf{}; + buf.uniqueFormat = FMT_NONE; + buf.maxWidth = 640; + buf.maxHeight = 480; + buf.formatList = g_formatList; + buf.hostFormatList = g_hostFormatList; + buf.imageList = g_imageList; + + ImageBatchVarShapeDataStridedHip data(2, buf); + + EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_NONE), 1); + EXPECT_EQ(AsInt(data.hostFormatList()[0] == FMT_RGB8), 1); + EXPECT_EQ(AsInt(data.hostFormatList()[1] == FMT_RGBA8), 1); +} + +/** + * @brief The two leaf ctors (taking ImageBatchBuffer vs the concrete strided + * buffer directly) must produce observably identical state. + */ +void TestImageBatchVarShapeDataSugarCtor() { + auto buf = MakeHomogeneousBuffer(); + + ImageBatchVarShapeDataStridedHip wide(2, ImageBatchBuffer{.varShapeStrided = buf}); + ImageBatchVarShapeDataStridedHip sugar(2, buf); + + EXPECT_EQ(AsInt(wide.device()), AsInt(sugar.device())); + EXPECT_EQ(wide.numImages(), sugar.numImages()); + EXPECT_EQ(wide.maxSize().w, sugar.maxSize().w); + EXPECT_EQ(wide.maxSize().h, sugar.maxSize().h); + EXPECT_EQ(AsAddr(wide.imageList()), AsAddr(sugar.imageList())); + + ImageBatchVarShapeDataStridedHost wideHost(2, ImageBatchBuffer{.varShapeStrided = buf}); + ImageBatchVarShapeDataStridedHost sugarHost(2, buf); + EXPECT_EQ(AsInt(wideHost.device()), AsInt(sugarHost.device())); + EXPECT_EQ(AsAddr(wideHost.imageList()), AsAddr(sugarHost.imageList())); +} + +/** + * @brief IsCompatibleKind on each level discriminates the buffer kinds it + * accepts. Base accepts anything-but-NONE; VarShape and VarShapeStrided accept + * both Hip and Host varshape; leaves accept only their own. + */ +void TestImageBatchDataIsCompatibleKind() { + EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0); + EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)), + 1); + EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)), + 1); + + EXPECT_EQ(AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0); + EXPECT_EQ( + AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)), + 1); + EXPECT_EQ( + AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)), + 1); + + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0); + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind( + ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)), + 1); + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind( + ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)), + 1); + + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), + 0); + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind( + ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)), + 1); + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind( + ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)), + 0); + + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), + 0); + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind( + ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)), + 0); + EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind( + ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)), + 1); +} + +/** + * @brief Round-trip a derived ImageBatchData through the base reference and + * back via cast<>(). Successful casts must preserve every observable field; + * casts to incompatible kinds must return std::nullopt. + */ +void TestImageBatchDataCast() { + auto buf = MakeHomogeneousBuffer(); + + // Hip → base → Hip should round-trip; intermediate VarShape/Strided also + // succeed; Hip → Host fails. + { + ImageBatchVarShapeDataStridedHip hip(2, buf); + const ImageBatchData& base = hip; + + auto asHip = base.cast(); + EXPECT_EQ(AsInt(asHip.has_value()), 1); + EXPECT_EQ(AsInt(asHip->device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(asHip->numImages(), 2); + EXPECT_EQ(asHip->maxSize().w, 640); + EXPECT_EQ(AsAddr(asHip->imageList()), AsAddr(g_imageList)); + + auto asStrided = base.cast(); + EXPECT_EQ(AsInt(asStrided.has_value()), 1); + EXPECT_EQ(AsInt(asStrided->device()), AsInt(eDeviceType::GPU)); + + auto asVar = base.cast(); + EXPECT_EQ(AsInt(asVar.has_value()), 1); + EXPECT_EQ(asVar->maxSize().h, 480); + + auto asHost = base.cast(); + EXPECT_EQ(AsInt(asHost.has_value()), 0); + } + + // Symmetrically: Host → base → Host succeeds, Host → Hip fails. + { + ImageBatchVarShapeDataStridedHost host(2, buf); + const ImageBatchData& base = host; + + auto asHost = base.cast(); + EXPECT_EQ(AsInt(asHost.has_value()), 1); + EXPECT_EQ(AsInt(asHost->device()), AsInt(eDeviceType::CPU)); + EXPECT_EQ(asHost->numImages(), 2); + + auto asHip = base.cast(); + EXPECT_EQ(AsInt(asHip.has_value()), 0); + } +} + +} // namespace + +int main(int argc, char** argv) { + (void)argc; + (void)argv; + TEST_CASES_BEGIN(); + + TEST_CASE(TestImageBatchVarShapeDataStridedHipConstruction()); + TEST_CASE(TestImageBatchVarShapeDataStridedHostConstruction()); + TEST_CASE(TestImageBatchVarShapeDataEmpty()); + TEST_CASE(TestImageBatchVarShapeDataHeterogeneousFormats()); + TEST_CASE(TestImageBatchVarShapeDataSugarCtor()); + TEST_CASE(TestImageBatchDataIsCompatibleKind()); + TEST_CASE(TestImageBatchDataCast()); + + TEST_CASES_END(); +} diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp new file mode 100644 index 00000000..ab4b9e1b --- /dev/null +++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "image_test_helpers.hpp" +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +// ============================================================================= +// Construction +// ============================================================================= + +void TestConstruction() { + CountingAllocator alloc; + { + ImageBatchVarShape batch(8, alloc); + EXPECT_EQ(batch.capacity(), 8); + EXPECT_EQ(batch.numImages(), 0); + EXPECT_EQ(AsInt(batch.begin() == batch.end()), 1); + } + EXPECT_EQ(alloc.hipAllocs, 2); + EXPECT_EQ(alloc.pinnedAllocs, 2); + EXPECT_EQ(alloc.hipFrees, 2); + EXPECT_EQ(alloc.pinnedFrees, 2); +} + +void TestConstructionCpu() { + CountingAllocator alloc; + { + ImageBatchVarShape batch(8, alloc, eDeviceType::CPU); + EXPECT_EQ(batch.capacity(), 8); + EXPECT_EQ(batch.numImages(), 0); + EXPECT_EQ(AsInt(batch.device()), AsInt(eDeviceType::CPU)); + } + // A CPU batch allocates only its two host descriptor buffers — no device + // memory, no pinned memory, no fence. + EXPECT_EQ(alloc.hostAllocs, 2); + EXPECT_EQ(alloc.hostFrees, 2); + EXPECT_EQ(alloc.hipAllocs, 0); + EXPECT_EQ(alloc.pinnedAllocs, 0); +} + +void TestConstructionRejectsBadCapacity() { + CountingAllocator alloc; + EXPECT_EXCEPTION(ImageBatchVarShape(0, alloc), eStatusType::INVALID_VALUE); + EXPECT_EXCEPTION(ImageBatchVarShape(-3, alloc), eStatusType::INVALID_VALUE); +} + +// ============================================================================= +// pushBack — basic +// ============================================================================= + +void TestPushBackSingle() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + Image img = MakeFakeGpuImage(640, 480, FAKE_PTR_A); + batch.pushBack(img); + + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(batch[0].size().w, 640); + EXPECT_EQ(batch[0].size().h, 480); + EXPECT_EQ(AsInt(batch[0].format() == FMT_RGB8), 1); +} + +void TestPushBackMultipleHeterogeneousSizes() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + batch.pushBack(MakeFakeGpuImage(640, 480, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(320, 240, FAKE_PTR_B)); + batch.pushBack(MakeFakeGpuImage(800, 200, FAKE_PTR_C)); + + EXPECT_EQ(batch.numImages(), 3); + EXPECT_EQ(batch.maxSize().w, 800); + EXPECT_EQ(batch.maxSize().h, 480); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1); +} + +void TestPushBackIteratorRange() { + CountingAllocator alloc; + ImageBatchVarShape batch(8, alloc); + + std::vector imgs; + imgs.push_back(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + imgs.push_back(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + imgs.push_back(MakeFakeGpuImage(300, 300, FAKE_PTR_C)); + + batch.pushBack(imgs.begin(), imgs.end()); + + EXPECT_EQ(batch.numImages(), 3); + EXPECT_EQ(batch.maxSize().w, 300); +} + +// ============================================================================= +// pushBack — validation +// ============================================================================= + +void TestPushBackCapacityOverflow() { + CountingAllocator alloc; + ImageBatchVarShape batch(2, alloc); + + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_B)); + + EXPECT_EXCEPTION(batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_C)), eStatusType::OUT_OF_BOUNDS); +} + +void TestPushBackHostImageRejected() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + Image cpuImg = MakeFakeHostImage(64, 64, FAKE_PTR_A, FMT_U8); + EXPECT_EXCEPTION(batch.pushBack(cpuImg), eStatusType::INVALID_VALUE); +} + +void TestPushBackGpuImageRejectedOnCpuBatch() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc, eDeviceType::CPU); + + Image gpuImg = MakeFakeGpuImage(64, 64, FAKE_PTR_A); + EXPECT_EXCEPTION(batch.pushBack(gpuImg), eStatusType::INVALID_VALUE); +} + +// Note: pushBack's single-plane validation is defense-in-depth — Image's own +// exportData() (image.cpp:118) currently hardcodes numPlanes=1 regardless of +// the underlying buffer, so the public API can't construct a multi-plane Image +// for this guard to fire on. The test would need to be revisited when planar +// formats land in Image itself. + +void TestPushBackRangeRollbackOnFailure() { + CountingAllocator alloc; + ImageBatchVarShape batch(8, alloc); + + // Pre-populate so we can confirm the rollback restores exactly the + // pre-call state, not just back to zero. + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + EXPECT_EQ(batch.numImages(), 1); + + // Mid-range CPU image — should rollback the partially-pushed entries. + std::vector imgs; + imgs.push_back(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + imgs.push_back(MakeFakeHostImage(300, 300, FAKE_PTR_C)); // Will throw. + + EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::INVALID_VALUE); + + // Pre-call state is intact: 1 image, original maxSize. + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(batch.maxSize().w, 100); +} + +void TestPushBackRangeOverflowPrechecked() { + CountingAllocator alloc; + ImageBatchVarShape batch(2, alloc); + + std::vector imgs; + imgs.push_back(MakeFakeGpuImage(10, 10, FAKE_PTR_A)); + imgs.push_back(MakeFakeGpuImage(20, 20, FAKE_PTR_B)); + imgs.push_back(MakeFakeGpuImage(30, 30, FAKE_PTR_C)); // 3rd overflows capacity 2. + + EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::OUT_OF_BOUNDS); + // Pre-checked: nothing was pushed. + EXPECT_EQ(batch.numImages(), 0); +} + +// ============================================================================= +// popBack / clear +// ============================================================================= + +void TestPopBack() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + batch.popBack(); + + EXPECT_EQ(batch.numImages(), 1); + // maxSize was reset on pop; the rescan should drop back to 100. + EXPECT_EQ(batch.maxSize().w, 100); +} + +void TestPopBackMultiple() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + batch.pushBack(MakeFakeGpuImage(300, 300, FAKE_PTR_C)); + batch.popBack(2); + + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(batch.maxSize().w, 100); +} + +void TestPopBackUnderflow() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + + EXPECT_EXCEPTION(batch.popBack(2), eStatusType::OUT_OF_BOUNDS); + // State preserved. + EXPECT_EQ(batch.numImages(), 1); +} + +void TestClearAndReuse() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + batch.clear(); + + EXPECT_EQ(batch.numImages(), 0); + EXPECT_EQ(batch.maxSize().w, 0); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); + + // Reuse after clear. + batch.pushBack(MakeFakeGpuImage(50, 50, FAKE_PTR_C, FMT_U8)); + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_U8), 1); +} + +// ============================================================================= +// uniqueFormat / maxSize cache +// ============================================================================= + +void TestUniqueFormatHomogeneous() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(128, 128, FAKE_PTR_B)); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1); +} + +void TestUniqueFormatHeterogeneous() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_B, FMT_RGBA8)); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); +} + +void TestUniqueFormatEmptyBatch() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); + EXPECT_EQ(batch.maxSize().w, 0); + EXPECT_EQ(batch.maxSize().h, 0); +} + +// ============================================================================= +// exportData +// ============================================================================= + +// exportData tests use the default allocator instead of CountingAllocator +// because they exercise the real H2D hipMemcpyAsync, which requires the +// device-side buffer to be a real hipMalloc'd pointer. + +void TestExportDataEmpty() { + ImageBatchVarShape batch(4); + + auto data = batch.exportData(0); + EXPECT_EQ(data.numImages(), 0); + EXPECT_EQ(data.maxSize().w, 0); + EXPECT_EQ(data.maxSize().h, 0); + EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_NONE), 1); + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU)); +} + +void TestExportDataMetadata() { + ImageBatchVarShape batch(4); + batch.pushBack(MakeFakeGpuImage(640, 480, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(320, 240, FAKE_PTR_B)); + + auto data = batch.exportData(0); + EXPECT_EQ(data.numImages(), 2); + EXPECT_EQ(data.maxSize().w, 640); + EXPECT_EQ(data.maxSize().h, 480); + EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_RGB8), 1); + EXPECT_EQ(AsInt(data.imageList() != nullptr), 1); + EXPECT_EQ(AsInt(data.formatList() != nullptr), 1); + EXPECT_EQ(AsInt(data.hostFormatList() != nullptr), 1); + // Pinned host mirror format entries are immediately host-readable. + EXPECT_EQ(AsInt(data.hostFormatList()[0] == FMT_RGB8), 1); + EXPECT_EQ(AsInt(data.hostFormatList()[1] == FMT_RGB8), 1); +} + +void TestExportDataCastRoundTrip() { + ImageBatchVarShape batch(4); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); + + auto hipData = batch.exportData(0); + EXPECT_EQ(hipData.numImages(), 1); + EXPECT_EQ(AsInt(hipData.device()), AsInt(eDeviceType::GPU)); + + // Cast through the base reference: succeeds for compatible kinds, nullopt + // for the host-resident leaf. + const ImageBatchData& base = hipData; + EXPECT_EQ(AsInt(base.cast().has_value()), 1); + EXPECT_EQ(AsInt(base.cast().has_value()), 0); +} + +// ============================================================================= +// CPU path +// ============================================================================= + +void TestCpuPushBackAndCaches() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc, eDeviceType::CPU); + + batch.pushBack(MakeFakeHostImage(640, 480, FAKE_PTR_A)); + batch.pushBack(MakeFakeHostImage(320, 240, FAKE_PTR_B)); + batch.pushBack(MakeFakeHostImage(800, 200, FAKE_PTR_C)); + + EXPECT_EQ(batch.numImages(), 3); + EXPECT_EQ(batch.maxSize().w, 800); + EXPECT_EQ(batch.maxSize().h, 480); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1); + + batch.popBack(2); + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(batch.maxSize().w, 640); + + batch.clear(); + EXPECT_EQ(batch.numImages(), 0); + EXPECT_EQ(batch.maxSize().w, 0); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); +} + +// CPU exportData performs no H2D copy, so it runs against the CountingAllocator's +// malloc-backed host buffers without needing a real device. +void TestExportDataHost() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc, eDeviceType::CPU); + batch.pushBack(MakeFakeHostImage(640, 480, FAKE_PTR_A)); + batch.pushBack(MakeFakeHostImage(320, 240, FAKE_PTR_B)); + + auto data = batch.exportData(0); + EXPECT_EQ(data.numImages(), 2); + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU)); + EXPECT_EQ(data.maxSize().w, 640); + EXPECT_EQ(data.maxSize().h, 480); + EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_RGB8), 1); + EXPECT_EQ(AsInt(data.imageList() != nullptr), 1); + EXPECT_EQ(AsInt(data.formatList() != nullptr), 1); + // formatList and hostFormatList alias the same host allocation for CPU batches. + EXPECT_EQ(AsInt(data.formatList() == data.hostFormatList()), 1); + EXPECT_EQ(AsInt(data.hostFormatList()[0] == FMT_RGB8), 1); + // Host descriptor table is directly readable: per-image dimensions match. + EXPECT_EQ(static_cast(data.imageList()[0].planes[0].width), 640); + EXPECT_EQ(static_cast(data.imageList()[1].planes[0].width), 320); + EXPECT_EQ(AsInt(data.imageList()[0].planes[0].basePtr == FAKE_PTR_A), 1); +} + +void TestExportDataHostCastRoundTrip() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc, eDeviceType::CPU); + batch.pushBack(MakeFakeHostImage(64, 64, FAKE_PTR_A)); + + ImageBatchVarShapeDataStrided data = batch.exportData(0); + // A CPU snapshot casts to the host leaf but not the device leaf. + EXPECT_EQ(AsInt(data.cast().has_value()), 1); + EXPECT_EQ(AsInt(data.cast().has_value()), 0); +} + +// ============================================================================= +// Move semantics +// ============================================================================= + +void TestMoveConstruction() { + CountingAllocator alloc; + { + ImageBatchVarShape src(4, alloc); + src.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + src.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + + ImageBatchVarShape dst(std::move(src)); + EXPECT_EQ(dst.numImages(), 2); + EXPECT_EQ(dst.maxSize().w, 200); + + // Source is valid-but-empty; destructor must not double-free. + EXPECT_EQ(src.numImages(), 0); + EXPECT_EQ(src.capacity(), 0); + } + // Exactly one set of allocations should have been freed. + EXPECT_EQ(alloc.hipAllocs, alloc.hipFrees); + EXPECT_EQ(alloc.pinnedAllocs, alloc.pinnedFrees); +} + +// ============================================================================= +// Iterator +// ============================================================================= + +void TestIteratorRangeFor() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + batch.pushBack(MakeFakeGpuImage(300, 300, FAKE_PTR_C)); + + int32_t expectedW = 100; + int32_t count = 0; + for (const Image& img : batch) { + EXPECT_EQ(img.size().w, expectedW); + expectedW += 100; + ++count; + } + EXPECT_EQ(count, 3); +} + +} // namespace + +int main(int argc, char** argv) { + (void)argc; + (void)argv; + TEST_CASES_BEGIN(); + + TEST_CASE(TestConstruction()); + TEST_CASE(TestConstructionCpu()); + TEST_CASE(TestConstructionRejectsBadCapacity()); + + TEST_CASE(TestPushBackSingle()); + TEST_CASE(TestPushBackMultipleHeterogeneousSizes()); + TEST_CASE(TestPushBackIteratorRange()); + + TEST_CASE(TestPushBackCapacityOverflow()); + TEST_CASE(TestPushBackHostImageRejected()); + TEST_CASE(TestPushBackGpuImageRejectedOnCpuBatch()); + TEST_CASE(TestPushBackRangeRollbackOnFailure()); + TEST_CASE(TestPushBackRangeOverflowPrechecked()); + + TEST_CASE(TestPopBack()); + TEST_CASE(TestPopBackMultiple()); + TEST_CASE(TestPopBackUnderflow()); + TEST_CASE(TestClearAndReuse()); + + TEST_CASE(TestUniqueFormatHomogeneous()); + TEST_CASE(TestUniqueFormatHeterogeneous()); + TEST_CASE(TestUniqueFormatEmptyBatch()); + + TEST_CASE(TestExportDataEmpty()); + TEST_CASE(TestExportDataMetadata()); + TEST_CASE(TestExportDataCastRoundTrip()); + + TEST_CASE(TestCpuPushBackAndCaches()); + TEST_CASE(TestExportDataHost()); + TEST_CASE(TestExportDataHostCastRoundTrip()); + + TEST_CASE(TestMoveConstruction()); + + TEST_CASE(TestIteratorRangeFor()); + + TEST_CASES_END(); +} diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp new file mode 100644 index 00000000..8a7945fd --- /dev/null +++ b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#include +#include +#include + +#include "image_test_helpers.hpp" +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +ImageBufferStrided MakeThreePlaneBuffer() { + // Mimics a planar layout (e.g. YUV420-style) with sub-sampled chroma — three + // planes of differing dimensions and strides backed by distinct buffers. + ImageBufferStrided buf{}; + buf.numPlanes = 3; + buf.planes[0] = {1920, 1080, 1920, FAKE_PTR_A}; // Y full-resolution + buf.planes[1] = {960, 540, 960, FAKE_PTR_B}; // U sub-sampled + buf.planes[2] = {960, 540, 960, FAKE_PTR_C}; // V sub-sampled + return buf; +} + +/** + * @brief Verifies HIP-strided construction populates all observable state and + * tags itself as GPU-resident. + */ +void TestImageDataStridedHipConstruction() { + auto buf = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A); + ImageDataStridedHip data(FMT_RGB8, buf); + + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(data.numPlanes(), 1); + EXPECT_EQ(data.size().w, 640); + EXPECT_EQ(data.size().h, 480); + EXPECT_EQ(data.plane(0).width, 640); + EXPECT_EQ(data.plane(0).height, 480); + EXPECT_EQ(data.plane(0).rowStride, static_cast(640 * 3)); + EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_A)); + EXPECT_EQ(data.format().channels(), 3); +} + +/** + * @brief Same shape as the Hip test but for Host-resident strided data. + */ +void TestImageDataStridedHostConstruction() { + auto buf = MakeSinglePlaneBuffer(320, 240, 320, FAKE_PTR_B); + ImageDataStridedHost data(FMT_U8, buf); + + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU)); + EXPECT_EQ(data.numPlanes(), 1); + EXPECT_EQ(data.size().w, 320); + EXPECT_EQ(data.size().h, 240); + EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_B)); + EXPECT_EQ(data.format().channels(), 1); +} + +/** + * @brief Multi-plane buffers must round-trip per-plane dimensions and pointers + * unchanged. size() reports plane 0 by convention; planes 1..N may be smaller. + */ +void TestImageDataStridedMultiPlane() { + auto buf = MakeThreePlaneBuffer(); + ImageDataStridedHip data(FMT_U8, buf); + + EXPECT_EQ(data.numPlanes(), 3); + EXPECT_EQ(data.size().w, 1920); + EXPECT_EQ(data.size().h, 1080); + + EXPECT_EQ(data.plane(0).width, 1920); + EXPECT_EQ(data.plane(0).height, 1080); + EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_A)); + + EXPECT_EQ(data.plane(1).width, 960); + EXPECT_EQ(data.plane(1).height, 540); + EXPECT_EQ(AsAddr(data.plane(1).basePtr), AsAddr(FAKE_PTR_B)); + + EXPECT_EQ(data.plane(2).width, 960); + EXPECT_EQ(data.plane(2).height, 540); + EXPECT_EQ(AsAddr(data.plane(2).basePtr), AsAddr(FAKE_PTR_C)); +} + +/** + * @brief The two leaf ctors (taking ImageBuffer vs ImageBufferStrided directly) + * must produce observably identical state. + */ +void TestImageDataStridedSugarCtor() { + auto buf = MakeSinglePlaneBuffer(100, 200, 400, FAKE_PTR_A); + + ImageDataStridedHip wide(FMT_RGBA8, ImageBuffer{.strided = buf}); + ImageDataStridedHip sugar(FMT_RGBA8, buf); + + EXPECT_EQ(AsInt(wide.device()), AsInt(sugar.device())); + EXPECT_EQ(wide.numPlanes(), sugar.numPlanes()); + EXPECT_EQ(AsAddr(wide.plane(0).basePtr), AsAddr(sugar.plane(0).basePtr)); + EXPECT_EQ(wide.plane(0).rowStride, sugar.plane(0).rowStride); + + ImageDataStridedHost wideHost(FMT_U8, ImageBuffer{.strided = buf}); + ImageDataStridedHost sugarHost(FMT_U8, buf); + EXPECT_EQ(AsInt(wideHost.device()), AsInt(sugarHost.device())); + EXPECT_EQ(AsAddr(wideHost.plane(0).basePtr), AsAddr(sugarHost.plane(0).basePtr)); +} + +/** + * @brief IsCompatibleKind on each level discriminates the buffer kinds it + * accepts. Base accepts anything-but-NONE; Strided accepts both Hip and Host; + * leaves accept only their own. + */ +void TestImageDataIsCompatibleKind() { + EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0); + EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1); + EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1); + + EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0); + EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1); + EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1); + + EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0); + EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1); + EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 0); + + EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0); + EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 0); + EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1); +} + +/** + * @brief Round-trip a derived ImageData through the base reference and back + * via cast<>(). Successful casts must preserve every observable field; casts + * to incompatible kinds must return std::nullopt. + */ +void TestImageDataCast() { + auto buf = MakeSinglePlaneBuffer(800, 600, 800 * 4, FAKE_PTR_A); + + // Hip → base → Hip should round-trip, Hip → Host should fail. + { + ImageDataStridedHip hip(FMT_RGBA8, buf); + const ImageData& base = hip; + + auto asHip = base.cast(); + EXPECT_EQ(AsInt(asHip.has_value()), 1); + EXPECT_EQ(AsInt(asHip->device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(AsAddr(asHip->plane(0).basePtr), AsAddr(FAKE_PTR_A)); + EXPECT_EQ(asHip->plane(0).width, 800); + + auto asStrided = base.cast(); + EXPECT_EQ(AsInt(asStrided.has_value()), 1); + EXPECT_EQ(AsInt(asStrided->device()), AsInt(eDeviceType::GPU)); + + auto asHost = base.cast(); + EXPECT_EQ(AsInt(asHost.has_value()), 0); + } + + // Symmetrically: Host → base → Host succeeds, Host → Hip fails. + { + ImageDataStridedHost host(FMT_RGBA8, buf); + const ImageData& base = host; + + auto asHost = base.cast(); + EXPECT_EQ(AsInt(asHost.has_value()), 1); + EXPECT_EQ(AsInt(asHost->device()), AsInt(eDeviceType::CPU)); + + auto asHip = base.cast(); + EXPECT_EQ(AsInt(asHip.has_value()), 0); + } +} + +} // namespace + +int main(int argc, char** argv) { + (void)argc; + (void)argv; + TEST_CASES_BEGIN(); + + TEST_CASE(TestImageDataStridedHipConstruction()); + TEST_CASE(TestImageDataStridedHostConstruction()); + TEST_CASE(TestImageDataStridedMultiPlane()); + TEST_CASE(TestImageDataStridedSugarCtor()); + TEST_CASE(TestImageDataIsCompatibleKind()); + TEST_CASE(TestImageDataCast()); + + TEST_CASES_END(); +} diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp index 873f05dc..6dd736ad 100644 --- a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp @@ -103,7 +103,7 @@ int64_t GetCoordOfBorderPel(int64_t u, int64_t dimSize, eBorderType borderMode) * * @tparam T The underlying datatype of the image. (e.g. uchar3) * @tparam BT The base datatype of the image (e.g. unsigned char) - * @param[in] input The input ImageWrapper referencing the underlying image data. + * @param[in] input The input TensorWrapper referencing the underlying image data. * @param[in] borderMode The border mode used to handle out of bounds coordinates. * @param[in] borderValue The value to fallback to when handling out of bounds coordinates with the CONSTANT border * mode. @@ -115,8 +115,8 @@ int64_t GetCoordOfBorderPel(int64_t u, int64_t dimSize, eBorderType borderMode) * coordinates fall out of bounds. */ template > -BT GoldenBorderAt(ImageWrapper& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y, - int64_t x, int64_t channel) { +BT GoldenBorderAt(TensorWrapper& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y, int64_t x, + int64_t channel) { int64_t outX = x, outY = y; if (borderMode == eBorderType::BORDER_TYPE_CONSTANT) { @@ -130,7 +130,7 @@ BT GoldenBorderAt(ImageWrapper& input, eBorderType borderMode, T borderValue, outY = GetCoordOfBorderPel(y, input.height(), borderMode); } - // Return the value at the modified outX, outY coordinates using the passed in ImageWrapper. + // Return the value at the modified outX, outY coordinates using the passed in TensorWrapper. return detail::GetElement(input.at(sample, outY, outX, 0), channel); } @@ -161,7 +161,8 @@ void TestCorrectness(float4 borderValue, int32_t batchSize, Size2D imageSize, in FillVector(inputData); // BorderWrapper to calculate the actual calculated values. - BorderWrapper borderWrap(ImageWrapper(inputData, batchSize, imageSize.w, imageSize.h), borderVal); + auto borderWrap = + MakeBorderWrapper(TensorWrapper(inputData, batchSize, imageSize.w, imageSize.h), borderVal); std::vector actualOutput(numElementsWithBorder); int actualIndex = 0; for (int batch = 0; batch < batchSize; ++batch) { @@ -176,9 +177,9 @@ void TestCorrectness(float4 borderValue, int32_t batchSize, Size2D imageSize, in } } - // ImageWrapper for use in the golden output generator. ImageWrapper is unit tested separately, and is + // TensorWrapper for use in the golden output generator. TensorWrapper is unit tested separately, and is // considered working at this point in the dependency chain. - ImageWrapper imageWrap(inputData, batchSize, imageSize.w, imageSize.h); + TensorWrapper imageWrap(inputData, batchSize, imageSize.w, imageSize.h); std::vector goldenOutput(numElementsWithBorder); int goldenIndex = 0; for (int batch = 0; batch < batchSize; ++batch) { diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_image_batch_var_shape_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_image_batch_var_shape_wrapper.cpp new file mode 100644 index 00000000..12d5000e --- /dev/null +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_image_batch_var_shape_wrapper.cpp @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +// Per-image copy kernel: writes dst[n,y,x,c] = src[n,y,x,c] for the image at batch index n. +// Each launch covers exactly one image — the host loop steps through n and resizes the +// grid to that image's dimensions, which avoids needing a max-bounds check inside the kernel. +template +__global__ void VarShapeCopyKernel(ImageBatchVarShapeWrapper src, ImageBatchVarShapeWrapper dst, int32_t n) { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x >= dst.width(n) || y >= dst.height(n)) return; + dst.at(n, y, x, 0) = src.at(n, y, x, 0); +} + +// Roundtrip test: build a varshape batch from heterogeneous host pixel data, run the copy kernel from src varshape +// wrapper to dst varshape wrapper, read pixels back, and verify byte-equality with the original input. +template > +void TestRoundtripCopy(const std::vector& sizes, ImageFormat fmt) { + const int channels = detail::NumElements; + const int32_t numImages = static_cast(sizes.size()); + + // Generate per-image host pixel data. + std::vector> hostPixels(numImages); + for (int32_t i = 0; i < numImages; ++i) { + hostPixels[i].resize(static_cast(sizes[i].w) * sizes[i].h * channels); + FillVector(hostPixels[i], /*seed=*/static_cast(0x1000 + i)); + } + + hipStream_t stream = nullptr; + + // Build source batch and copy host pixels in. + ImageBatchVarShape srcBatch(numImages); + std::vector srcImages; + srcImages.reserve(numImages); + for (int32_t i = 0; i < numImages; ++i) { + srcImages.emplace_back(sizes[i], fmt); + auto sd = srcImages[i].exportData(); + const ImagePlaneStrided& sp = sd.plane(0); + const size_t rowBytes = static_cast(sizes[i].w) * channels * sizeof(BT); + HIP_VALIDATE_NO_ERRORS(hipMemcpy2DAsync(sp.basePtr, sp.rowStride, hostPixels[i].data(), rowBytes, rowBytes, + sizes[i].h, hipMemcpyHostToDevice, stream)); + srcBatch.pushBack(srcImages[i]); + } + + // Build destination batch with matching shapes. + ImageBatchVarShape dstBatch(numImages); + std::vector dstImages; + dstImages.reserve(numImages); + for (int32_t i = 0; i < numImages; ++i) { + dstImages.emplace_back(sizes[i], fmt); + dstBatch.pushBack(dstImages[i]); + } + + auto srcData = srcBatch.exportData(stream); + auto dstData = dstBatch.exportData(stream); + ImageBatchVarShapeWrapper srcWrap(srcData); + ImageBatchVarShapeWrapper dstWrap(dstData); + + // Launch one kernel per image (sizes vary so a single 3D launch can't bound y to per-image height cleanly). + for (int32_t i = 0; i < numImages; ++i) { + dim3 block(16, 16); + dim3 grid((sizes[i].w + block.x - 1) / block.x, (sizes[i].h + block.y - 1) / block.y); + VarShapeCopyKernel<<>>(srcWrap, dstWrap, i); + } + + HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); + + // Read back dst pixels and verify byte-for-byte against the original host input. + for (int32_t i = 0; i < numImages; ++i) { + std::vector dstHost(static_cast(sizes[i].w) * sizes[i].h * channels); + auto dd = dstImages[i].exportData(); + const ImagePlaneStrided& dp = dd.plane(0); + const size_t rowBytes = static_cast(sizes[i].w) * channels * sizeof(BT); + HIP_VALIDATE_NO_ERRORS(hipMemcpy2D(dstHost.data(), rowBytes, dp.basePtr, dp.rowStride, rowBytes, sizes[i].h, + hipMemcpyDeviceToHost)); + CompareVectors(dstHost, hostPixels[i]); + } +} + +// Border-composition test: write the BORDER_TYPE_CONSTANT fallback for every output pixel by reading from a coordinate +// that is guaranteed to be out of bounds for every image (-1, -1). Confirms BorderWrapper> correctly delegates to width(n) / height(n) for per-image bounds; otherwise it would +// dereference invalid memory. +template +__global__ void VarShapeBorderConstantKernel( + BorderWrapper> src, + ImageBatchVarShapeWrapper dst, int32_t n) { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x >= dst.width(n) || y >= dst.height(n)) return; + dst.at(n, y, x, 0) = src.at(n, -1, -1, 0); +} + +template > +void TestBorderConstantComposition(const std::vector& sizes, ImageFormat fmt, T borderValue) { + const int channels = detail::NumElements; + const int32_t numImages = static_cast(sizes.size()); + + // Source pixels content doesn't matter — every read is forced OOB. + ImageBatchVarShape srcBatch(numImages); + ImageBatchVarShape dstBatch(numImages); + std::vector srcImages, dstImages; + srcImages.reserve(numImages); + dstImages.reserve(numImages); + for (int32_t i = 0; i < numImages; ++i) { + srcImages.emplace_back(sizes[i], fmt); + dstImages.emplace_back(sizes[i], fmt); + srcBatch.pushBack(srcImages[i]); + dstBatch.pushBack(dstImages[i]); + } + + hipStream_t stream = nullptr; + auto srcData = srcBatch.exportData(stream); + auto dstData = dstBatch.exportData(stream); + + auto srcWrap = + MakeBorderWrapper(ImageBatchVarShapeWrapper(srcData), borderValue); + ImageBatchVarShapeWrapper dstWrap(dstData); + + for (int32_t i = 0; i < numImages; ++i) { + dim3 block(16, 16); + dim3 grid((sizes[i].w + block.x - 1) / block.x, (sizes[i].h + block.y - 1) / block.y); + VarShapeBorderConstantKernel<<>>(srcWrap, dstWrap, i); + } + HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); + + // Expect every output pixel of every image to equal borderValue. + std::vector borderBytes(channels); + for (int c = 0; c < channels; ++c) borderBytes[c] = detail::GetElement(borderValue, c); + + for (int32_t i = 0; i < numImages; ++i) { + const size_t pixels = static_cast(sizes[i].w) * sizes[i].h; + std::vector dstHost(pixels * channels); + auto dd = dstImages[i].exportData(); + const ImagePlaneStrided& dp = dd.plane(0); + const size_t rowBytes = static_cast(sizes[i].w) * channels * sizeof(BT); + HIP_VALIDATE_NO_ERRORS(hipMemcpy2D(dstHost.data(), rowBytes, dp.basePtr, dp.rowStride, rowBytes, sizes[i].h, + hipMemcpyDeviceToHost)); + std::vector expected(pixels * channels); + for (size_t p = 0; p < pixels; ++p) { + for (int c = 0; c < channels; ++c) expected[p * channels + c] = borderBytes[c]; + } + CompareVectors(dstHost, expected); + } +} + +// Interpolation-composition test: NEAREST interpolation at integer coordinates is the identity, so a roundtrip copy +// via InterpolationWrapper must equal the source. Confirms the full wrapper chain +// composes correctly over a VarShape backing. +template +__global__ void VarShapeInterpNearestKernel( + InterpolationWrapper>> + src, + ImageBatchVarShapeWrapper dst, int32_t n) { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x >= dst.width(n) || y >= dst.height(n)) return; + dst.at(n, y, x, 0) = src.at(n, static_cast(y), static_cast(x), 0); +} + +template > +void TestInterpolationNearestComposition(const std::vector& sizes, ImageFormat fmt) { + const int channels = detail::NumElements; + const int32_t numImages = static_cast(sizes.size()); + + std::vector> hostPixels(numImages); + for (int32_t i = 0; i < numImages; ++i) { + hostPixels[i].resize(static_cast(sizes[i].w) * sizes[i].h * channels); + FillVector(hostPixels[i], static_cast(0x2000 + i)); + } + + hipStream_t stream = nullptr; + ImageBatchVarShape srcBatch(numImages); + ImageBatchVarShape dstBatch(numImages); + std::vector srcImages, dstImages; + srcImages.reserve(numImages); + dstImages.reserve(numImages); + for (int32_t i = 0; i < numImages; ++i) { + srcImages.emplace_back(sizes[i], fmt); + auto sd = srcImages[i].exportData(); + const ImagePlaneStrided& sp = sd.plane(0); + const size_t rowBytes = static_cast(sizes[i].w) * channels * sizeof(BT); + HIP_VALIDATE_NO_ERRORS(hipMemcpy2DAsync(sp.basePtr, sp.rowStride, hostPixels[i].data(), rowBytes, rowBytes, + sizes[i].h, hipMemcpyHostToDevice, stream)); + srcBatch.pushBack(srcImages[i]); + + dstImages.emplace_back(sizes[i], fmt); + dstBatch.pushBack(dstImages[i]); + } + + auto srcData = srcBatch.exportData(stream); + auto dstData = dstBatch.exportData(stream); + + auto srcWrap = MakeInterpolationWrapper( + MakeBorderWrapper(ImageBatchVarShapeWrapper(srcData), T{})); + ImageBatchVarShapeWrapper dstWrap(dstData); + + for (int32_t i = 0; i < numImages; ++i) { + dim3 block(16, 16); + dim3 grid((sizes[i].w + block.x - 1) / block.x, (sizes[i].h + block.y - 1) / block.y); + VarShapeInterpNearestKernel<<>>(srcWrap, dstWrap, i); + } + HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); + + for (int32_t i = 0; i < numImages; ++i) { + std::vector dstHost(static_cast(sizes[i].w) * sizes[i].h * channels); + auto dd = dstImages[i].exportData(); + const ImagePlaneStrided& dp = dd.plane(0); + const size_t rowBytes = static_cast(sizes[i].w) * channels * sizeof(BT); + HIP_VALIDATE_NO_ERRORS(hipMemcpy2D(dstHost.data(), rowBytes, dp.basePtr, dp.rowStride, rowBytes, sizes[i].h, + hipMemcpyDeviceToHost)); + CompareVectors(dstHost, hostPixels[i]); + } +} + +// Verify accessor surface: width(n), height(n), batches(), channels(). +template +void TestAccessors(const std::vector& sizes, ImageFormat fmt) { + const int32_t numImages = static_cast(sizes.size()); + ImageBatchVarShape batch(numImages); + std::vector handles; + handles.reserve(numImages); + for (int32_t i = 0; i < numImages; ++i) { + handles.emplace_back(sizes[i], fmt); + batch.pushBack(handles[i]); + } + auto data = batch.exportData(0); + ImageBatchVarShapeWrapper wrap(data); + + EXPECT_EQ(wrap.batches(), static_cast(numImages)); + EXPECT_EQ(wrap.channels(), static_cast(detail::NumElements)); + // width/height are device pointers under the hood; reading them on host post-sync is safe because exportData + // recorded a hipEvent that hipStreamSynchronize on the null stream above (implicit) drains. The descriptor table + // lives in device memory though, so we round-trip the lookups through a tiny D->H pull via the wrapper's host + // mirror path — here we just check that the construction succeeded; per-image width/height behavior is exercised + // end-to-end by TestRoundtripCopy. + HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(0)); +} + +// Host-path roundtrip: a CPU-resident batch exports a host snapshot, and the wrapper reads/writes +// it directly from host code — no kernel launch, no stream. Mirrors TestRoundtripCopy for the GPU +// path and confirms the residency-agnostic constructor handles the ...Host leaf. +template > +void TestHostRoundtripCopy(const std::vector& sizes, ImageFormat fmt) { + const int channels = detail::NumElements; + const int32_t numImages = static_cast(sizes.size()); + + std::vector> hostPixels(numImages); + for (int32_t i = 0; i < numImages; ++i) { + hostPixels[i].resize(static_cast(sizes[i].w) * sizes[i].h * channels); + FillVector(hostPixels[i], /*seed=*/static_cast(0x2000 + i)); + } + + // CPU-resident batches; fill each source image's host buffer directly. + ImageBatchVarShape srcBatch(numImages, eDeviceType::CPU); + ImageBatchVarShape dstBatch(numImages, eDeviceType::CPU); + std::vector srcImages, dstImages; + srcImages.reserve(numImages); + dstImages.reserve(numImages); + for (int32_t i = 0; i < numImages; ++i) { + srcImages.emplace_back(sizes[i], fmt, eDeviceType::CPU); + dstImages.emplace_back(sizes[i], fmt, eDeviceType::CPU); + + auto sd = srcImages[i].exportData(); + const ImagePlaneStrided& sp = sd.plane(0); + const size_t rowBytes = static_cast(sizes[i].w) * channels * sizeof(BT); + for (int32_t y = 0; y < sizes[i].h; ++y) { + std::memcpy(static_cast(sp.basePtr) + y * sp.rowStride, + hostPixels[i].data() + static_cast(y) * sizes[i].w * channels, rowBytes); + } + + srcBatch.pushBack(srcImages[i]); + dstBatch.pushBack(dstImages[i]); + } + + auto srcData = srcBatch.exportData(0); + auto dstData = dstBatch.exportData(0); + ImageBatchVarShapeWrapper srcWrap(srcData); + ImageBatchVarShapeWrapper dstWrap(dstData); + + // Copy whole pixels through the wrapper entirely on the host (same at(n,y,x,0) semantics as + // VarShapeCopyKernel, just without a device launch). + for (int32_t n = 0; n < numImages; ++n) { + for (int64_t y = 0; y < srcWrap.height(n); ++y) { + for (int64_t x = 0; x < srcWrap.width(n); ++x) { + dstWrap.at(n, y, x, 0) = srcWrap.at(n, y, x, 0); + } + } + } + + // Read dst host buffers back and verify byte-for-byte against the original input. + for (int32_t i = 0; i < numImages; ++i) { + std::vector dstHost(static_cast(sizes[i].w) * sizes[i].h * channels); + auto dd = dstImages[i].exportData(); + const ImagePlaneStrided& dp = dd.plane(0); + const size_t rowBytes = static_cast(sizes[i].w) * channels * sizeof(BT); + for (int32_t y = 0; y < sizes[i].h; ++y) { + std::memcpy(dstHost.data() + static_cast(y) * sizes[i].w * channels, + static_cast(dp.basePtr) + y * dp.rowStride, rowBytes); + } + CompareVectors(dstHost, hostPixels[i]); + } +} + +} // namespace + +int main(int argc, char** argv) { + (void)argc; + (void)argv; + TEST_CASES_BEGIN(); + + // Single-channel, heterogeneous sizes. + TEST_CASE(TestRoundtripCopy({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_U8)); + TEST_CASE(TestRoundtripCopy({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_F32)); + + // Multi-channel interleaved. + TEST_CASE(TestRoundtripCopy({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_RGB8)); + TEST_CASE(TestRoundtripCopy({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_RGBA8)); + + // Homogeneous batch — the wrapper should still work when all images share the same shape. + TEST_CASE(TestRoundtripCopy({{64, 64}, {64, 64}, {64, 64}}, FMT_RGBA8)); + + // Single image, large. + TEST_CASE(TestRoundtripCopy({{128, 96}}, FMT_RGB8)); + + // CPU path: the same roundtrip driven entirely from host code over a host-resident snapshot. + TEST_CASE(TestHostRoundtripCopy({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_U8)); + TEST_CASE(TestHostRoundtripCopy({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_F32)); + TEST_CASE(TestHostRoundtripCopy({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_RGB8)); + TEST_CASE(TestHostRoundtripCopy({{64, 64}, {64, 64}}, FMT_RGBA8)); + + TEST_CASE(TestAccessors({{16, 12}, {32, 24}, {7, 5}}, FMT_RGB8)); + + // BorderWrapper composed over ImageBatchVarShapeWrapper: constant-fill via guaranteed-OOB read. + TEST_CASE( + TestBorderConstantComposition({{16, 12}, {32, 24}, {7, 5}}, FMT_RGB8, make_uchar3(0xAB, 0xCD, 0xEF))); + TEST_CASE(TestBorderConstantComposition({{16, 12}, {32, 24}, {7, 5}}, FMT_RGBA8, + make_uchar4(0x12, 0x34, 0x56, 0x78))); + + // InterpolationWrapper composed over ImageBatchVarShapeWrapper: integer-coord roundtrip is identity. + TEST_CASE(TestInterpolationNearestComposition({{16, 12}, {32, 24}, {7, 5}}, FMT_U8)); + TEST_CASE(TestInterpolationNearestComposition({{16, 12}, {32, 24}, {7, 5}}, FMT_RGB8)); + TEST_CASE(TestInterpolationNearestComposition({{16, 12}, {32, 24}, {7, 5}}, FMT_RGBA8)); + + TEST_CASES_END(); +} diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp index a4466530..43e07329 100644 --- a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp @@ -21,9 +21,9 @@ #include #include -#include "core/detail/vector_utils.hpp" #include +#include "core/detail/vector_utils.hpp" #include "test_helpers.hpp" using namespace roccv; @@ -45,7 +45,7 @@ namespace { * @return T The interpolated pixel. */ template -T GoldenLinear(BorderWrapper input, int64_t sample, float y, float x) { +T GoldenLinear(BorderWrapper> input, int64_t sample, float y, float x) { // Defines the vectorized float type for intermediate calculations. using WorkType = detail::MakeType>; @@ -86,7 +86,7 @@ T GoldenLinear(BorderWrapper input, int64_t sample, float y, floa * @return T The interpolated pixel. */ template -T GoldenNearest(BorderWrapper input, int64_t sample, float y, float x) { +T GoldenNearest(BorderWrapper> input, int64_t sample, float y, float x) { // Nearest neighbor interpolation. Rounds given floating point values to the nearest integer. return input.at(sample, lroundf(y), lroundf(x), 0); } @@ -98,7 +98,7 @@ T GoldenNearest(BorderWrapper input, int64_t sample, float y, flo * @return None. */ void CalBicubicWeights(float dist, float* weight) { - const float A = -0.5f; // Note OpenCV sets alpha to -0.75f + const float A = -0.5f; // Note OpenCV sets alpha to -0.75f weight[0] = ((A * (dist + 1) - 5 * A) * (dist + 1) + 8 * A) * (dist + 1) - 4 * A; weight[1] = ((A + 2) * dist - (A + 3)) * dist * dist + 1; @@ -107,7 +107,8 @@ void CalBicubicWeights(float dist, float* weight) { } /** - * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV libraries. + * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV + * libraries. * * @tparam T Image datatype. * @tparam BorderType Border type for boundary conditions. @@ -118,7 +119,7 @@ void CalBicubicWeights(float dist, float* weight) { * @return T The interpolated pixel. */ template -T GoldenBicubic(BorderWrapper input, int64_t sample, float y, float x) { +T GoldenBicubic(BorderWrapper> input, int64_t sample, float y, float x) { // Defines the vectorized float type for intermediate calculations. using WorkType = detail::MakeType>; @@ -135,7 +136,8 @@ T GoldenBicubic(BorderWrapper input, int64_t sample, float y, flo WorkType sum = SetAll(0.0f); for (int indexY = -1; indexY <= 2; indexY++) { for (int indexX = -1; indexX <= 2; indexX++) { - sum += detail::RangeCast(input.at(sample, intY + indexY, intX + indexX, 0)) * (weightX[indexX + 1] * weightY[indexY + 1]); + sum += detail::RangeCast(input.at(sample, intY + indexY, intX + indexX, 0)) * + (weightX[indexX + 1] * weightY[indexY + 1]); } } @@ -156,7 +158,7 @@ T GoldenBicubic(BorderWrapper input, int64_t sample, float y, flo * @return T The interpolated pixel. */ template -T GoldenInterpolationAt(BorderWrapper input, int64_t sample, float y, float x, +T GoldenInterpolationAt(BorderWrapper> input, int64_t sample, float y, float x, eInterpolationType interp) { switch (interp) { case eInterpolationType::INTERP_TYPE_NEAREST: @@ -202,9 +204,10 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl std::vector> goldenOutput; // Use roccv::InterpolationWrapper to get actual output - InterpolationWrapper actualWrap( - (BorderWrapper(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal))); - BorderWrapper goldenWrap(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal); + auto actualWrap = MakeInterpolationWrapper( + MakeBorderWrapper(TensorWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal)); + auto goldenWrap = + MakeBorderWrapper(TensorWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal); for (int b = 0; b < batchSize; b++) { for (float y = 0; y < imageSize.h; y += idxDelta) { @@ -220,7 +223,8 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl } } } - if constexpr (std::is_integral_v> && std::is_signed_v> && sizeof(detail::BaseType) == 4) { + if constexpr (std::is_integral_v> && std::is_signed_v> && + sizeof(detail::BaseType) == 4) { CompareVectorsNear(actualOutput, goldenOutput, NEAR_EQUAL_THRESHOLD * 2); } else { CompareVectorsNear(actualOutput, goldenOutput); @@ -228,7 +232,7 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl } } // namespace -int main(int argc, char **argv) { +int main(int argc, char** argv) { (void)argc; (void)argv; TEST_CASES_BEGIN(); @@ -322,7 +326,7 @@ int main(int argc, char **argv) { TEST_CASE((TestCorrectness(1, {20, 53}, make_float4(0, 0, 0, 1), 0.1f))); TEST_CASE((TestCorrectness(3, {38, 10}, make_float4(0, 0, 0, 1), 0.1f))); TEST_CASE((TestCorrectness(5, {65, 21}, make_float4(1, 0.5, 0.5, 1), 0.1f))); - // clang-format on + // clang-format on TEST_CASES_END(); } \ No newline at end of file diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_image_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_tensor_wrapper.cpp similarity index 89% rename from tests/roccv/cpp/src/tests/core/wrappers/test_image_wrapper.cpp rename to tests/roccv/cpp/src/tests/core/wrappers/test_tensor_wrapper.cpp index 70f03b0b..28a1acca 100644 --- a/tests/roccv/cpp/src/tests/core/wrappers/test_image_wrapper.cpp +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_tensor_wrapper.cpp @@ -20,7 +20,7 @@ */ #include -#include +#include #include "test_helpers.hpp" @@ -37,11 +37,11 @@ void TestCorrectness(int numImages, Size2D size) { std::vector ref(numElements); FillVector(ref); - ImageWrapper input(ref, numImages, size.w, size.h); + TensorWrapper input(ref, numImages, size.w, size.h); std::vector actual; // To determine if coordinates are pointing to the proper values in memory, iterate through the reference vector - // element-by-element and iterate through the ImageWrapper coordinate-wise. All values should be the same if + // element-by-element and iterate through the TensorWrapper coordinate-wise. All values should be the same if // everything lines up. for (int b = 0; b < numImages; ++b) { @@ -58,9 +58,9 @@ void TestCorrectness(int numImages, Size2D size) { } template -void TestImageWrapperConstructor(int imageCount, Size2D imageSize, ImageFormat format) { +void TestTensorWrapperConstructor(int imageCount, Size2D imageSize, ImageFormat format) { Tensor input(imageCount, imageSize, format); - ImageWrapper wrapper(input); + TensorWrapper wrapper(input); EXPECT_EQ(wrapper.batches(), imageCount); EXPECT_EQ(wrapper.channels(), format.channels()); @@ -74,7 +74,7 @@ int main(int argc, char** argv) { (void)argv; TEST_CASES_BEGIN(); - TEST_CASE(TestImageWrapperConstructor(2, {54, 67}, FMT_RGB8)); + TEST_CASE(TestTensorWrapperConstructor(2, {54, 67}, FMT_RGB8)); TEST_CASE(TestCorrectness(1, {10, 10})); TEST_CASE(TestCorrectness(2, {43, 9})); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp index f208962c..d34a5187 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. #include #include #include -#include +#include #include #include "test_helpers.hpp" @@ -51,8 +51,8 @@ namespace { template > void GenerateGoldenBilateral(std::vector& input, std::vector& output, int32_t batchSize, Size2D imageSize, int diameter, float sigmaColor, float sigmaSpace, T borderValue) { - BorderWrapper src(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderValue); - ImageWrapper dst(output, batchSize, imageSize.w, imageSize.h); + auto src = MakeBorderWrapper(TensorWrapper(input, batchSize, imageSize.w, imageSize.h), borderValue); + TensorWrapper dst(output, batchSize, imageSize.w, imageSize.h); using namespace roccv::detail; using Worktype = MakeType>; @@ -179,9 +179,9 @@ int main(int argc, char** argv) { TEST_CASE((TestCorrectness(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); TEST_CASE((TestCorrectness(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f, - {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); - TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, - {500.0, 500.0, 0.0, 0.0}, eDeviceType::GPU))); + {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0}, + eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); @@ -288,9 +288,9 @@ int main(int argc, char** argv) { TEST_CASE((TestCorrectness(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); TEST_CASE((TestCorrectness(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f, - {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); - TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, - {500.0, 500.0, 0.0, 0.0}, eDeviceType::CPU))); + {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0}, + eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp b/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp index 3b5b5284..3121d7d9 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp @@ -23,7 +23,7 @@ THE SOFTWARE. #include #include #include -#include +#include #include #include @@ -85,8 +85,8 @@ template > void GenerateGoldenBndBox(std::vector &input, std::vector &output, int32_t batchSize, int32_t width, int32_t height, const BndBoxes &bboxes) { // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); // Working type for internal pixel format, which has 4 channels. using WorkType = detail::MakeType; diff --git a/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp b/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp index f145b1b4..e4e9aa95 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp @@ -23,7 +23,7 @@ THE SOFTWARE. #include #include #include -#include +#include #include #include "test_helpers.hpp" @@ -48,8 +48,8 @@ namespace { template > void GenerateGoldenCrop(std::vector& input, std::vector& output, int32_t batchSize, int32_t width, int32_t height, Size2D cropSize) { // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, cropSize.w, cropSize.h); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, cropSize.w, cropSize.h); int topLeftX = (width >> 1) - (cropSize.w >> 1); int topLeftY = (height >> 1) - (cropSize.h >> 1); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp b/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp index 43abf6c1..f332016f 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include "test_helpers.hpp" @@ -56,17 +56,17 @@ std::vector> GoldenComposite(std::vector>; - // Wrap input data into ImageWrappers for easy data access - ImageWrapper fgWrap(foreground, batchSize, width, height); - ImageWrapper bgWrap(background, batchSize, width, height); - ImageWrapper maskWrap(mask, batchSize, width, height); + // Wrap input data into TensorWrappers for easy data access + TensorWrapper fgWrap(foreground, batchSize, width, height); + TensorWrapper bgWrap(background, batchSize, width, height); + TensorWrapper maskWrap(mask, batchSize, width, height); // Size of the output depends on the requested number of output channels. If it is 3, then the output images will // have 3 channels. If it is 4, then an additional alpha channel is added to the output. This alpha channel is // always fully on. int numOutElements = batchSize * width * height * detail::NumElements; std::vector> output(numOutElements); - ImageWrapper outWrap(output, batchSize, width, height); + TensorWrapper outWrap(output, batchSize, width, height); for (int b = 0; b < batchSize; b++) { for (int y = 0; y < height; y++) { diff --git a/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp b/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp index eaa714a9..726b33d1 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include -#include +#include #include #include "test_helpers.hpp" @@ -55,8 +55,8 @@ std::vector GoldenConvertTo(std::vector& input, int32_t batchSi std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); using AB_DT = decltype(float() * BT_SRC() * BT_DEST()); using work_type = detail::MakeType>; diff --git a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp index 4320f04e..34a0191a 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp @@ -57,10 +57,11 @@ std::vector GoldenCopyMakeBorder(std::vector input, int batchSize, Size2 // Wrap the input images in a BorderWrapper to handle out of bounds image behavior. The BorderWrapper has already // been tested in another test so it can be used reliably. - BorderWrapper inputWrap(ImageWrapper(input, batchSize, inputSize.w, inputSize.h), borderVal); + auto inputWrap = + MakeBorderWrapper(TensorWrapper(input, batchSize, inputSize.w, inputSize.h), borderVal); std::vector output(batchSize * outputSize.h * outputSize.w * channels); - ImageWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); + TensorWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); for (int b = 0; b < batchSize; b++) { for (int y = 0; y < outputSize.h; y++) { diff --git a/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp b/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp index 03cbf9b4..a161cea7 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include #include -#include +#include #include #include @@ -50,8 +50,8 @@ template > void GenerateGoldenCrop(std::vector& input, std::vector& output, int32_t batchSize, int32_t width, int32_t height, Box_t cropRect) { // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, cropRect.width, cropRect.height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, cropRect.width, cropRect.height); for (int b = 0; b < batchSize; b++) { for (int y = 0; y < cropRect.height; y++) { diff --git a/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp b/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp index 52c3735d..3c06c397 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include -#include +#include #include #include "test_helpers.hpp" @@ -34,9 +34,9 @@ namespace { template > std::vector GoldenReorder(std::vector& input, int samples, int width, int height) { - ImageWrapper inputWrap(input, samples, width, height); + TensorWrapper inputWrap(input, samples, width, height); std::vector output(samples * width * height * detail::NumElements); - ImageWrapper outputWrap(output, samples, width, height); + TensorWrapper outputWrap(output, samples, width, height); for (int b = 0; b < samples; b++) { for (int y = 0; y < height; y++) { @@ -51,9 +51,9 @@ std::vector GoldenReorder(std::vector& input, int samples, int width, in template > std::vector GoldenYUVToRGB(std::vector& input, int samples, int width, int height, float delta) { - ImageWrapper inputWrap(input, samples, width, height); + TensorWrapper inputWrap(input, samples, width, height); std::vector output(samples * width * height * detail::NumElements); - ImageWrapper outputWrap(output, samples, width, height); + TensorWrapper outputWrap(output, samples, width, height); for (int b = 0; b < samples; b++) { for (int y = 0; y < height; y++) { @@ -77,9 +77,9 @@ std::vector GoldenYUVToRGB(std::vector& input, int samples, int width, i template > std::vector GoldenRGBToYUV(std::vector& input, int samples, int width, int height, float delta) { - ImageWrapper inputWrap(input, samples, width, height); + TensorWrapper inputWrap(input, samples, width, height); std::vector output(samples * width * height * detail::NumElements); - ImageWrapper outputWrap(output, samples, width, height); + TensorWrapper outputWrap(output, samples, width, height); for (int b = 0; b < samples; b++) { for (int y = 0; y < height; y++) { @@ -104,11 +104,11 @@ std::vector GoldenRGBToYUV(std::vector& input, int samples, int width, i template > std::vector GoldenRGBToGrayscale(std::vector& input, int samples, int width, int height) { - ImageWrapper inputWrap(input, samples, width, height); + TensorWrapper inputWrap(input, samples, width, height); std::vector output(samples * width * height); // Output must always be uchar1 for grayscale - ImageWrapper outputWrap(output, samples, width, height); + TensorWrapper outputWrap(output, samples, width, height); for (int b = 0; b < samples; b++) { for (int y = 0; y < height; y++) { diff --git a/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp b/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp index 36bfbcc4..1a83da71 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp @@ -23,7 +23,7 @@ THE SOFTWARE. #include #include #include -#include +#include #include #include "test_helpers.hpp" @@ -52,8 +52,8 @@ std::vector GoldenFlip(std::vector& input, int32_t batchSize, int32_t wi std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { for (int y = 0; y < height; ++y) { diff --git a/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp b/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp index d24df72e..71b1014e 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. #include "core/detail/casting.hpp" #include "core/detail/type_traits.hpp" #include "core/detail/math/vectorized_type_math.hpp" -#include +#include #include #include #include "operator_types.h" @@ -56,8 +56,8 @@ std::vector GoldenGammaContrast(std::vector& input, int32_t batchSize, i std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); using work_type = detail::MakeType>; for (int b = 0; b < batchSize; ++b) { diff --git a/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp b/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp index 05a5009c..513df448 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include -#include +#include #include #include #include @@ -57,7 +57,7 @@ std::vector GoldenHistogram(std::vector& input, int32_t batchSize, in std::vector local_histogram(256); // Wrap the input vector for simplified data access - ImageWrapper src(input, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { std::fill(local_histogram.begin(), local_histogram.end(), 0); @@ -94,8 +94,8 @@ std::vector GoldenHistogramMask(std::vector& input, std::vector local_histogram(256); // Wrap input/mask vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper maskWrap(mask, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper maskWrap(mask, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { std::fill(local_histogram.begin(), local_histogram.end(), 0); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp b/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp index fd850ddb..3b077b36 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include -#include +#include #include #include "test_helpers.hpp" diff --git a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp index 634344a4..18847056 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp @@ -21,12 +21,13 @@ THE SOFTWARE. */ #include -#include +#include #include #include #include -#include "core/detail/internal_structs.hpp" + #include "core/detail/casting.hpp" +#include "core/detail/internal_structs.hpp" #include "core/detail/math/vectorized_type_math.hpp" #include "core/detail/type_traits.hpp" #include "operator_types.h" @@ -39,11 +40,11 @@ using namespace roccv::detail; // Keep all non-entrypoint functions in an anonymous namespace to prevent redefinition errors across translation units. namespace { -RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 &mapSize, bool alignCorners, eRemapType mapValueType) -{ +RemapParams GetRemapParams(const int2& srcSize, const int2& dstSize, const int2& mapSize, bool alignCorners, + eRemapType mapValueType) { RemapParams params; - switch(mapValueType) { + switch (mapValueType) { case REMAP_ABSOLUTE: params.srcScale = make_float2(0.f, 0.f); params.mapScale = StaticCast(mapSize) / StaticCast(dstSize); @@ -54,7 +55,7 @@ RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 case REMAP_ABSOLUTE_NORMALIZED: params.srcScale = make_float2(0.f, 0.f); params.mapScale = StaticCast(mapSize) / StaticCast(dstSize); - params.valScale = (StaticCast(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f; + params.valScale = (StaticCast(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f; params.srcOffset = params.valScale - (alignCorners ? 0.f : .5f); params.dstOffset = 0.f; break; @@ -87,24 +88,24 @@ RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 */ template > -std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth, int32_t inHeight, int32_t outWidth, - int32_t outHeight, int32_t mapWidth, int32_t mapHeight, std::vector& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) { - +std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth, + int32_t inHeight, int32_t outWidth, int32_t outHeight, int32_t mapWidth, int32_t mapHeight, + std::vector& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) { int channels = detail::NumElements; int outputSize = batchSize * outWidth * outHeight * channels; std::vector output(outputSize); // Create interpolation wrapper for input vector - InterpolationWrapper src((BorderWrapper( - ImageWrapper(input, batchSize, inWidth, inHeight), detail::SaturateCast(borderValue)))); + auto src = MakeInterpolationWrapper(MakeBorderWrapper( + TensorWrapper(input, batchSize, inWidth, inHeight), detail::SaturateCast(borderValue))); // Wrap the output vector for simplified data access - ImageWrapper dst(output, batchSize, outWidth, outHeight); + TensorWrapper dst(output, batchSize, outWidth, outHeight); // Create an interpolation wrapper for the map tensor - // InterpolationWrapper wrappedMapTensor(map, make_float2(0, 0)); - InterpolationWrapper map((BorderWrapper( - ImageWrapper(mapData.data(), mapBatchSize, mapWidth, mapHeight), detail::SaturateCast(borderValue)))); + auto map = MakeInterpolationWrapper( + MakeBorderWrapper(TensorWrapper(mapData.data(), mapBatchSize, mapWidth, mapHeight), + detail::SaturateCast(borderValue))); int2 srcSize = make_int2(src.width(), src.height()); int2 dstSize = make_int2(dst.width(), dst.height()); @@ -119,13 +120,12 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m for (int b = 0; b < dst.batches(); b++) { for (int y = 0; y < dst.height(); y++) { for (int x = 0; x < dst.width(); x++) { - dstCoord.x = static_cast(x); dstCoord.y = static_cast(y); - + mapCoord.x = (dstCoord.x + params.dstOffset) * params.mapScale.x; mapCoord.y = (dstCoord.y + params.dstOffset) * params.mapScale.y; - + float2 mapValue = map.at((mapBatchSize == 1 ? 0 : b), mapCoord.y, mapCoord.x, 0); srcCoord.x = dstCoord.x * params.srcScale.x + mapValue.x * params.valScale.x + params.srcOffset.x; @@ -162,7 +162,8 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m */ template > -void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight, int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType, +void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight, + int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType, bool alignCorners, eDeviceType device) { // Create input and output tensor based on test parameters Tensor input(batchSize, {inWidth, inHeight}, format, device); @@ -174,7 +175,7 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, // Copy generated input data into input tensor CopyVectorIntoTensor(input, inputData); - + int mapSize = mapBatchSize * mapWidth * mapHeight; std::vector mapData(mapSize); @@ -188,11 +189,10 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, } } } - } - else if (mapType == REMAP_ABSOLUTE_NORMALIZED) { + } else if (mapType == REMAP_ABSOLUTE_NORMALIZED) { for (int b = 0; b < mapBatchSize; b++) { - for (int y = 0; y < mapHeight; y++){ - for (int x = 0; x < mapWidth; x++){ + for (int y = 0; y < mapHeight; y++) { + for (int x = 0; x < mapWidth; x++) { float normX = ((2.0f * static_cast(x)) / static_cast(mapWidth - 1)) - 1.0f; float normY = ((2.0f * static_cast(y)) / static_cast(mapHeight - 1)) - 1.0f; @@ -204,11 +204,10 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, } } } - } - else if (mapType == REMAP_RELATIVE_NORMALIZED) { + } else if (mapType == REMAP_RELATIVE_NORMALIZED) { for (int b = 0; b < mapBatchSize; b++) { - for (int y = 0; y < mapHeight; y++){ - for (int x = 0; x < mapWidth; x++){ + for (int y = 0; y < mapHeight; y++) { + for (int x = 0; x < mapWidth; x++) { // Generate normalized coordinates in [-1, 1] range float normX = ((2.0f * static_cast(x)) / static_cast(mapWidth - 1)) - 1.0f; float normY = ((2.0f * static_cast(y)) / static_cast(mapHeight - 1)) - 1.0f; @@ -235,7 +234,8 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, hipStream_t stream; HIP_VALIDATE_NO_ERRORS(hipStreamCreate(&stream)); Remap op; - op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue, device); + op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue, + device); HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); HIP_VALIDATE_NO_ERRORS(hipStreamDestroy(stream)); @@ -243,9 +243,9 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, std::vector result(output.shape().size()); CopyTensorIntoVector(result, output); - std::vector ref = GoldenRemap(inputData, batchSize, mapBatchSize, inWidth, - inHeight, outWidth, outHeight, - mapWidth, mapHeight, mapData, mapType, alignCorners, borderValue); + std::vector ref = GoldenRemap( + inputData, batchSize, mapBatchSize, inWidth, inHeight, outWidth, outHeight, mapWidth, mapHeight, mapData, + mapType, alignCorners, borderValue); // Compare data in actual output versus the generated golden reference image CompareVectors(result, ref); @@ -258,144 +258,186 @@ int main(int argc, char** argv) { TEST_CASES_BEGIN(); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); - + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::GPU))); + TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); - - + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASES_END(); } \ No newline at end of file diff --git a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp index d7c385d0..02f567cf 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp @@ -50,13 +50,12 @@ std::vector GoldenResize(std::vector> &input, int batchS size_t numOutputElements = batchSize * outputSize.w * outputSize.h * detail::NumElements; std::vector> output(numOutputElements); - ImageWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); + TensorWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); // Use the replicate (or clamping) border mode by default to handle out of bounds conditions with certain // interpolation modes. - InterpolationWrapper inputWrap( - BorderWrapper( - ImageWrapper(input, batchSize, inputSize.w, inputSize.h), T{})); + auto inputWrap = MakeInterpolationWrapper(MakeBorderWrapper( + TensorWrapper(input, batchSize, inputSize.w, inputSize.h), T{})); // Determine the scaling factor required to map from the output coordinates to the corresponding input coordinates // on both the x and y axes. diff --git a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp index 56deeabb..fb9fa55e 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp @@ -67,10 +67,9 @@ std::vector> GoldenRotate(std::vector>& T borderVal = detail::SaturateCast(make_float4(0.0f, 0.0f, 0.0f, 0.0f)); - ImageWrapper outputWrapper(output, batchSize, imageSize.w, imageSize.h); - InterpolationWrapper inputWrapper( - BorderWrapper(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), - borderVal)); + TensorWrapper outputWrapper(output, batchSize, imageSize.w, imageSize.h); + auto inputWrapper = MakeInterpolationWrapper(MakeBorderWrapper( + TensorWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal)); /** * Affine warp for a combined rotation and translate looks like the following when in its inverse representation: diff --git a/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp b/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp index 09ef3262..279959e0 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. #include "core/detail/casting.hpp" #include "core/detail/type_traits.hpp" #include "core/detail/math/vectorized_type_math.hpp" -#include +#include #include #include #include "operator_types.h" @@ -59,8 +59,8 @@ std::vector GoldenBinaryThreshold(std::vector& input, int32_t batchSize, std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { double th = thresh[b]; @@ -88,8 +88,8 @@ std::vector GoldenBinaryInvThreshold(std::vector& input, int32_t batchSi std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { double th = thresh[b]; @@ -117,8 +117,8 @@ std::vector GoldenTruncThreshold(std::vector& input, int32_t batchSize, std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { double th = thresh[b]; @@ -145,8 +145,8 @@ std::vector GoldenToZeroThreshold(std::vector& input, int32_t batchSize, std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { double th = thresh[b]; @@ -173,8 +173,8 @@ std::vector GoldenToZeroInvThreshold(std::vector& input, int32_t batchSi std::vector output(input.size()); // Wrap input/output vectors for simplified data access - ImageWrapper src(input, batchSize, width, height); - ImageWrapper dst(output, batchSize, width, height); + TensorWrapper src(input, batchSize, width, height); + TensorWrapper dst(output, batchSize, width, height); for (int b = 0; b < batchSize; ++b) { double th = thresh[b]; diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp index 93c91ae9..5af99149 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp @@ -55,12 +55,12 @@ std::vector> GoldenWarpAffine(std::vector& mat, bool isInverted, int batchSize, Size2D inputSize, Size2D outputSize, float4 borderValue) { // Create interpolation wrapper for input vector - InterpolationWrapper inputWrap((BorderWrapper( - ImageWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue)))); + auto inputWrap = MakeInterpolationWrapper(MakeBorderWrapper( + TensorWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue))); - // Create ImageWrapper for output vector. We also need to create said output vector. + // Create TensorWrapper for output vector. We also need to create said output vector. std::vector> output(batchSize * outputSize.w * outputSize.h * detail::NumElements); - ImageWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); + TensorWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); // Prepare the transformation matrix. An affine transform is effectively a 3x3 perspective transform with its last // row set to [0, 0, 1]. diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp index 1461365c..2c7559cb 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp @@ -52,12 +52,12 @@ std::vector> GoldenWarpPerspective(std::vector& mat, bool isInverted, int batchSize, Size2D inputSize, Size2D outputSize, float4 borderValue) { // Create interpolation wrapper for input vector - InterpolationWrapper inputWrap((BorderWrapper( - ImageWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue)))); + auto inputWrap = MakeInterpolationWrapper(MakeBorderWrapper( + TensorWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue))); - // Create ImageWrapper for output vector. We also need to create said output vector. + // Create TensorWrapper for output vector. We also need to create said output vector. std::vector> output(batchSize * outputSize.w * outputSize.h * detail::NumElements); - ImageWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); + TensorWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); // If given matrix is not the inverted representation of the transformation, we have to invert it first (since we // transform from output -> input).