diff --git a/include/core/detail/var_shape_descriptor_table.hpp b/include/core/detail/var_shape_descriptor_table.hpp
new file mode 100644
index 00000000..10bae71d
--- /dev/null
+++ b/include/core/detail/var_shape_descriptor_table.hpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <hip/hip_runtime.h>
+#include <stdint.h>
+
+#include "core/detail/allocators/i_allocator.hpp"
+#include "core/image_buffer.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+
+namespace roccv::detail {
+
+/**
+ * @brief Owns the per-image descriptor table (strided buffer + format arrays)
+ * for an ImageBatchVarShape, along with its residency-specific lifecycle.
+ *
+ * GPU: a device-resident table mirrored by pinned host buffers, brought up to
+ * date by a lazy H2D copy guarded by a hipEvent. CPU: a single host-resident
+ * table handed straight to host kernels — no device buffers, no fence, no sync.
+ *
+ * All device dispatch lives here so ImageBatchVarShape stays device-agnostic and
+ * gets a defaulted destructor and trivial move. Move-only: the moved-from table
+ * is left empty (every pointer null) so its destructor is a no-op.
+ */
+class VarShapeDescriptorTable {
+   public:
+    /**
+     * @brief The kernel-facing pointer set for one exported snapshot. `imageList`
+     * and `formatList` are device pointers for a GPU table and host pointers for a
+     * CPU table; `hostFormatList` is always host-resident (it aliases `formatList`
+     * for a CPU table).
+     */
+    struct Snapshot {
+        ImageBufferStrided* imageList;
+        ImageFormat* formatList;
+        const ImageFormat* hostFormatList;
+    };
+
+    /**
+     * @brief Allocate a table sized for `capacity` images on `device`. Throws
+     * INVALID_VALUE if capacity is not positive.
+     */
+    VarShapeDescriptorTable(int32_t capacity, eDeviceType device, const IAllocator& alloc);
+    ~VarShapeDescriptorTable();
+
+    VarShapeDescriptorTable(const VarShapeDescriptorTable&) = delete;
+    VarShapeDescriptorTable& operator=(const VarShapeDescriptorTable&) = delete;
+    VarShapeDescriptorTable(VarShapeDescriptorTable&&) noexcept;
+    VarShapeDescriptorTable& operator=(VarShapeDescriptorTable&&) = delete;
+
+    eDeviceType device() const noexcept { return m_device; }
+
+    /** Host-resident mirrors, always valid (both devices) for cache rebuilds. */
+    const ImageBufferStrided* hostImages() const noexcept { return m_hostImages; }
+    const ImageFormat* hostFormats() const noexcept { return m_hostFormats; }
+
+    /**
+     * @brief Write descriptor slot `index` from already-validated image data. For
+     * a GPU table, drains any in-flight H2D copy first so the host mirror a
+     * consumer is reading never tears.
+     */
+    void writeSlot(int32_t index, const ImageBufferStrided& slot, ImageFormat format);
+
+    /**
+     * @brief Adjust dirty tracking after the live image count shrinks (popBack /
+     * clear). Pass the new image count (0 for clear).
+     */
+    void onShrink(int32_t newNumImages) noexcept;
+
+    /**
+     * @brief Flush the dirty suffix [dirtyStart, numImages) to the device on
+     * `stream` (GPU) or do nothing (CPU), then return the kernel-facing pointers.
+     */
+    Snapshot sync(hipStream_t stream, int32_t numImages);
+
+   private:
+    void freeAll() noexcept;
+
+    eDeviceType m_device;
+    const IAllocator& m_allocator;
+    int32_t m_dirtyStartingFromIndex = 0;
+    bool m_fencePending = false;
+
+    ImageBufferStrided* m_devImages = nullptr;
+    ImageFormat* m_devFormats = nullptr;
+    ImageBufferStrided* m_hostImages = nullptr;
+    ImageFormat* m_hostFormats = nullptr;
+    hipEvent_t m_fence = nullptr;
+};
+
+}  // namespace roccv::detail
diff --git a/include/core/image.hpp b/include/core/image.hpp
new file mode 100644
index 00000000..ef7707a8
--- /dev/null
+++ b/include/core/image.hpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <typeinfo>
+
+#include "core/detail/allocators/i_allocator.hpp"
+#include "core/image_buffer.hpp"
+#include "core/image_data.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+#include "operator_types.h"
+
+namespace roccv {
+
+class ImageStorage;
+
+/**
+ * @brief Cleanup callback signature for ImageWrapData. Invoked when the last
+ * Image handle referencing the wrapped buffer is destroyed. Receives the
+ * ImageData snapshot that was originally wrapped, so callbacks can free
+ * multi-plane buffers or dispatch on format.
+ */
+using ImageDataCleanupFunc = std::function<void(const ImageData&)>;
+
+/**
+ * @brief Per-image allocation spec describing what to allocate for a single
+ * variable-sized image. Mirrors NVCVImageRequirements: size, format, per-plane
+ * row strides, and base-address alignment. Used as the input to Image's
+ * allocating constructors and as the output of CalcRequirements; also
+ * preserved on the Image itself as the source of truth from which exportData()
+ * rebuilds an ImageData snapshot on demand.
+ *
+ * Per-plane row strides are populated only for planes 0..numPlanes(format)-1;
+ * remaining slots are unused. Today's interleaved-only ImageFormat means only
+ * planeRowStride[0] is populated in practice.
+ */
+struct ImageRequirements {
+    Size2D size;                                     // Width and height in pixels.
+    ImageFormat format;                              // Pixel format (dtype + channel count + swizzle).
+    int64_t planeRowStride[ROCCV_MAX_IMAGE_PLANES];  // Per-plane row stride in bytes.
+    int32_t alignBytes;                              // Required base-address alignment, in bytes.
+};
+
+/**
+ * @brief A single variable-sized image with device-resident pixel data.
+ *
+ * Image is the per-element type held by ImageBatchVarShape. It is a handle
+ * over a refcounted ImageStorage: copying an Image bumps the refcount and
+ * leaves both handles pointing at the same underlying buffer. The buffer is
+ * freed when the last handle is destroyed (for owning Images) or when the
+ * cleanup callback fires (for ImageWrapData with a callback).
+ *
+ * Storage shape: Image holds the buffer pointer (via ImageStorage) plus the
+ * "ingredients" describing it (size, format, device, per-plane row strides).
+ * It does NOT hold a precomputed ImageData snapshot — exportData() rebuilds
+ * one on demand from the ingredients. This keeps a single source of truth for
+ * the buffer pointer and aligns with how ImageBatchVarShape produces its
+ * own snapshots.
+ */
+class Image {
+   public:
+    using Requirements = ImageRequirements;
+
+    /**
+     * @brief Compute the requirements (row stride, etc.) for an image of the
+     * given dimensions and format.
+     */
+    static Requirements CalcRequirements(Size2D size, ImageFormat format);
+
+    /**
+     * @brief Allocate a new device buffer for an image of the given dimensions
+     * and format using the global default allocator.
+     */
+    explicit Image(Size2D size, ImageFormat format, eDeviceType device = eDeviceType::GPU);
+
+    /**
+     * @brief Allocate a new device buffer using a caller-supplied allocator.
+     */
+    explicit Image(Size2D size, ImageFormat format, const IAllocator& alloc, eDeviceType device = eDeviceType::GPU);
+
+    /**
+     * @brief Allocate a new device buffer from precomputed requirements.
+     */
+    explicit Image(const Requirements& reqs, eDeviceType device = eDeviceType::GPU);
+    explicit Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType device = eDeviceType::GPU);
+
+    Image(const Image&) = default;  // refcount bump
+    Image(Image&&) noexcept = default;
+    Image& operator=(const Image&) = default;  // refcount bump
+    Image& operator=(Image&&) noexcept = default;
+    ~Image() = default;
+
+    Size2D size() const noexcept { return m_size; }
+    ImageFormat format() const noexcept { return m_format; }
+    eDeviceType device() const noexcept { return m_device; }
+
+    /**
+     * @brief Build and return an ImageData snapshot describing this image.
+     *
+     * Returned by value (not by reference) — Image stores ingredients, not a
+     * precomputed snapshot, so each call constructs a fresh ImageData. The
+     * snapshot's plane descriptors point into this Image's buffer; it remains
+     * valid as long as any handle to this storage is alive.
+     */
+    ImageData exportData() const;
+
+    /**
+     * @brief Build a snapshot and down-cast it to a specific subclass. Throws
+     * std::bad_cast if the underlying buffer kind doesn't match Derived.
+     */
+    template <typename Derived>
+    Derived exportData() const;
+
+   private:
+    Image(const Requirements& reqs, eDeviceType device, std::shared_ptr<ImageStorage> storage);
+
+    friend Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup);
+
+    std::shared_ptr<ImageStorage> m_data;
+    Size2D m_size;
+    ImageFormat m_format;
+    eDeviceType m_device;
+    std::array<int64_t, ROCCV_MAX_IMAGE_PLANES> m_planeRowStride;
+};
+
+template <typename Derived>
+Derived Image::exportData() const {
+    ImageData data = exportData();
+    auto derived = data.cast<Derived>();
+    if (!derived.has_value()) {
+        throw std::bad_cast();
+    }
+    return derived.value();
+}
+
+/**
+ * @brief Wrap an externally-owned buffer as an Image without allocating.
+ *
+ * View-only by default: the wrapped buffer is NOT freed when the returned
+ * Image (and any copies) go out of scope. The caller is responsible for
+ * keeping the underlying memory alive for as long as any handle survives.
+ *
+ * Pass a non-null cleanup callback to opt into ownership transfer; the
+ * callback runs exactly once, when the last handle is destroyed.
+ *
+ * @param[in] data Pre-existing image data (pointer, layout, device).
+ * @param[in] cleanup Optional callback to free the buffer on last destruction.
+ * @return An Image referencing the wrapped buffer.
+ */
+extern Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup = nullptr);
+
+}  // namespace roccv
diff --git a/include/core/image_batch_buffer.hpp b/include/core/image_batch_buffer.hpp
new file mode 100644
index 00000000..06f0c4b3
--- /dev/null
+++ b/include/core/image_batch_buffer.hpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include "core/image_buffer.hpp"
+#include "core/image_format.hpp"
+
+namespace roccv {
+
+/**
+ * @brief Pitch-linear descriptor table for a variable-shape image batch.
+ *
+ * Each entry of `imageList` is a full per-image strided buffer descriptor —
+ * reusing `ImageBufferStrided` keeps the per-image shape (multi-plane-capable,
+ * one base pointer per plane, per-plane row stride) identical to what a single
+ * `Image` carries today.
+ *
+ * Pointer residency:
+ *  - `imageList` is the descriptor table read by GPU kernels. For a GPU-resident
+ *    batch this points into device memory; for a hypothetical CPU-resident
+ *    batch it would point into host memory. The producing batch class owns the
+ *    allocation and decides residency.
+ *  - `formatList` mirrors `imageList`'s residency and holds one ImageFormat per
+ *    image (so kernels can branch on per-image format without dereferencing the
+ *    descriptor table).
+ *  - `hostFormatList` is always host-resident. It exists so host-side validation
+ *    code can read per-image formats without paying a D->H copy. For a
+ *    CPU-resident batch this MAY alias `formatList`; for a GPU-resident batch
+ *    it is a separate host mirror kept in sync by the producer.
+ *
+ * `uniqueFormat` is the common ImageFormat across all images, or FMT_NONE if
+ * formats are heterogeneous or the batch is empty. Cached to fast-path the
+ * homogeneous case.
+ *
+ * `maxWidth` / `maxHeight` are the bounding box across all images. Used by
+ * operators to size launch grids. Both are 0 when the batch is empty.
+ *
+ * The struct is intentionally trivially copyable so it can ride inside
+ * `ImageBatchBuffer` without an allocation, mirroring `ImageBufferStrided`'s
+ * relationship to `ImageBuffer`.
+ */
+struct ImageBatchVarShapeBufferStrided {
+    /** Common format across all images in the batch, or a default-constructed
+     *  ImageFormat if formats are heterogeneous or the batch is empty. */
+    ImageFormat uniqueFormat;
+
+    /** Bounding box across all images, in pixels. Both 0 when empty. */
+    int32_t maxWidth;
+    int32_t maxHeight;
+
+    /** Per-image format array, length == numImages. Residency matches
+     *  `imageList` (device for GPU batches, host for CPU batches). */
+    ImageFormat* formatList;
+
+    /** Host-resident mirror of `formatList`. May alias `formatList` for
+     *  CPU-resident batches. Length == numImages. */
+    const ImageFormat* hostFormatList;
+
+    /** Per-image descriptor table, length == numImages. The kernel-facing
+     *  pointer; residency determines which device the batch lives on. */
+    ImageBufferStrided* imageList;
+};
+
+/**
+ * @brief An image-batch buffer. Currently only the variable-shape strided
+ * variant is supported. Shaped as a tagged-union-style aggregate so additional
+ * batch buffer kinds can be added later (e.g. tensor-backed batches) without
+ * changing the public type.
+ */
+struct ImageBatchBuffer {
+    ImageBatchVarShapeBufferStrided varShapeStrided;
+};
+
+}  // namespace roccv
diff --git a/include/core/image_batch_data.hpp b/include/core/image_batch_data.hpp
new file mode 100644
index 00000000..c50de010
--- /dev/null
+++ b/include/core/image_batch_data.hpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <optional>
+#include <type_traits>
+
+#include "core/image_batch_buffer.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+#include "operator_types.h"
+
+namespace roccv {
+
+/**
+ * @brief Discriminator for the kind of buffer an ImageBatchData carries. Used
+ * by IsCompatibleKind() / cast<>() to perform safe runtime down-casting through
+ * the ImageBatchData hierarchy.
+ *
+ * The hierarchy currently exposes only one concrete buffer kind
+ * (variable-shape, strided, GPU-resident); the enum is shaped to grow into
+ * additional kinds (e.g. tensor-backed batches, host-resident varshape) without
+ * breaking the existing buffer kind values.
+ */
+enum class ImageBatchBufferType {
+    IMAGE_BATCH_BUFFER_NONE,                   // Default/invalid buffer type.
+    IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP,   // GPU-accessible varshape descriptor table.
+    IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST,  // Host-accessible varshape descriptor table.
+};
+
+/**
+ * @brief Holds the underlying image-batch data alongside metadata
+ * (numImages, buffer kind). Non-strided batch data is not supported for use
+ * right now; use ImageBatchVarShapeDataStrided to access strided varshape data
+ * instead.
+ *
+ * ImageBatchData is the interchange type for a batch of variable-sized images.
+ * It does not own any of the underlying buffers (the descriptor table, the
+ * format arrays, or the per-image pixel buffers) — it is a metadata snapshot,
+ * valid only as long as the producing batch outlives it.
+ *
+ * Lazy-sync note: for a GPU-resident batch the producer (ImageBatchVarShape)
+ * is responsible for ensuring the device-side descriptor table is up to date
+ * with any pushBack/popBack edits before handing out an ImageBatchData. The
+ * snapshot itself carries no synchronization state.
+ */
+class ImageBatchData {
+   public:
+    ImageBatchData() = delete;
+    virtual ~ImageBatchData() = default;
+
+    /**
+     * @brief Returns the number of images currently in the batch.
+     */
+    virtual int32_t numImages() const;
+
+    /**
+     * @brief Returns the device the descriptor table (and per-image pixel
+     * buffers) reside on.
+     */
+    virtual eDeviceType device() const;
+
+    /**
+     * @brief Attempts to down-cast this ImageBatchData to a more specific
+     * subclass. Returns the casted value if the underlying buffer kind matches
+     * what Derived expects, or std::nullopt otherwise.
+     *
+     * @tparam Derived The target subclass to cast to.
+     */
+    template <typename Derived>
+    std::optional<Derived> cast() const {
+        static_assert(std::is_base_of<ImageBatchData, Derived>::value,
+                      "Cannot cast ImageBatchData to an unrelated type.");
+        static_assert(sizeof(Derived) == sizeof(ImageBatchData),
+                      "Derived type must not add any additional data members.");
+
+        if (!Derived::IsCompatibleKind(m_bufferType)) {
+            return std::nullopt;
+        }
+
+        return std::make_optional<Derived>(m_numImages, m_buffer);
+    }
+
+    static bool IsCompatibleKind(ImageBatchBufferType bufferType);
+
+   protected:
+    ImageBatchData(int32_t numImages, const ImageBatchBuffer& buffer);
+
+    int32_t m_numImages;
+    eDeviceType m_deviceType;
+    ImageBatchBufferType m_bufferType;
+    ImageBatchBuffer m_buffer;
+};
+
+/**
+ * @brief Image-batch data backed by a variable-shape descriptor table. Adds
+ * typed accessors for the per-image format arrays and the bounding box across
+ * the batch. Sub-classed by ImageBatchVarShapeDataStrided to discriminate
+ * pitch-linear storage; further sub-classed by ImageBatchVarShapeDataStridedHip
+ * to tag device residency.
+ */
+class ImageBatchVarShapeData : public ImageBatchData {
+   public:
+    using Buffer = ImageBatchVarShapeBufferStrided;
+
+    ImageBatchVarShapeData(int32_t numImages, const ImageBatchBuffer& buffer);
+
+    static bool IsCompatibleKind(ImageBatchBufferType bufferType);
+
+    /**
+     * @brief Bounding box across all images in the batch, in pixels. Both
+     * dimensions are 0 when the batch is empty. Used by operators to size
+     * launch grids without iterating the descriptor table.
+     */
+    Size2D maxSize() const;
+
+    /**
+     * @brief Returns the common ImageFormat across all images, or FMT_NONE if
+     * formats are heterogeneous or the batch is empty.
+     */
+    ImageFormat uniqueFormat() const;
+
+    /**
+     * @brief Per-image format array. Residency matches the descriptor table
+     * (device for GPU batches). Length == numImages().
+     *
+     * Prefer hostFormatList() for host-side validation paths to avoid a D->H
+     * copy.
+     */
+    const ImageFormat* formatList() const;
+
+    /**
+     * @brief Host-resident mirror of formatList(). Always safe to dereference
+     * from host code. Length == numImages().
+     */
+    const ImageFormat* hostFormatList() const;
+};
+
+/**
+ * @brief Variable-shape image-batch data backed by a pitch-linear descriptor
+ * table. Adds the per-image descriptor accessor on top of
+ * ImageBatchVarShapeData.
+ */
+class ImageBatchVarShapeDataStrided : public ImageBatchVarShapeData {
+   public:
+    using Buffer = ImageBatchVarShapeBufferStrided;
+
+    ImageBatchVarShapeDataStrided(int32_t numImages, const ImageBatchBuffer& buffer);
+
+    static bool IsCompatibleKind(ImageBatchBufferType bufferType);
+
+    /**
+     * @brief Per-image descriptor table. Length == numImages(). Residency
+     * matches the enclosing data type — for ImageBatchVarShapeDataStridedHip
+     * this is a device pointer; kernels read it directly.
+     *
+     * Each entry is a full ImageBufferStrided so the per-image shape
+     * (multi-plane-capable, per-plane stride and base pointer) matches what a
+     * single Image carries.
+     */
+    const ImageBufferStrided* imageList() const;
+};
+
+/**
+ * @brief GPU-accessible variable-shape image-batch data.
+ */
+class ImageBatchVarShapeDataStridedHip : public ImageBatchVarShapeDataStrided {
+   public:
+    using Buffer = ImageBatchVarShapeBufferStrided;
+
+    ImageBatchVarShapeDataStridedHip(int32_t numImages, const ImageBatchBuffer& buffer);
+
+    /**
+     * @brief Constructs GPU-accessible varshape image-batch data from the
+     * concrete strided buffer directly.
+     *
+     * @param[in] numImages Number of images currently in the batch.
+     * @param[in] buffer    Descriptor table + per-image format arrays. The
+     *                      descriptor table and `formatList` must point to GPU
+     *                      memory; `hostFormatList` to host memory.
+     */
+    ImageBatchVarShapeDataStridedHip(int32_t numImages, const Buffer& buffer);
+
+    static bool IsCompatibleKind(ImageBatchBufferType bufferType);
+};
+
+/**
+ * @brief Host-accessible variable-shape image-batch data.
+ *
+ * The host-resident counterpart to ImageBatchVarShapeDataStridedHip. The
+ * descriptor table, `formatList`, and `hostFormatList` all point to host
+ * memory; `formatList` and `hostFormatList` MAY alias the same allocation
+ * since no D->H sync is required.
+ *
+ * The lazy host->device descriptor sync that the GPU producer needs is not
+ * applicable here — host-only varshape batches can edit the descriptor table
+ * in place and hand it straight to host kernels. The matching producer-side
+ * design (whether host batches are a separate type, a runtime-tagged variant
+ * of ImageBatchVarShape, or skipped entirely in favor of CPU-side per-image
+ * loops) is still open.
+ */
+class ImageBatchVarShapeDataStridedHost : public ImageBatchVarShapeDataStrided {
+   public:
+    using Buffer = ImageBatchVarShapeBufferStrided;
+
+    ImageBatchVarShapeDataStridedHost(int32_t numImages, const ImageBatchBuffer& buffer);
+
+    /**
+     * @brief Constructs host-accessible varshape image-batch data from the
+     * concrete strided buffer directly.
+     *
+     * @param[in] numImages Number of images currently in the batch.
+     * @param[in] buffer    Descriptor table + per-image format arrays. All
+     *                      pointers must reference host memory; `formatList`
+     *                      and `hostFormatList` may alias.
+     */
+    ImageBatchVarShapeDataStridedHost(int32_t numImages, const Buffer& buffer);
+
+    static bool IsCompatibleKind(ImageBatchBufferType bufferType);
+};
+
+}  // namespace roccv
diff --git a/include/core/image_batch_var_shape.hpp b/include/core/image_batch_var_shape.hpp
new file mode 100644
index 00000000..3c56f0a4
--- /dev/null
+++ b/include/core/image_batch_var_shape.hpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <hip/hip_runtime.h>
+#include <stdint.h>
+
+#include <iterator>
+#include <optional>
+#include <vector>
+
+#include "core/detail/allocators/i_allocator.hpp"
+#include "core/detail/var_shape_descriptor_table.hpp"
+#include "core/image.hpp"
+#include "core/image_batch_data.hpp"
+#include "core/image_format.hpp"
+#include "exception.hpp"
+#include "operator_types.h"
+
+namespace roccv {
+
+/**
+ * @brief Producer-side container for a batch of variable-sized images that
+ * share a single GPU-resident descriptor table.
+ *
+ * Holds up to `capacity()` Image handles and maintains a parallel descriptor
+ * table that operators can dispatch over without iterating Image-by-Image.
+ * Capacity is fixed at construction; pushBack/popBack move within it.
+ *
+ * The host descriptor mirrors are pinned so the H2D copy in exportData() is a
+ * true DMA (no runtime bounce buffer) and so the snapshot can expose the same
+ * pinned pointer as both `formatList`'s host shadow and `hostFormatList`.
+ *
+ * Sync model: pushBack/popBack mutate the host mirrors only; the device
+ * descriptor table is brought up to date lazily inside exportData(stream),
+ * which copies just the dirty suffix `[dirtyStart, numImages)`. A hipEvent
+ * (`m_postFence`) guards the host buffers — if a previous exportData's H2D
+ * is still in flight, pushBack hipEventSynchronize's on the CPU before
+ * mutating, so the snapshot a consumer is reading never tears.
+ *
+ * Residency is fixed at construction (defaults to GPU). A GPU batch keeps a
+ * device descriptor table mirrored by pinned host buffers and the lazy H2D sync
+ * above; a CPU batch holds a single host-resident descriptor table with no
+ * device buffers, no fence, and no sync (exportData hands the host table
+ * straight to host kernels). pushBack rejects images whose device doesn't match
+ * the batch's.
+ */
+class ImageBatchVarShape {
+   public:
+    using const_iterator = std::vector<Image>::const_iterator;
+
+    /**
+     * @brief Construct an empty batch with `capacity` slots on `device`, using
+     * the global default allocator.
+     */
+    explicit ImageBatchVarShape(int32_t capacity, eDeviceType device = eDeviceType::GPU);
+
+    /**
+     * @brief Construct an empty batch with `capacity` slots on `device`, using
+     * the supplied allocator. The allocator must outlive the batch.
+     */
+    explicit ImageBatchVarShape(int32_t capacity, const IAllocator &alloc, eDeviceType device = eDeviceType::GPU);
+
+    ~ImageBatchVarShape() = default;
+
+    ImageBatchVarShape(const ImageBatchVarShape &) = delete;
+    ImageBatchVarShape &operator=(const ImageBatchVarShape &) = delete;
+    ImageBatchVarShape(ImageBatchVarShape &&) noexcept;
+    ImageBatchVarShape &operator=(ImageBatchVarShape &&) = delete;
+
+    int32_t capacity() const noexcept { return m_capacity; }
+    int32_t numImages() const noexcept { return static_cast<int32_t>(m_images.size()); }
+
+    /**
+     * @brief The device the batch (and every image it accepts) resides on.
+     */
+    eDeviceType device() const noexcept { return m_table.device(); }
+
+    /**
+     * @brief Append an image to the batch. Throws if capacity would be
+     * exceeded, the image is CPU-resident, or the image has more than one
+     * plane (rocCV is single-plane today).
+     */
+    void pushBack(const Image &img);
+
+    /**
+     * @brief Append a range of images. Strong exception guarantee — if any
+     * image fails validation, the batch is rolled back to its pre-call state
+     * and the exception is rethrown.
+     */
+    template <typename It>
+    void pushBack(It begin, It end);
+
+    /**
+     * @brief Remove the trailing `count` images. Throws if `count` exceeds
+     * numImages().
+     */
+    void popBack(int32_t count = 1);
+
+    /**
+     * @brief Drop all images. Buffers are kept; the batch is reusable.
+     */
+    void clear();
+
+    const Image &operator[](int32_t i) const { return m_images[i]; }
+
+    const_iterator begin() const noexcept { return m_images.cbegin(); }
+    const_iterator end() const noexcept { return m_images.cend(); }
+
+    /**
+     * @brief Bounding box across all images, in pixels. Returns Size2D{0, 0}
+     * for an empty batch.
+     */
+    Size2D maxSize() const;
+
+    /**
+     * @brief The common ImageFormat across all images, or FMT_NONE if formats
+     * are heterogeneous or the batch is empty. popBack invalidates the cache
+     * so the next call rescans and may return an exact format again.
+     */
+    ImageFormat uniqueFormat() const;
+
+    /**
+     * @brief Build (and return by value) a snapshot of the batch, residency
+     * matching the batch's device.
+     *
+     * The concrete returned object is an ImageBatchVarShapeDataStridedHip for a
+     * GPU batch or an ImageBatchVarShapeDataStridedHost for a CPU batch; both are
+     * returned through the common ImageBatchVarShapeDataStrided base, which
+     * carries the device/buffer-kind tag so callers can recover the leaf via
+     * cast<>() (see the templated overload). The snapshot is a metadata view
+     * valid as long as this batch outlives it.
+     *
+     * GPU: synchronizes the dirty suffix of the host mirrors to the device
+     * descriptor table on `stream` first; `imageList`/`formatList` are device
+     * pointers safe for kernels enqueued on the same stream, and `hostFormatList`
+     * aliases the pinned host format mirror. CPU: `stream` is unused, no sync
+     * occurs, and `imageList`/`formatList`/`hostFormatList` are all host pointers
+     * (`formatList` and `hostFormatList` alias).
+     */
+    ImageBatchVarShapeDataStrided exportData(hipStream_t stream);
+
+    /**
+     * @brief Build a snapshot and down-cast it to a specific subclass. Throws
+     * std::bad_cast if the underlying buffer kind doesn't match Derived.
+     */
+    template <typename Derived>
+    Derived exportData(hipStream_t stream);
+
+   private:
+    void doUpdateCache() const;
+
+    int32_t m_capacity;
+    detail::VarShapeDescriptorTable m_table;  // owns the descriptor buffers, fence, and sync.
+    std::vector<Image> m_images;
+
+    mutable std::optional<Size2D> m_cacheMaxSize;
+    mutable std::optional<ImageFormat> m_cacheUniqueFormat;
+};
+
+template <typename It>
+void ImageBatchVarShape::pushBack(It begin, It end) {
+    const int32_t incoming = static_cast<int32_t>(std::distance(begin, end));
+    if (incoming + numImages() > m_capacity) {
+        throw Exception("ImageBatchVarShape::pushBack range would exceed capacity", eStatusType::OUT_OF_BOUNDS);
+    }
+
+    const int32_t oldNumImages = numImages();
+    const auto oldMaxSize = m_cacheMaxSize;
+    const auto oldUniqueFormat = m_cacheUniqueFormat;
+
+    try {
+        for (auto it = begin; it != end; ++it) {
+            pushBack(*it);
+        }
+    } catch (...) {
+        m_images.erase(m_images.begin() + oldNumImages, m_images.end());
+        m_cacheMaxSize = oldMaxSize;
+        m_cacheUniqueFormat = oldUniqueFormat;
+        throw;
+    }
+}
+
+template <typename Derived>
+Derived ImageBatchVarShape::exportData(hipStream_t stream) {
+    ImageBatchVarShapeDataStrided data = exportData(stream);
+    auto derived = data.cast<Derived>();
+    if (!derived.has_value()) {
+        throw std::bad_cast();
+    }
+    return derived.value();
+}
+
+}  // namespace roccv
diff --git a/include/core/image_buffer.hpp b/include/core/image_buffer.hpp
new file mode 100644
index 00000000..aea93c2d
--- /dev/null
+++ b/include/core/image_buffer.hpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+/** Maximum number of data planes an image can have. */
+#define ROCCV_MAX_IMAGE_PLANES (6)
+
+namespace roccv {
+
+/**
+ * @brief Describes a single pitch-linear image plane.
+ *
+ * For interleaved-channel formats there is exactly one plane covering the whole
+ * image. For planar formats (e.g. NV12, YUV420) each channel/plane carries its
+ * own width, height, and row stride and lives in its own buffer.
+ */
+struct ImagePlaneStrided {
+    /** Width of this plane in pixels. Must be >= 1. */
+    int32_t width;
+
+    /** Height of this plane in pixels. Must be >= 1. */
+    int32_t height;
+
+    /** Distance in bytes between the start of consecutive rows. Must be at
+     *  least `(width * bits-per-pixel + 7) / 8`. */
+    int64_t rowStride;
+
+    /** Pointer to the first byte of plane data. Validity (device vs host) is
+     *  determined by the enclosing data type. */
+    void* basePtr;
+};
+
+/**
+ * @brief A pitch-linear image buffer: one or more `ImagePlaneStrided` entries.
+ *
+ * Only the first `numPlanes` entries carry valid data; the remainder of the
+ * fixed-size `planes` array is unused. Capping the array size keeps the buffer
+ * trivially copyable so it can ride inside `ImageBuffer` without an
+ * allocation.
+ */
+struct ImageBufferStrided {
+    /** Number of valid planes. Must be >= 1. */
+    int32_t numPlanes;
+
+    /** Per-plane descriptors. Only the first `numPlanes` are valid. */
+    ImagePlaneStrided planes[ROCCV_MAX_IMAGE_PLANES];
+};
+
+/**
+ * @brief An image buffer. Currently only the strided variant is supported.
+ * Mirrors the role `TensorBuffer` plays for tensors and is intentionally
+ * shaped as a tagged-union-style aggregate so additional buffer kinds can be
+ * added later (e.g. HIP textures) without changing the public type.
+ */
+struct ImageBuffer {
+    ImageBufferStrided strided;
+};
+
+}  // namespace roccv
diff --git a/include/core/image_data.hpp b/include/core/image_data.hpp
new file mode 100644
index 00000000..cf45e71c
--- /dev/null
+++ b/include/core/image_data.hpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <optional>
+#include <type_traits>
+
+#include "core/image_buffer.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+#include "operator_types.h"
+
+namespace roccv {
+
+/**
+ * @brief Discriminator for the kind of buffer an ImageData carries. Used by
+ * IsCompatibleKind() / cast<>() to perform safe runtime down-casting through
+ * the ImageData hierarchy.
+ */
+enum class ImageBufferType {
+    IMAGE_BUFFER_NONE,         // Default/invalid buffer type. Used when no buffer type is specified.
+    IMAGE_BUFFER_STRIDED_HIP,  // GPU-accessible buffer with strided access.
+    IMAGE_BUFFER_STRIDED_HOST  // Host-accessible buffer with strided access.
+};
+
+/**
+ * @brief Holds the underlying image data alongside metadata (format, buffer
+ * kind). Non-strided image data is not supported for use right now; use
+ * ImageDataStrided to access strided image data instead.
+ *
+ * ImageData is the interchange type for a single variable-sized image. It
+ * does not own the underlying pixel buffer — it is a metadata snapshot, valid
+ * only as long as the producing buffer outlives it.
+ */
+class ImageData {
+   public:
+    ImageData() = delete;
+    virtual ~ImageData() = default;
+
+    /**
+     * @brief Returns the pixel format of the image.
+     */
+    virtual const ImageFormat &format() const;
+
+    /**
+     * @brief Returns the device the image data resides on.
+     */
+    virtual eDeviceType device() const;
+
+    /**
+     * @brief Attempts to down-cast this ImageData to a more specific subclass.
+     * Returns the casted value if the underlying buffer kind matches what
+     * Derived expects, or std::nullopt otherwise.
+     *
+     * @tparam Derived The target subclass to cast to.
+     */
+    template <typename Derived>
+    std::optional<Derived> cast() const {
+        static_assert(std::is_base_of<ImageData, Derived>::value, "Cannot cast ImageData to an unrelated type.");
+        static_assert(sizeof(Derived) == sizeof(ImageData), "Derived type must not add any additional data members.");
+
+        if (!Derived::IsCompatibleKind(m_bufferType)) {
+            return std::nullopt;
+        }
+
+        return std::make_optional<Derived>(m_format, m_buffer);
+    }
+
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+
+   protected:
+    ImageData(const ImageFormat &format, const ImageBuffer &buffer);
+
+    ImageFormat m_format;
+    eDeviceType m_deviceType;
+    ImageBufferType m_bufferType;
+    ImageBuffer m_buffer;
+};
+
+/**
+ * @brief Image data backed by one or more pitch-linear planes. Adds typed
+ * accessors for plane descriptors on top of the base ImageData. Sub-classed
+ * by ImageDataStridedHip and ImageDataStridedHost to discriminate device vs
+ * host residency.
+ */
+class ImageDataStrided : public ImageData {
+   public:
+    using Buffer = ImageBufferStrided;
+
+    ImageDataStrided(const ImageFormat &format, const ImageBuffer &buffer);
+
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+
+    /**
+     * @brief Returns the logical image dimensions, taken from plane 0. For
+     * planar formats, individual planes may have smaller dimensions (e.g.
+     * chroma sub-sampling); use plane(p) to inspect each plane directly.
+     */
+    Size2D size() const;
+
+    /**
+     * @brief Returns the number of valid planes in the buffer.
+     */
+    int32_t numPlanes() const;
+
+    /**
+     * @brief Returns the descriptor for the requested plane.
+     *
+     * @param[in] p The plane index. Must satisfy `0 <= p < numPlanes()`.
+     */
+    const ImagePlaneStrided &plane(int32_t p) const;
+};
+
+/**
+ * @brief GPU-accessible strided image data.
+ */
+class ImageDataStridedHip : public ImageDataStrided {
+   public:
+    using Buffer = ImageBufferStrided;
+
+    ImageDataStridedHip(const ImageFormat &format, const ImageBuffer &buffer);
+
+    /**
+     * @brief Constructs GPU-accessible strided image data from a strided
+     * image buffer directly.
+     *
+     * @param[in] format The pixel format.
+     * @param[in] buffer A strided image buffer with planes allocated on the GPU.
+     */
+    ImageDataStridedHip(const ImageFormat &format, const Buffer &buffer);
+
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+};
+
+/**
+ * @brief Host-accessible strided image data.
+ */
+class ImageDataStridedHost : public ImageDataStrided {
+   public:
+    using Buffer = ImageBufferStrided;
+
+    ImageDataStridedHost(const ImageFormat &format, const ImageBuffer &buffer);
+
+    /**
+     * @brief Constructs host-accessible strided image data from a strided
+     * image buffer directly.
+     *
+     * @param[in] format The pixel format.
+     * @param[in] buffer A strided image buffer with planes allocated on the host.
+     */
+    ImageDataStridedHost(const ImageFormat &format, const Buffer &buffer);
+
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+};
+
+}  // namespace roccv
diff --git a/include/core/image_format.hpp b/include/core/image_format.hpp
index 7dd891f3..ddb1d100 100644
--- a/include/core/image_format.hpp
+++ b/include/core/image_format.hpp
@@ -40,7 +40,10 @@ enum class eSwizzle {
  */
 class ImageFormat {
    public:
-    explicit ImageFormat() {}
+    /**
+     * @brief Default-constructs to FMT_NONE.
+     */
+    constexpr ImageFormat() : m_dtype(eDataType::DATA_TYPE_U8), m_numChannels(0), m_swizzle(eSwizzle::XYZW) {}
     explicit constexpr ImageFormat(eDataType dtype, int32_t numChannels, eSwizzle swizzle = eSwizzle::XYZW)
         : m_dtype(dtype), m_numChannels(numChannels), m_swizzle(swizzle) {}
 
@@ -48,12 +51,20 @@ class ImageFormat {
     int32_t channels() const noexcept;
     eSwizzle swizzle() const noexcept;
 
+    constexpr bool operator==(const ImageFormat& other) const noexcept {
+        return m_dtype == other.m_dtype && m_numChannels == other.m_numChannels && m_swizzle == other.m_swizzle;
+    }
+    constexpr bool operator!=(const ImageFormat& other) const noexcept { return !(*this == other); }
+
    private:
     eDataType m_dtype;
     int32_t m_numChannels;
     eSwizzle m_swizzle;
 };
 
+// Undefined format. Used to represent an uninitialized or invalid format.
+constexpr ImageFormat FMT_NONE{eDataType::DATA_TYPE_U8, 0, eSwizzle::XYZW};
+
 // Single plane with one 8-bit unsigned integer channel.
 constexpr ImageFormat FMT_U8(eDataType::DATA_TYPE_U8, 1, eSwizzle::XYZW);
 
diff --git a/include/core/image_storage.hpp b/include/core/image_storage.hpp
new file mode 100644
index 00000000..70984742
--- /dev/null
+++ b/include/core/image_storage.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+namespace roccv {
+
+/**
+ * @brief Holds the raw data pointer for a single Image and serves as the
+ * refcount target shared between Image handles.
+ *
+ * ImageStorage carries no lifecycle logic of its own: freeing the underlying
+ * buffer is the responsibility of the shared_ptr<ImageStorage> deleter
+ * installed at the Image construction site. The allocating Image ctor
+ * captures the allocator + device into its deleter; ImageWrapData captures
+ * the user's cleanup callback (or installs none for the view-only case).
+ *
+ * As a result, ImageStorage is held only by shared_ptr — never by value, never
+ * copied. Move/copy are deleted to enforce that.
+ */
+class ImageStorage {
+   public:
+    explicit ImageStorage(void* data) : m_data(data) {}
+
+    ImageStorage(const ImageStorage&) = delete;
+    ImageStorage& operator=(const ImageStorage&) = delete;
+
+    void* data() const noexcept { return m_data; }
+
+   private:
+    void* m_data;
+};
+
+}  // namespace roccv
diff --git a/include/core/wrappers/border_wrapper.hpp b/include/core/wrappers/border_wrapper.hpp
index f55493a5..36128b37 100644
--- a/include/core/wrappers/border_wrapper.hpp
+++ b/include/core/wrappers/border_wrapper.hpp
@@ -23,38 +23,39 @@
 
 #include <hip/hip_runtime.h>
 
-#include "core/wrappers/image_wrapper.hpp"
+#include <algorithm>
+#include <cstdlib>
+
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "operator_types.h"
 
 namespace roccv {
 
 /**
- * @brief Wrapper class for ImageWrapper. This extends the descriptors by defining behaviors for when tensor
- * coordinates go out of scope.
+ * @brief Wrapper class which adds border-handling behavior on top of an underlying image wrapper.
+ *
+ * Templated on the wrapper type W (e.g. TensorWrapper<T>, ImageBatchVarShapeWrapper<T>) so the same border math
+ * serves both uniform-shape and variable-shape image batches. The pixel value type T is recovered from
+ * W::ValueType. W must expose: ValueType, at(n,h,w,c), width(n), height(n), batches(), channels().
  *
- * @tparam T The underlying data type of the tensor.
  * @tparam BorderType The border type to use when coordinates are out of bounds.
+ * @tparam W          The underlying image wrapper type.
  */
-template <typename T, eBorderType BorderType>
+template <eBorderType BorderType, typename W>
 class BorderWrapper {
    public:
-    /**
-     * @brief Wraps an ImageWrapper and extends its capabilities to handle out of bounds coordinates.
-     *
-     * @param tensor The tensor to wrap.
-     * @param border_value The fallback border color to use when using a constant border mode.
-     */
-    BorderWrapper(const Tensor& tensor, T border_value) : m_desc(tensor), m_border_value(border_value) {}
+    using ValueType = typename W::ValueType;
+    using WrapperType = W;
+    static constexpr eBorderType kBorderType = BorderType;
 
     /**
-     * @brief Constructs a BorderWrapper from an existing ImageWrapper. Extends its capabilities to handle out of bound
-     * coordinates.
+     * @brief Constructs a BorderWrapper from an existing image wrapper. Extends its capabilities to handle out of
+     * bound coordinates.
      *
-     * @param image_wrapper The ImageWrapper to wrap around the BorderWrapper.
-     * @param border_value The fallback border color to use when using a constant border mode.
+     * @param image_wrapper The image wrapper to wrap.
+     * @param border_value  The fallback border color to use when using a constant border mode.
      */
-    BorderWrapper(ImageWrapper<T> image_wrapper, T border_value)
-        : m_desc(image_wrapper), m_border_value(border_value) {}
+    BorderWrapper(W image_wrapper, ValueType border_value) : m_desc(image_wrapper), m_border_value(border_value) {}
 
     /**
      * @brief Returns a reference to the underlying data given image coordinates. If the coordinates fall out of bounds,
@@ -66,11 +67,14 @@ class BorderWrapper {
      * @param c The channel index.
      * @return A reference to the underlying data or a fallback border value of type T.
      */
-    __device__ __host__ const T at(int64_t n, int64_t h, int64_t w, int64_t c) const {
+    __device__ __host__ const ValueType at(int64_t n, int64_t h, int64_t w, int64_t c) const {
+        const int64_t imgWidth = width(n);
+        const int64_t imgHeight = height(n);
+
         // Constant border type implementation. This is a special case which doesn't remap values, but rather returns
         // the provided constant value.
         if constexpr (BorderType == eBorderType::BORDER_TYPE_CONSTANT) {
-            if (w < 0 || w >= width() || h < 0 || h >= height())
+            if (w < 0 || w >= imgWidth || h < 0 || h >= imgHeight)
                 return m_border_value;
             else
                 return m_desc.at(n, h, w, c);
@@ -80,13 +84,12 @@ class BorderWrapper {
         // required at image borders. While this may cause branch divergence, a good bulk of the pixels should fall
         // within image bounds and will take the same branch. This is preferred over having to do expensive calculations
         // for EVERY pixel in the image (most of which do not require said calculations).
-        if (w >= 0 && w < width() && h >= 0 && h < height()) {
+        if (w >= 0 && w < imgWidth && h >= 0 && h < imgHeight) {
             return m_desc.at(n, h, w, c);
         }
 
         // Otherwise, do some additional calculations to map the provided x and y coordinates to be within bounds.
         int64_t x = w, y = h;
-        int64_t imgWidth = width(), imgHeight = height();
 
         // Reflect border type implementation. (Note: This is NOT REFLECT101, pixels at the border will be duplicated as
         // is the intended behavior for this border mode.)
@@ -139,18 +142,20 @@ class BorderWrapper {
     }
 
     /**
-     * @brief Retrives the height of the images.
+     * @brief Retrives the height of the image at batch index n.
      *
+     * @param n Batch index. Ignored when W is a uniform-shape wrapper.
      * @return Image height.
      */
-    __device__ __host__ inline int64_t height() const { return m_desc.height(); }
+    __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); }
 
     /**
-     * @brief Retrieves the width of the image.
+     * @brief Retrieves the width of the image at batch index n.
      *
+     * @param n Batch index. Ignored when W is a uniform-shape wrapper.
      * @return Image width.
      */
-    __device__ __host__ inline int64_t width() const { return m_desc.width(); }
+    __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); }
 
     /**
      * @brief Retrieves the number of batches in the image tensor.
@@ -167,7 +172,21 @@ class BorderWrapper {
     __device__ __host__ inline int64_t channels() const { return m_desc.channels(); }
 
    private:
-    ImageWrapper<T> m_desc;
-    T m_border_value;
+    W m_desc;
+    ValueType m_border_value;
 };
-}  // namespace roccv
\ No newline at end of file
+
+/**
+ * @brief Factory for BorderWrapper. Deduces the underlying wrapper type W from the argument so callers
+ * only need to spell the border-mode policy explicitly.
+ *
+ * @tparam B The border mode to apply.
+ * @param wrap        The underlying image wrapper.
+ * @param borderValue Fallback value used when B is BORDER_TYPE_CONSTANT.
+ */
+template <eBorderType B, typename W>
+auto MakeBorderWrapper(W wrap, typename W::ValueType borderValue) {
+    return BorderWrapper<B, W>(wrap, borderValue);
+}
+
+}  // namespace roccv
diff --git a/include/core/wrappers/image_batch_var_shape_wrapper.hpp b/include/core/wrappers/image_batch_var_shape_wrapper.hpp
new file mode 100644
index 00000000..b32bbdbb
--- /dev/null
+++ b/include/core/wrappers/image_batch_var_shape_wrapper.hpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <hip/hip_runtime.h>
+
+#include <cassert>
+#include <cstdint>
+
+#include "core/detail/type_traits.hpp"
+#include "core/image_batch_data.hpp"
+#include "core/image_buffer.hpp"
+
+namespace roccv {
+
+/**
+ * @brief ImageBatchVarShapeWrapper is a non-owning, kernel-friendly view over an ImageBatchVarShape's
+ * descriptor table. It satisfies the same wrapper concept as TensorWrapper<T>
+ * (ValueType, at(n,h,w,c), width(n), height(n), batches(), channels()) so it composes with
+ * BorderWrapper / InterpolationWrapper unchanged.
+ *
+ * Single-plane interleaved (NHWC-style) only — ImageBatchVarShape rejects multi-plane images
+ * at pushBack, and channel count is derived from T via detail::NumElements<T>.
+ *
+ * Pointer residency follows the snapshot it is built from: for a GPU batch
+ * (ImageBatchVarShapeDataStridedHip) m_imageList is a device pointer, so the wrapper is usable from
+ * device code; for a CPU batch (ImageBatchVarShapeDataStridedHost) it is a host pointer, usable from
+ * host code. Every at()/width()/height() call dereferences it, so the caller must run the wrapper on
+ * the side matching the snapshot's residency.
+ *
+ * @tparam T The datatype of an individual pixel (e.g. uchar1, uchar3, uchar4, float1, float4).
+ */
+template <typename T>
+class ImageBatchVarShapeWrapper {
+   public:
+    using ValueType = T;
+    using BaseType = detail::BaseType<T>;
+
+    ImageBatchVarShapeWrapper() = default;
+
+    /**
+     * @brief Creates a ImageBatchVarShapeWrapper from a varshape batch data snapshot.
+     *
+     * Accepts either residency through the common ImageBatchVarShapeDataStrided base — a GPU
+     * (...Hip) snapshot yields a device-usable wrapper, a CPU (...Host) snapshot a host-usable one.
+     *
+     * @param data The exported descriptor table from ImageBatchVarShape::exportData(stream).
+     */
+    __host__ ImageBatchVarShapeWrapper(const ImageBatchVarShapeDataStrided& data)
+        : m_imageList(data.imageList()), m_numImages(data.numImages()) {}
+
+    /**
+     * @brief Returns a reference to data at given image-batch coordinates.
+     *
+     * @param n Batch index.
+     * @param h Row index within image n.
+     * @param w Column index within image n.
+     * @param c Channel index within the pixel.
+     * @return A reference to the underlying pixel-channel value.
+     */
+    __device__ __host__ T& at(int64_t n, int64_t h, int64_t w, int64_t c) { return *doGetPtr(n, h, w, c); }
+
+    __device__ __host__ const T at(int64_t n, int64_t h, int64_t w, int64_t c) const { return *doGetPtr(n, h, w, c); }
+
+    /**
+     * @brief Width of the image at batch index n.
+     */
+    __device__ __host__ inline int64_t width(int64_t n) const { return m_imageList[n].planes[0].width; }
+
+    /**
+     * @brief Height of the image at batch index n.
+     */
+    __device__ __host__ inline int64_t height(int64_t n) const { return m_imageList[n].planes[0].height; }
+
+    /**
+     * @brief Number of images in the batch.
+     */
+    __device__ __host__ inline int64_t batches() const { return m_numImages; }
+
+    /**
+     * @brief Number of channels per pixel. Derived from T, identical across all images in v1.
+     */
+    __device__ __host__ inline int64_t channels() const { return detail::NumElements<T>; }
+
+   private:
+    __device__ __host__ inline T* doGetPtr(int64_t n, int64_t h, int64_t w, int64_t c) const {
+        // Single-plane interleaved NHWC layout: pixel stride is sizeof(T), channel stride is
+        // sizeof(BaseType). Match TensorWrapper<T>::at semantics — returns a T* offset to (h, w)
+        // and additionally shifted by c channels.
+        const ImagePlaneStrided& p = m_imageList[n].planes[0];
+        unsigned char* addr =
+            reinterpret_cast<unsigned char*>(p.basePtr) + h * p.rowStride + w * sizeof(T) + c * sizeof(BaseType);
+        return reinterpret_cast<T*>(addr);
+    }
+
+    const ImageBufferStrided* m_imageList = nullptr;
+    int32_t m_numImages = 0;
+};
+
+}  // namespace roccv
diff --git a/include/core/wrappers/interpolation_wrapper.hpp b/include/core/wrappers/interpolation_wrapper.hpp
index 7adb8cb6..e4e8c037 100644
--- a/include/core/wrappers/interpolation_wrapper.hpp
+++ b/include/core/wrappers/interpolation_wrapper.hpp
@@ -23,41 +23,38 @@
 
 #include "core/detail/casting.hpp"
 #include "core/detail/math/vectorized_type_math.hpp"
-#include "core/wrappers/border_wrapper.hpp"
 #include "core/detail/vector_utils.hpp"
+#include "core/wrappers/border_wrapper.hpp"
 #include "operator_types.h"
 
 namespace roccv {
 
 /**
- * @brief A kernel-friendly wrapper which provides interpolation logic based on the given
- * coordinates. This tensor wrapper is typically only used for input tensors and does not provide write access to its
- * underlying data.
+ * @brief A kernel-friendly wrapper which provides interpolation logic on top of a BorderWrapper.
+ *
+ * Templated directly on the BorderWrapper type so the redundant border-mode and underlying-wrapper template
+ * parameters need only be spelled once (in the BorderWrapper type). Recover the border mode via
+ * BW::kBorderType and the underlying wrapper type via BW::WrapperType.
  *
- * @tparam T Underlying data type of the tensor data.
- * @tparam C Number of channels in data type.
- * @tparam B Border type to use for interpolation.
- * @tparam I Interpolation type to use.
+ * Read-only access; do not use for output tensors.
+ *
+ * @tparam I  Interpolation type to use.
+ * @tparam BW The BorderWrapper type to wrap. Must expose ValueType plus at(n,h,w,c), width(n), height(n).
  */
-template <typename T, eBorderType B, eInterpolationType I>
+template <eInterpolationType I, typename BW>
 class InterpolationWrapper {
    public:
-    /**
-     * @brief Wraps a roccv::Tensor in an InterpolationWrapper to provide pixel interpolation when accessing
-     * non-integer coordinate mappings.
-     *
-     * @param tensor The tensor to wrap.
-     * @param border_value A fallback border value to use in the case of a constant border mode.
-     */
-    InterpolationWrapper(const Tensor& tensor, T border_value) : m_desc(tensor, border_value) {}
+    using ValueType = typename BW::ValueType;
+    using BorderType = BW;
+    static constexpr eInterpolationType kInterpolationType = I;
 
     /**
-     * @brief Wraps a BorderWrapper in an Interpolation wrapper. Extends capabilities to interpolate pixel values when
-     * given non-integer coordinates.
+     * @brief Wraps a BorderWrapper in an InterpolationWrapper. Extends capabilities to interpolate pixel values
+     * when given non-integer coordinates.
      *
      * @param borderWrapper The BorderWrapper to wrap.
      */
-    InterpolationWrapper(BorderWrapper<T, B> borderWrapper) : m_desc(borderWrapper) {}
+    InterpolationWrapper(BW borderWrapper) : m_desc(borderWrapper) {}
 
     /**
      * @brief This function calculates the weighting coefficients for the Catmull-Rom cubic interpolation.
@@ -92,7 +89,7 @@ class InterpolationWrapper {
      * @param w Width coordinates.
      * @return An interpolated value.
      */
-    inline __device__ __host__ const T at(int64_t n, float h, float w, int64_t c) const {
+    inline __device__ __host__ ValueType at(int64_t n, float h, float w, int64_t c) const {
         if constexpr (I == eInterpolationType::INTERP_TYPE_NEAREST) {
             // Nearest neighbor interpolation implementation
             return m_desc.at(n, lroundf(h), lroundf(w), c);
@@ -102,7 +99,7 @@ class InterpolationWrapper {
             // -     -
             // v3 -- v4
 
-            using WorkType = detail::MakeType<float, detail::NumElements<T>>;
+            using WorkType = detail::MakeType<float, detail::NumElements<ValueType>>;
 
             int64_t x0 = static_cast<int64_t>(floorf(w));
             int64_t x1 = x0 + 1;
@@ -118,10 +115,10 @@ class InterpolationWrapper {
             auto q2 = v3 * (x1 - w) + v4 * (w - x0);
             auto q = q1 * (y1 - h) + q2 * (h - y0);
 
-            return detail::RangeCast<T>(q);
+            return detail::RangeCast<ValueType>(q);
         } else if constexpr (I == eInterpolationType::INTERP_TYPE_CUBIC) {
             using namespace roccv::detail;
-            using WorkType = detail::MakeType<float, detail::NumElements<T>>;
+            using WorkType = detail::MakeType<float, detail::NumElements<ValueType>>;
 
             // Integer coordinates for pixel (x, y)
             int64_t int_x = static_cast<int64_t>(floorf(w));
@@ -136,20 +133,34 @@ class InterpolationWrapper {
             WorkType sum = SetAll<WorkType>(0.0f);
             for (int index_y = -1; index_y <= 2; index_y++) {
                 for (int index_x = -1; index_x <= 2; index_x++) {
-                    sum += detail::RangeCast<WorkType>(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) * (weight_x[index_x + 1] * weight_y[index_y + 1]);
+                    sum += detail::RangeCast<WorkType>(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) *
+                           (weight_x[index_x + 1] * weight_y[index_y + 1]);
                 }
             }
 
-            return detail::RangeCast<T>(sum);
+            return detail::RangeCast<ValueType>(sum);
         }
     }
 
-    __device__ __host__ inline int64_t height() const { return m_desc.height(); }
-    __device__ __host__ inline int64_t width() const { return m_desc.width(); }
+    __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); }
+    __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); }
     __device__ __host__ inline int64_t batches() const { return m_desc.batches(); }
     __device__ __host__ inline int64_t channels() const { return m_desc.channels(); }
 
    private:
-    BorderWrapper<T, B> m_desc;
+    BW m_desc;
 };
+
+/**
+ * @brief Factory for InterpolationWrapper. Deduces the BorderWrapper type BW (and its border mode +
+ * underlying wrapper) from the argument; callers only need to spell the interpolation policy.
+ *
+ * @tparam I The interpolation type.
+ * @param borderWrap An already-constructed BorderWrapper (typically via MakeBorderWrapper<B>(...)).
+ */
+template <eInterpolationType I, typename BW>
+auto MakeInterpolationWrapper(BW borderWrap) {
+    return InterpolationWrapper<I, BW>(borderWrap);
+}
+
 }  // namespace roccv
\ No newline at end of file
diff --git a/include/core/wrappers/image_wrapper.hpp b/include/core/wrappers/tensor_wrapper.hpp
similarity index 81%
rename from include/core/wrappers/image_wrapper.hpp
rename to include/core/wrappers/tensor_wrapper.hpp
index e174c64a..be776576 100644
--- a/include/core/wrappers/image_wrapper.hpp
+++ b/include/core/wrappers/tensor_wrapper.hpp
@@ -31,28 +31,30 @@
 namespace roccv {
 
 /**
- * @brief ImageWrapper is a non-owning wrapper for roccv::Tensors with a NHWC/NCHW/HWC layout. It provides
+ * @brief TensorWrapper is a non-owning wrapper for roccv::Tensors with a NHWC/NCHW/HWC layout. It provides
  * methods for accessing the underlying data within HIP kernels.
  *
  * @tparam T The datatype of the underlying tensor data.
  */
 template <typename T>
-class ImageWrapper {
+class TensorWrapper {
    public:
     using ValueType = T;
     using BaseType = detail::BaseType<T>;
 
+    TensorWrapper() = default;
+
     /**
-     * @brief Creates an ImageWrapper from a Tensor.
+     * @brief Creates an TensorWrapper from a Tensor.
      *
-     * @param tensor The Tensor to be represented by the ImageWrapper.
+     * @param tensor The Tensor to be represented by the TensorWrapper.
      */
-    ImageWrapper(const Tensor& tensor) {
+    TensorWrapper(const Tensor& tensor) {
         if (tensor.layout() != eTensorLayout::TENSOR_LAYOUT_NHWC &&
             tensor.layout() != eTensorLayout::TENSOR_LAYOUT_NCHW &&
             tensor.layout() != eTensorLayout::TENSOR_LAYOUT_HWC &&
             tensor.layout() != eTensorLayout::TENSOR_LAYOUT_CHW) {
-            throw Exception("The given tensor layout is not supported for ImageWrapper", eStatusType::NOT_IMPLEMENTED);
+            throw Exception("The given tensor layout is not supported for TensorWrapper", eStatusType::NOT_IMPLEMENTED);
         }
 
         // Copy tensor data into image tensor descriptor
@@ -71,14 +73,14 @@ class ImageWrapper {
     }
 
     /**
-     * @brief Creates an ImageWrapper from a vector.
+     * @brief Creates an TensorWrapper from a vector.
      *
      * @param input The input vector to wrap.
      * @param batchSize The number of images within the batch.
      * @param width The width of each image within the batch.
      * @param height The height of each image within the batch.
      */
-    ImageWrapper(std::vector<BaseType>& input, int32_t batchSize, int32_t width, int32_t height) {
+    TensorWrapper(std::vector<BaseType>& input, int32_t batchSize, int32_t width, int32_t height) {
         // Calculate strides based on input (byte-wise strides)
         stride.c = sizeof(BaseType);
         stride.w = stride.c * detail::NumElements<T>;
@@ -96,14 +98,14 @@ class ImageWrapper {
     }
 
     /**
-     * @brief Creates an ImageWrapper from a pointer.
+     * @brief Creates an TensorWrapper from a pointer.
      *
      * @param input The input pointer to wrap.
      * @param batchSize The number of images within the batch.
      * @param width The width of each image within the batch.
      * @param height The height of each image within the batch.
      */
-    ImageWrapper(void* input, int32_t batchSize, int32_t width, int32_t height) {
+    TensorWrapper(void* input, int32_t batchSize, int32_t width, int32_t height) {
         // Calculate strides based on input (byte-wise strides)
         stride.c = sizeof(BaseType);
         stride.w = stride.c * detail::NumElements<T>;
@@ -139,16 +141,22 @@ class ImageWrapper {
     /**
      * @brief Retrives the height of the images.
      *
+     * @param n Batch index. Ignored for uniform-shape TensorWrapper; included so the signature matches
+     *          ImageBatchVarShapeWrapper, allowing both to satisfy the wrapper concept consumed by
+     *          BorderWrapper / InterpolationWrapper.
      * @return Image height.
      */
-    __device__ __host__ inline int64_t height() const { return shape.h; }
+    __device__ __host__ inline int64_t height(int64_t /*n*/ = 0) const { return shape.h; }
 
     /**
      * @brief Retrieves the width of the image.
      *
+     * @param n Batch index. Ignored for uniform-shape TensorWrapper; included so the signature matches
+     *          ImageBatchVarShapeWrapper, allowing both to satisfy the wrapper concept consumed by
+     *          BorderWrapper / InterpolationWrapper.
      * @return Image width.
      */
-    __device__ __host__ inline int64_t width() const { return shape.w; }
+    __device__ __host__ inline int64_t width(int64_t /*n*/ = 0) const { return shape.w; }
 
     /**
      * @brief Retrieves the number of batches in the image tensor.
diff --git a/include/kernels/device/convert_to_device.hpp b/include/kernels/device/convert_to_device.hpp
index 67596f36..4e20be42 100644
--- a/include/kernels/device/convert_to_device.hpp
+++ b/include/kernels/device/convert_to_device.hpp
@@ -26,7 +26,7 @@ THE SOFTWARE.
 
 #include "core/detail/casting.hpp"
 #include "core/detail/type_traits.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 
 namespace Kernels {
 namespace Device {
diff --git a/include/kernels/device/copy_make_border_device.hpp b/include/kernels/device/copy_make_border_device.hpp
index aeae2d38..7fd16461 100644
--- a/include/kernels/device/copy_make_border_device.hpp
+++ b/include/kernels/device/copy_make_border_device.hpp
@@ -29,9 +29,9 @@ namespace Device {
  * @brief GPU kernel for CopyMakeBorder operator.
  *
  * @tparam SrcDesc Must be a BorderWrapper.
- * @tparam DstDesc Must be a ImageWrapper.
+ * @tparam DstDesc Must be a TensorWrapper.
  * @param src A BorderWrapper containing information for the input tensor.
- * @param dst A ImageWrapper containing information for the output tensor.
+ * @param dst A TensorWrapper containing information for the output tensor.
  * @param top The top pixel coordinate on the y-axis where the border should start.
  * @param left The left-most pixel coordinate on the x-axis where the border should start.
  * @return __global__
diff --git a/include/kernels/device/reformat_device.hpp b/include/kernels/device/reformat_device.hpp
index 35054752..95d6c32a 100644
--- a/include/kernels/device/reformat_device.hpp
+++ b/include/kernels/device/reformat_device.hpp
@@ -21,7 +21,7 @@
 
 #include <hip/hip_runtime.h>
 
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 
 namespace Kernels::Device {
 
@@ -34,7 +34,7 @@ namespace Kernels::Device {
  * @param[out] output The output tensor.
  */
 template <int Channels, typename T>
-__global__ void reformat(roccv::ImageWrapper<T> input, roccv::ImageWrapper<T> output) {
+__global__ void reformat(roccv::TensorWrapper<T> input, roccv::TensorWrapper<T> output) {
     const int x = blockDim.x * blockIdx.x + threadIdx.x;
     const int y = blockDim.y * blockIdx.y + threadIdx.y;
     const int b = blockIdx.z;
diff --git a/include/kernels/host/convert_to_host.hpp b/include/kernels/host/convert_to_host.hpp
index 93c19521..13b8381c 100644
--- a/include/kernels/host/convert_to_host.hpp
+++ b/include/kernels/host/convert_to_host.hpp
@@ -25,7 +25,7 @@ THE SOFTWARE.
 #include <hip/hip_runtime.h>
 #include "core/detail/casting.hpp"
 #include "core/detail/type_traits.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 
 namespace Kernels {
 namespace Host {
diff --git a/include/kernels/host/reformat_host.hpp b/include/kernels/host/reformat_host.hpp
index 69e21b67..00980104 100644
--- a/include/kernels/host/reformat_host.hpp
+++ b/include/kernels/host/reformat_host.hpp
@@ -23,7 +23,7 @@
 
 #include <hip/hip_runtime.h>
 
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 
 namespace Kernels::Host {
 
@@ -36,7 +36,7 @@ namespace Kernels::Host {
  * @param[out] output The output tensor.
  */
 template <int Channels, typename T>
-void reformat(roccv::ImageWrapper<T> input, roccv::ImageWrapper<T> output) {
+void reformat(roccv::TensorWrapper<T> input, roccv::TensorWrapper<T> output) {
 #pragma omp parallel for
     for (int b = 0; b < output.batches(); b++) {
         for (int y = 0; y < output.height(); y++) {
diff --git a/src/core/detail/var_shape_descriptor_table.cpp b/src/core/detail/var_shape_descriptor_table.cpp
new file mode 100644
index 00000000..d22f6c4c
--- /dev/null
+++ b/src/core/detail/var_shape_descriptor_table.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "core/detail/var_shape_descriptor_table.hpp"
+
+#include <algorithm>
+
+#include "core/exception.hpp"
+#include "core/hip_assert.h"
+
+namespace roccv::detail {
+
+VarShapeDescriptorTable::VarShapeDescriptorTable(int32_t capacity, eDeviceType device, const IAllocator& alloc)
+    : m_device(device), m_allocator(alloc) {
+    if (capacity <= 0) {
+        throw Exception("ImageBatchVarShape capacity must be positive", eStatusType::INVALID_VALUE);
+    }
+
+    const size_t imagesBytes = sizeof(ImageBufferStrided) * capacity;
+    const size_t formatsBytes = sizeof(ImageFormat) * capacity;
+
+    try {
+        if (m_device == eDeviceType::GPU) {
+            // Device descriptor table + pinned host mirrors, kept in sync by the
+            // lazy H2D copy in sync() and guarded by m_fence.
+            m_devImages = static_cast<ImageBufferStrided*>(m_allocator.allocHipMem(imagesBytes));
+            m_devFormats = static_cast<ImageFormat*>(m_allocator.allocHipMem(formatsBytes));
+            m_hostImages = static_cast<ImageBufferStrided*>(m_allocator.allocHostPinnedMem(imagesBytes));
+            m_hostFormats = static_cast<ImageFormat*>(m_allocator.allocHostPinnedMem(formatsBytes));
+
+            HIP_VALIDATE_NO_ERRORS(hipEventCreateWithFlags(&m_fence, hipEventDisableTiming));
+        } else {
+            // A single host-resident table handed straight to host kernels: no
+            // device buffers, no pinned memory, no fence.
+            m_hostImages = static_cast<ImageBufferStrided*>(m_allocator.allocHostMem(imagesBytes));
+            m_hostFormats = static_cast<ImageFormat*>(m_allocator.allocHostMem(formatsBytes));
+        }
+    } catch (...) {
+        freeAll();
+        throw;
+    }
+}
+
+VarShapeDescriptorTable::~VarShapeDescriptorTable() {
+    if (m_fencePending && m_fence != nullptr) {
+        // Drain any in-flight H2D copy before freeing the host mirrors it reads
+        // from. (void) — destructors must not throw.
+        (void)hipEventSynchronize(m_fence);
+    }
+    if (m_fence != nullptr) {
+        (void)hipEventDestroy(m_fence);
+    }
+    freeAll();
+}
+
+VarShapeDescriptorTable::VarShapeDescriptorTable(VarShapeDescriptorTable&& other) noexcept
+    : m_device(other.m_device),
+      m_allocator(other.m_allocator),
+      m_dirtyStartingFromIndex(other.m_dirtyStartingFromIndex),
+      m_fencePending(other.m_fencePending),
+      m_devImages(other.m_devImages),
+      m_devFormats(other.m_devFormats),
+      m_hostImages(other.m_hostImages),
+      m_hostFormats(other.m_hostFormats),
+      m_fence(other.m_fence) {
+    other.m_dirtyStartingFromIndex = 0;
+    other.m_fencePending = false;
+    other.m_devImages = nullptr;
+    other.m_devFormats = nullptr;
+    other.m_hostImages = nullptr;
+    other.m_hostFormats = nullptr;
+    other.m_fence = nullptr;
+}
+
+void VarShapeDescriptorTable::freeAll() noexcept {
+    // The host mirrors are pinned for a GPU table and plain host memory for a CPU
+    // table, so only their free path differs. The device frees are null-guarded,
+    // so a CPU table (whose device pointers are null) skips them.
+    if (m_device == eDeviceType::GPU) {
+        if (m_hostFormats != nullptr) m_allocator.freeHostPinnedMem(m_hostFormats);
+        if (m_hostImages != nullptr) m_allocator.freeHostPinnedMem(m_hostImages);
+    } else {
+        if (m_hostFormats != nullptr) m_allocator.freeHostMem(m_hostFormats);
+        if (m_hostImages != nullptr) m_allocator.freeHostMem(m_hostImages);
+    }
+    if (m_devFormats != nullptr) m_allocator.freeHipMem(m_devFormats);
+    if (m_devImages != nullptr) m_allocator.freeHipMem(m_devImages);
+}
+
+void VarShapeDescriptorTable::writeSlot(int32_t index, const ImageBufferStrided& slot, ImageFormat format) {
+    if (m_fencePending) {
+        HIP_VALIDATE_NO_ERRORS(hipEventSynchronize(m_fence));
+        m_fencePending = false;
+    }
+    m_hostImages[index] = slot;
+    m_hostFormats[index] = format;
+}
+
+void VarShapeDescriptorTable::onShrink(int32_t newNumImages) noexcept {
+    m_dirtyStartingFromIndex = std::min(m_dirtyStartingFromIndex, newNumImages);
+}
+
+VarShapeDescriptorTable::Snapshot VarShapeDescriptorTable::sync(hipStream_t stream, int32_t numImages) {
+    // CPU tables have a single host table — nothing to copy. Only a GPU table with
+    // a dirty suffix issues an H2D copy and records the fence.
+    if (m_device == eDeviceType::GPU && m_dirtyStartingFromIndex < numImages) {
+        const int32_t dirtyCount = numImages - m_dirtyStartingFromIndex;
+
+        if (m_fencePending) {
+            HIP_VALIDATE_NO_ERRORS(hipStreamWaitEvent(stream, m_fence, /*flags=*/0));
+        }
+
+        HIP_VALIDATE_NO_ERRORS(hipMemcpyAsync(m_devImages + m_dirtyStartingFromIndex,
+                                              m_hostImages + m_dirtyStartingFromIndex,
+                                              sizeof(ImageBufferStrided) * dirtyCount, hipMemcpyHostToDevice, stream));
+        HIP_VALIDATE_NO_ERRORS(hipMemcpyAsync(m_devFormats + m_dirtyStartingFromIndex,
+                                              m_hostFormats + m_dirtyStartingFromIndex,
+                                              sizeof(ImageFormat) * dirtyCount, hipMemcpyHostToDevice, stream));
+
+        HIP_VALIDATE_NO_ERRORS(hipEventRecord(m_fence, stream));
+        m_fencePending = true;
+    }
+    m_dirtyStartingFromIndex = numImages;
+
+    if (m_device == eDeviceType::GPU) {
+        return Snapshot{m_devImages, m_devFormats, m_hostFormats};
+    }
+    // CPU: imageList/formatList are the host table; hostFormatList aliases it.
+    return Snapshot{m_hostImages, m_hostFormats, m_hostFormats};
+}
+
+}  // namespace roccv::detail
diff --git a/src/core/image.cpp b/src/core/image.cpp
new file mode 100644
index 00000000..d6077dcb
--- /dev/null
+++ b/src/core/image.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "core/image.hpp"
+
+#include <algorithm>
+#include <iterator>
+
+#include "core/data_type.hpp"
+#include "core/detail/context.hpp"
+#include "core/exception.hpp"
+#include "core/image_storage.hpp"
+
+namespace roccv {
+
+namespace {
+
+// Allocates a buffer through `alloc` for the requested device and wraps it
+// in an ImageStorage whose shared_ptr deleter frees through the same allocator.
+// The allocator reference is captured by reference; callers must ensure it
+// outlives every Image (and any handle copied from it) it creates.
+std::shared_ptr<ImageStorage> makeStorage(const ImageRequirements& reqs, const IAllocator& alloc, eDeviceType device) {
+    const size_t bytes = static_cast<size_t>(reqs.planeRowStride[0]) * reqs.size.h;
+
+    void* buf = nullptr;
+    switch (device) {
+        case eDeviceType::GPU:
+            buf = alloc.allocHipMem(bytes);
+            break;
+        case eDeviceType::CPU:
+            buf = alloc.allocHostMem(bytes);
+            break;
+    }
+
+    return std::shared_ptr<ImageStorage>(new ImageStorage(buf), [&alloc, device](ImageStorage* s) {
+        switch (device) {
+            case eDeviceType::GPU:
+                alloc.freeHipMem(s->data());
+                break;
+            case eDeviceType::CPU:
+                alloc.freeHostMem(s->data());
+                break;
+        }
+        delete s;
+    });
+}
+
+}  // namespace
+
+// -----------------------------------------------------------------------------
+// CalcRequirements
+// -----------------------------------------------------------------------------
+
+Image::Requirements Image::CalcRequirements(Size2D size, ImageFormat format) {
+    if (size.w < 1 || size.h < 1) {
+        throw Exception("Image dimensions must be >= 1.", eStatusType::INVALID_VALUE);
+    }
+
+    const int64_t bytesPerPixel = static_cast<int64_t>(DataType(format.dtype()).size()) * format.channels();
+
+    // Guard signed-overflow in the rowStride = bytesPerPixel * width product
+    // (UB on overflow). Realistic image sizes don't approach INT64_MAX, but
+    // pathological callers shouldn't silently propagate garbage into strides.
+    int64_t rowStride = 0;
+    if (__builtin_mul_overflow(bytesPerPixel, static_cast<int64_t>(size.w), &rowStride)) {
+        throw Exception("Image row stride overflows int64.", eStatusType::INVALID_VALUE);
+    }
+
+    // TODO: derive a sensible default base/row alignment from device attributes.
+    return ImageRequirements{
+        .size = size,
+        .format = format,
+        .planeRowStride = {rowStride},
+        .alignBytes = 0,
+    };
+}
+
+// -----------------------------------------------------------------------------
+// Constructors
+// -----------------------------------------------------------------------------
+
+Image::Image(Size2D size, ImageFormat format, eDeviceType device)
+    : Image(size, format, GlobalContext().getDefaultAllocator(), device) {}
+
+Image::Image(Size2D size, ImageFormat format, const IAllocator& alloc, eDeviceType device)
+    : Image(CalcRequirements(size, format), alloc, device) {}
+
+Image::Image(const Requirements& reqs, eDeviceType device)
+    : Image(reqs, GlobalContext().getDefaultAllocator(), device) {}
+
+Image::Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType device)
+    : Image(reqs, device, makeStorage(reqs, alloc, device)) {}
+
+Image::Image(const Requirements& reqs, eDeviceType device, std::shared_ptr<ImageStorage> storage)
+    : m_data(std::move(storage)), m_size(reqs.size), m_format(reqs.format), m_device(device), m_planeRowStride{} {
+    std::copy(std::begin(reqs.planeRowStride), std::end(reqs.planeRowStride), m_planeRowStride.begin());
+}
+
+// -----------------------------------------------------------------------------
+// exportData
+// -----------------------------------------------------------------------------
+
+ImageData Image::exportData() const {
+    // TODO: derive numPlanes from m_format when planar formats land. Today's
+    // ImageFormat is interleaved-only, so plane 0 covers the whole image and
+    // its dimensions match m_size verbatim.
+    ImageBufferStrided strided{};
+    strided.numPlanes = 1;
+    strided.planes[0].width = m_size.w;
+    strided.planes[0].height = m_size.h;
+    strided.planes[0].rowStride = m_planeRowStride[0];
+    strided.planes[0].basePtr = m_data->data();
+
+    switch (m_device) {
+        case eDeviceType::GPU:
+            return ImageDataStridedHip(m_format, strided);
+        case eDeviceType::CPU:
+            return ImageDataStridedHost(m_format, strided);
+    }
+
+    throw Exception("Unsupported device type in Image::exportData.", eStatusType::INVALID_VALUE);
+}
+
+// -----------------------------------------------------------------------------
+// ImageWrapData
+// -----------------------------------------------------------------------------
+
+Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup) {
+    auto strided = data.cast<ImageDataStrided>();
+    if (!strided.has_value()) {
+        throw Exception("ImageWrapData requires strided image data.", eStatusType::INVALID_VALUE);
+    }
+
+    // Single-plane assumption: storage tracks plane(0) and Requirements only
+    // populates planeRowStride[0]. Multi-plane wraps will need to copy each
+    // plane's stride and either store per-plane base pointers or derive them
+    // from a single owning allocation.
+    const ImagePlaneStrided& plane0 = strided->plane(0);
+
+    // Designated initializers to avoid value-initializing ImageFormat through
+    // its explicit default ctor (which copy-list-init refuses).
+    Image::Requirements reqs{
+        .size = Size2D{plane0.width, plane0.height},
+        .format = data.format(),
+        .planeRowStride = {plane0.rowStride},
+        .alignBytes = 0,
+    };
+
+    // The deleter captures `data` by value so the original snapshot survives
+    // long enough to be passed to the cleanup callback on last-handle drop.
+    // Swallow exceptions from `cleanup` — shared_ptr deleters run during
+    // destruction, and a throw would propagate into std::terminate.
+    auto storage =
+        std::shared_ptr<ImageStorage>(new ImageStorage(plane0.basePtr), [data, cleanup](ImageStorage* s) noexcept {
+            if (cleanup) {
+                try {
+                    cleanup(data);
+                } catch (...) {
+                }
+            }
+            delete s;
+        });
+
+    return Image(reqs, data.device(), std::move(storage));
+}
+
+}  // namespace roccv
diff --git a/src/core/image_batch_data.cpp b/src/core/image_batch_data.cpp
new file mode 100644
index 00000000..a8ce07ba
--- /dev/null
+++ b/src/core/image_batch_data.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "core/image_batch_data.hpp"
+
+#include "core/image_batch_buffer.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+
+namespace roccv {
+
+int32_t ImageBatchData::numImages() const { return m_numImages; }
+
+eDeviceType ImageBatchData::device() const { return m_deviceType; }
+
+ImageBatchData::ImageBatchData(int32_t numImages, const ImageBatchBuffer& buffer)
+    : m_numImages(numImages),
+      m_deviceType(eDeviceType::GPU),
+      m_bufferType(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE),
+      m_buffer(buffer) {}
+
+bool ImageBatchData::IsCompatibleKind(ImageBatchBufferType bufferType) {
+    return bufferType != ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE;
+}
+
+ImageBatchVarShapeData::ImageBatchVarShapeData(int32_t numImages, const ImageBatchBuffer& buffer)
+    : ImageBatchData(numImages, buffer) {}
+
+bool ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType bufferType) {
+    return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP ||
+           bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST;
+}
+
+Size2D ImageBatchVarShapeData::maxSize() const {
+    return Size2D{m_buffer.varShapeStrided.maxWidth, m_buffer.varShapeStrided.maxHeight};
+}
+
+ImageFormat ImageBatchVarShapeData::uniqueFormat() const { return m_buffer.varShapeStrided.uniqueFormat; }
+
+const ImageFormat* ImageBatchVarShapeData::formatList() const { return m_buffer.varShapeStrided.formatList; }
+
+const ImageFormat* ImageBatchVarShapeData::hostFormatList() const { return m_buffer.varShapeStrided.hostFormatList; }
+
+ImageBatchVarShapeDataStrided::ImageBatchVarShapeDataStrided(int32_t numImages, const ImageBatchBuffer& buffer)
+    : ImageBatchVarShapeData(numImages, buffer) {}
+
+bool ImageBatchVarShapeDataStrided::IsCompatibleKind(ImageBatchBufferType bufferType) {
+    return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP ||
+           bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST;
+}
+
+const ImageBufferStrided* ImageBatchVarShapeDataStrided::imageList() const {
+    return m_buffer.varShapeStrided.imageList;
+}
+
+ImageBatchVarShapeDataStridedHip::ImageBatchVarShapeDataStridedHip(int32_t numImages, const ImageBatchBuffer& buffer)
+    : ImageBatchVarShapeDataStrided(numImages, buffer) {
+    m_bufferType = ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP;
+    m_deviceType = eDeviceType::GPU;
+}
+
+ImageBatchVarShapeDataStridedHip::ImageBatchVarShapeDataStridedHip(
+    int32_t numImages, const ImageBatchVarShapeDataStridedHip::Buffer& buffer)
+    : ImageBatchVarShapeDataStridedHip(numImages, ImageBatchBuffer{.varShapeStrided = buffer}) {}
+
+bool ImageBatchVarShapeDataStridedHip::IsCompatibleKind(ImageBatchBufferType bufferType) {
+    return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP;
+}
+
+ImageBatchVarShapeDataStridedHost::ImageBatchVarShapeDataStridedHost(int32_t numImages, const ImageBatchBuffer& buffer)
+    : ImageBatchVarShapeDataStrided(numImages, buffer) {
+    m_bufferType = ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST;
+    m_deviceType = eDeviceType::CPU;
+}
+
+ImageBatchVarShapeDataStridedHost::ImageBatchVarShapeDataStridedHost(
+    int32_t numImages, const ImageBatchVarShapeDataStridedHost::Buffer& buffer)
+    : ImageBatchVarShapeDataStridedHost(numImages, ImageBatchBuffer{.varShapeStrided = buffer}) {}
+
+bool ImageBatchVarShapeDataStridedHost::IsCompatibleKind(ImageBatchBufferType bufferType) {
+    return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST;
+}
+
+}  // namespace roccv
diff --git a/src/core/image_batch_var_shape.cpp b/src/core/image_batch_var_shape.cpp
new file mode 100644
index 00000000..aeeaa6b3
--- /dev/null
+++ b/src/core/image_batch_var_shape.cpp
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "core/image_batch_var_shape.hpp"
+
+#include <algorithm>
+
+#include "core/detail/context.hpp"
+#include "core/exception.hpp"
+#include "core/image_batch_buffer.hpp"
+#include "core/image_buffer.hpp"
+
+namespace roccv {
+
+ImageBatchVarShape::ImageBatchVarShape(int32_t capacity, eDeviceType device)
+    : ImageBatchVarShape(capacity, GlobalContext().getDefaultAllocator(), device) {}
+
+ImageBatchVarShape::ImageBatchVarShape(int32_t capacity, const IAllocator& alloc, eDeviceType device)
+    : m_capacity(capacity), m_table(capacity, device, alloc) {
+    m_images.reserve(capacity);
+}
+
+ImageBatchVarShape::ImageBatchVarShape(ImageBatchVarShape&& other) noexcept
+    : m_capacity(other.m_capacity),
+      m_table(std::move(other.m_table)),
+      m_images(std::move(other.m_images)),
+      m_cacheMaxSize(other.m_cacheMaxSize),
+      m_cacheUniqueFormat(other.m_cacheUniqueFormat) {
+    other.m_capacity = 0;
+    other.m_cacheMaxSize.reset();
+    other.m_cacheUniqueFormat.reset();
+}
+
+void ImageBatchVarShape::pushBack(const Image& img) {
+    const int32_t n = numImages();
+    if (n >= m_capacity) {
+        throw Exception("ImageBatchVarShape::pushBack would exceed capacity", eStatusType::OUT_OF_BOUNDS);
+    }
+    if (img.device() != m_table.device()) {
+        throw Exception("ImageBatchVarShape only accepts images matching its device", eStatusType::INVALID_VALUE);
+    }
+
+    // Export through the strided base so this works for both GPU- and
+    // CPU-resident images (a typed cast<...Hip> would reject host images).
+    auto strided = img.exportData().cast<ImageDataStrided>();
+    if (!strided.has_value()) {
+        throw Exception("ImageBatchVarShape requires strided image data", eStatusType::INVALID_VALUE);
+    }
+    const ImageDataStrided& data = strided.value();
+    if (data.numPlanes() != 1) {
+        throw Exception("ImageBatchVarShape only supports single-plane images", eStatusType::INVALID_VALUE);
+    }
+
+    ImageBufferStrided slot{};
+    slot.numPlanes = 1;
+    slot.planes[0] = data.plane(0);
+    m_table.writeSlot(n, slot, img.format());
+
+    const Size2D imgSize = img.size();
+    if (n == 0) {
+        // Seed from scratch: an empty-batch query may have populated the
+        // cache with sentinels (FMT_NONE, 0×0); replacing avoids merging the
+        // first real image into them.
+        m_cacheMaxSize = imgSize;
+        m_cacheUniqueFormat = img.format();
+    } else {
+        // popBack invalidates m_cacheMaxSize without rescanning, so make sure
+        // both halves of the cache are populated before merging in.
+        doUpdateCache();
+        m_cacheMaxSize->w = std::max(m_cacheMaxSize->w, imgSize.w);
+        m_cacheMaxSize->h = std::max(m_cacheMaxSize->h, imgSize.h);
+        if (*m_cacheUniqueFormat != img.format()) {
+            m_cacheUniqueFormat = FMT_NONE;
+        }
+    }
+
+    m_images.push_back(img);
+}
+
+void ImageBatchVarShape::popBack(int32_t count) {
+    if (count < 0) {
+        throw Exception("ImageBatchVarShape::popBack count must be non-negative", eStatusType::INVALID_VALUE);
+    }
+    if (count > numImages()) {
+        throw Exception("ImageBatchVarShape::popBack count exceeds numImages", eStatusType::OUT_OF_BOUNDS);
+    }
+
+    m_images.erase(m_images.end() - count, m_images.end());
+    m_table.onShrink(numImages());
+
+    // maxSize can only shrink on pop; force a rescan on next query. uniqueFormat
+    // stays — it may now be conservatively FMT_NONE, but never wrong.
+    m_cacheMaxSize.reset();
+    if (numImages() == 0) {
+        m_cacheUniqueFormat.reset();
+    }
+}
+
+void ImageBatchVarShape::clear() {
+    m_images.clear();
+    m_table.onShrink(0);
+    m_cacheMaxSize.reset();
+    m_cacheUniqueFormat.reset();
+}
+
+Size2D ImageBatchVarShape::maxSize() const {
+    doUpdateCache();
+    return m_cacheMaxSize.value_or(Size2D{0, 0});
+}
+
+ImageFormat ImageBatchVarShape::uniqueFormat() const {
+    doUpdateCache();
+    return m_cacheUniqueFormat.value_or(FMT_NONE);
+}
+
+void ImageBatchVarShape::doUpdateCache() const {
+    if (m_cacheMaxSize.has_value() && m_cacheUniqueFormat.has_value()) {
+        return;
+    }
+    const int32_t n = static_cast<int32_t>(m_images.size());
+    if (n == 0) {
+        m_cacheMaxSize = Size2D{0, 0};
+        m_cacheUniqueFormat = FMT_NONE;
+        return;
+    }
+
+    const ImageBufferStrided* hostImages = m_table.hostImages();
+    const ImageFormat* hostFormats = m_table.hostFormats();
+
+    Size2D maxSz{0, 0};
+    ImageFormat unique = hostFormats[0];
+    bool heterogeneous = false;
+    for (int32_t i = 0; i < n; ++i) {
+        const ImagePlaneStrided& p0 = hostImages[i].planes[0];
+        maxSz.w = std::max(maxSz.w, p0.width);
+        maxSz.h = std::max(maxSz.h, p0.height);
+        if (!heterogeneous && hostFormats[i] != unique) {
+            heterogeneous = true;
+        }
+    }
+    m_cacheMaxSize = maxSz;
+    m_cacheUniqueFormat = heterogeneous ? FMT_NONE : unique;
+}
+
+ImageBatchVarShapeDataStrided ImageBatchVarShape::exportData(hipStream_t stream) {
+    const auto snap = m_table.sync(stream, numImages());
+    doUpdateCache();
+
+    const Size2D maxSz = m_cacheMaxSize.value();
+    ImageBatchVarShapeBufferStrided buffer{};
+    buffer.uniqueFormat = m_cacheUniqueFormat.value();
+    buffer.maxWidth = maxSz.w;
+    buffer.maxHeight = maxSz.h;
+    buffer.imageList = snap.imageList;
+    buffer.formatList = snap.formatList;
+    buffer.hostFormatList = snap.hostFormatList;
+
+    if (m_table.device() == eDeviceType::GPU) {
+        return ImageBatchVarShapeDataStridedHip(numImages(), buffer);
+    }
+    return ImageBatchVarShapeDataStridedHost(numImages(), buffer);
+}
+
+}  // namespace roccv
diff --git a/src/core/image_data.cpp b/src/core/image_data.cpp
new file mode 100644
index 00000000..6fb0fc83
--- /dev/null
+++ b/src/core/image_data.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "core/image_data.hpp"
+
+#include "core/image_buffer.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+
+namespace roccv {
+
+const ImageFormat& ImageData::format() const { return m_format; }
+
+eDeviceType ImageData::device() const { return m_deviceType; }
+
+ImageData::ImageData(const ImageFormat& format, const ImageBuffer& buffer)
+    : m_format(format),
+      m_deviceType(eDeviceType::GPU),
+      m_bufferType(ImageBufferType::IMAGE_BUFFER_NONE),
+      m_buffer(buffer) {}
+
+bool ImageData::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType != ImageBufferType::IMAGE_BUFFER_NONE;
+}
+
+ImageDataStrided::ImageDataStrided(const ImageFormat& format, const ImageBuffer& buffer)
+    : ImageData(format, buffer) {}
+
+bool ImageDataStrided::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HIP ||
+           bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HOST;
+}
+
+Size2D ImageDataStrided::size() const {
+    const ImagePlaneStrided& p0 = m_buffer.strided.planes[0];
+    return Size2D{p0.width, p0.height};
+}
+
+int32_t ImageDataStrided::numPlanes() const { return m_buffer.strided.numPlanes; }
+
+const ImagePlaneStrided& ImageDataStrided::plane(int32_t p) const { return m_buffer.strided.planes[p]; }
+
+ImageDataStridedHip::ImageDataStridedHip(const ImageFormat& format, const ImageBuffer& buffer)
+    : ImageDataStrided(format, buffer) {
+    m_bufferType = ImageBufferType::IMAGE_BUFFER_STRIDED_HIP;
+    m_deviceType = eDeviceType::GPU;
+}
+
+ImageDataStridedHip::ImageDataStridedHip(const ImageFormat& format, const ImageDataStridedHip::Buffer& buffer)
+    : ImageDataStridedHip(format, ImageBuffer{.strided = buffer}) {}
+
+bool ImageDataStridedHip::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HIP;
+}
+
+ImageDataStridedHost::ImageDataStridedHost(const ImageFormat& format, const ImageBuffer& buffer)
+    : ImageDataStrided(format, buffer) {
+    m_bufferType = ImageBufferType::IMAGE_BUFFER_STRIDED_HOST;
+    m_deviceType = eDeviceType::CPU;
+}
+
+ImageDataStridedHost::ImageDataStridedHost(const ImageFormat& format, const ImageDataStridedHost::Buffer& buffer)
+    : ImageDataStridedHost(format, ImageBuffer{.strided = buffer}) {}
+
+bool ImageDataStridedHost::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HOST;
+}
+
+}  // namespace roccv
diff --git a/src/op_adv_cvt_color.cpp b/src/op_adv_cvt_color.cpp
index 7f035507..c2c69175 100644
--- a/src/op_adv_cvt_color.cpp
+++ b/src/op_adv_cvt_color.cpp
@@ -26,7 +26,7 @@ THE SOFTWARE.
 
 #include "common/validation_helpers.hpp"
 #include "core/tensor.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/common/adv_cvt_color_coefficients.hpp"
 #include "kernels/device/adv_cvt_color_device.hpp"
 #include "kernels/host/adv_cvt_color_host.hpp"
@@ -189,22 +189,22 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou
             switch (conversionCode) {
                 case COLOR_BGR2YUV:
                     Kernels::Device::rgb_or_bgr_to_yuv_adv<uchar3, eSwizzle::ZYXW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output),
                                                               coeff, kDelta);
                     break;
                 case COLOR_RGB2YUV:
                     Kernels::Device::rgb_or_bgr_to_yuv_adv<uchar3, eSwizzle::XYZW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output),
                                                               coeff, kDelta);
                     break;
                 case COLOR_YUV2BGR:
                     Kernels::Device::yuv_to_rgb_or_bgr_adv<uchar3, eSwizzle::ZYXW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output),
                                                               coeff, kDelta);
                     break;
                 case COLOR_YUV2RGB:
                     Kernels::Device::yuv_to_rgb_or_bgr_adv<uchar3, eSwizzle::XYZW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output),
                                                               coeff, kDelta);
                     break;
                 default: throw Exception("Unsupported conversion code.", eStatusType::INVALID_COMBINATION);
@@ -215,26 +215,26 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou
 
             if (outChannels == 3) {
                 if (bgr) {
-                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, ImageWrapper<uchar1>,
-                                                                     ImageWrapper<uchar3>, uchar3>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar1>(input), ImageWrapper<uchar3>(output),
+                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, TensorWrapper<uchar1>,
+                                                                     TensorWrapper<uchar3>, uchar3>
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar1>(input), TensorWrapper<uchar3>(output),
                                                               coeff, kDelta, uidx);
                 } else {
-                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, ImageWrapper<uchar1>,
-                                                                     ImageWrapper<uchar3>, uchar3>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar1>(input), ImageWrapper<uchar3>(output),
+                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, TensorWrapper<uchar1>,
+                                                                     TensorWrapper<uchar3>, uchar3>
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar1>(input), TensorWrapper<uchar3>(output),
                                                               coeff, kDelta, uidx);
                 }
             } else {
                 if (bgr) {
-                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, ImageWrapper<uchar1>,
-                                                                     ImageWrapper<uchar4>, uchar4>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar1>(input), ImageWrapper<uchar4>(output),
+                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, TensorWrapper<uchar1>,
+                                                                     TensorWrapper<uchar4>, uchar4>
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar1>(input), TensorWrapper<uchar4>(output),
                                                               coeff, kDelta, uidx);
                 } else {
-                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, ImageWrapper<uchar1>,
-                                                                     ImageWrapper<uchar4>, uchar4>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar1>(input), ImageWrapper<uchar4>(output),
+                    Kernels::Device::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, TensorWrapper<uchar1>,
+                                                                     TensorWrapper<uchar4>, uchar4>
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar1>(input), TensorWrapper<uchar4>(output),
                                                               coeff, kDelta, uidx);
                 }
             }
@@ -245,21 +245,21 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou
             if (inChannels == 3) {
                 if (bgr) {
                     Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv<uchar3, eSwizzle::ZYXW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar1>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar1>(output),
                                                               coeff, kDelta, uidx);
                 } else {
                     Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv<uchar3, eSwizzle::XYZW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar1>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar1>(output),
                                                               coeff, kDelta, uidx);
                 }
             } else {
                 if (bgr) {
                     Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv<uchar4, eSwizzle::ZYXW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar4>(input), ImageWrapper<uchar1>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar4>(input), TensorWrapper<uchar1>(output),
                                                               coeff, kDelta, uidx);
                 } else {
                     Kernels::Device::rgb_or_bgr_to_nv12_or_nv21_adv<uchar4, eSwizzle::XYZW>
-                        <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar4>(input), ImageWrapper<uchar1>(output),
+                        <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar4>(input), TensorWrapper<uchar1>(output),
                                                               coeff, kDelta, uidx);
                 }
             }
@@ -268,23 +268,23 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou
         if (IsInterleaved444(conversionCode)) {
             switch (conversionCode) {
                 case COLOR_BGR2YUV:
-                    Kernels::Host::rgb_or_bgr_to_yuv_adv<uchar3, eSwizzle::ZYXW>(ImageWrapper<uchar3>(input),
-                                                                                  ImageWrapper<uchar3>(output), coeff,
+                    Kernels::Host::rgb_or_bgr_to_yuv_adv<uchar3, eSwizzle::ZYXW>(TensorWrapper<uchar3>(input),
+                                                                                  TensorWrapper<uchar3>(output), coeff,
                                                                                   kDelta);
                     break;
                 case COLOR_RGB2YUV:
-                    Kernels::Host::rgb_or_bgr_to_yuv_adv<uchar3, eSwizzle::XYZW>(ImageWrapper<uchar3>(input),
-                                                                                  ImageWrapper<uchar3>(output), coeff,
+                    Kernels::Host::rgb_or_bgr_to_yuv_adv<uchar3, eSwizzle::XYZW>(TensorWrapper<uchar3>(input),
+                                                                                  TensorWrapper<uchar3>(output), coeff,
                                                                                   kDelta);
                     break;
                 case COLOR_YUV2BGR:
-                    Kernels::Host::yuv_to_rgb_or_bgr_adv<uchar3, eSwizzle::ZYXW>(ImageWrapper<uchar3>(input),
-                                                                                  ImageWrapper<uchar3>(output), coeff,
+                    Kernels::Host::yuv_to_rgb_or_bgr_adv<uchar3, eSwizzle::ZYXW>(TensorWrapper<uchar3>(input),
+                                                                                  TensorWrapper<uchar3>(output), coeff,
                                                                                   kDelta);
                     break;
                 case COLOR_YUV2RGB:
-                    Kernels::Host::yuv_to_rgb_or_bgr_adv<uchar3, eSwizzle::XYZW>(ImageWrapper<uchar3>(input),
-                                                                                  ImageWrapper<uchar3>(output), coeff,
+                    Kernels::Host::yuv_to_rgb_or_bgr_adv<uchar3, eSwizzle::XYZW>(TensorWrapper<uchar3>(input),
+                                                                                  TensorWrapper<uchar3>(output), coeff,
                                                                                   kDelta);
                     break;
                 default: throw Exception("Unsupported conversion code.", eStatusType::INVALID_COMBINATION);
@@ -292,41 +292,41 @@ void AdvCvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &ou
         } else if (IsSemiPlanarToInterleaved(conversionCode)) {
             if (outChannels == 3) {
                 if (bgr) {
-                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, ImageWrapper<uchar1>,
-                                                                   ImageWrapper<uchar3>, uchar3>(
-                        ImageWrapper<uchar1>(input), ImageWrapper<uchar3>(output), coeff, kDelta, uidx);
+                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, TensorWrapper<uchar1>,
+                                                                   TensorWrapper<uchar3>, uchar3>(
+                        TensorWrapper<uchar1>(input), TensorWrapper<uchar3>(output), coeff, kDelta, uidx);
                 } else {
-                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, ImageWrapper<uchar1>,
-                                                                   ImageWrapper<uchar3>, uchar3>(
-                        ImageWrapper<uchar1>(input), ImageWrapper<uchar3>(output), coeff, kDelta, uidx);
+                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, TensorWrapper<uchar1>,
+                                                                   TensorWrapper<uchar3>, uchar3>(
+                        TensorWrapper<uchar1>(input), TensorWrapper<uchar3>(output), coeff, kDelta, uidx);
                 }
             } else {
                 if (bgr) {
-                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, ImageWrapper<uchar1>,
-                                                                   ImageWrapper<uchar4>, uchar4>(
-                        ImageWrapper<uchar1>(input), ImageWrapper<uchar4>(output), coeff, kDelta, uidx);
+                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::ZYXW, TensorWrapper<uchar1>,
+                                                                   TensorWrapper<uchar4>, uchar4>(
+                        TensorWrapper<uchar1>(input), TensorWrapper<uchar4>(output), coeff, kDelta, uidx);
                 } else {
-                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, ImageWrapper<uchar1>,
-                                                                   ImageWrapper<uchar4>, uchar4>(
-                        ImageWrapper<uchar1>(input), ImageWrapper<uchar4>(output), coeff, kDelta, uidx);
+                    Kernels::Host::nv12_or_nv21_to_rgb_or_bgr_adv<eSwizzle::XYZW, TensorWrapper<uchar1>,
+                                                                   TensorWrapper<uchar4>, uchar4>(
+                        TensorWrapper<uchar1>(input), TensorWrapper<uchar4>(output), coeff, kDelta, uidx);
                 }
             }
         } else {
             if (inChannels == 3) {
                 if (bgr) {
                     Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv<uchar3, eSwizzle::ZYXW>(
-                        ImageWrapper<uchar3>(input), ImageWrapper<uchar1>(output), coeff, kDelta, uidx);
+                        TensorWrapper<uchar3>(input), TensorWrapper<uchar1>(output), coeff, kDelta, uidx);
                 } else {
                     Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv<uchar3, eSwizzle::XYZW>(
-                        ImageWrapper<uchar3>(input), ImageWrapper<uchar1>(output), coeff, kDelta, uidx);
+                        TensorWrapper<uchar3>(input), TensorWrapper<uchar1>(output), coeff, kDelta, uidx);
                 }
             } else {
                 if (bgr) {
                     Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv<uchar4, eSwizzle::ZYXW>(
-                        ImageWrapper<uchar4>(input), ImageWrapper<uchar1>(output), coeff, kDelta, uidx);
+                        TensorWrapper<uchar4>(input), TensorWrapper<uchar1>(output), coeff, kDelta, uidx);
                 } else {
                     Kernels::Host::rgb_or_bgr_to_nv12_or_nv21_adv<uchar4, eSwizzle::XYZW>(
-                        ImageWrapper<uchar4>(input), ImageWrapper<uchar1>(output), coeff, kDelta, uidx);
+                        TensorWrapper<uchar4>(input), TensorWrapper<uchar1>(output), coeff, kDelta, uidx);
                 }
             }
         }
diff --git a/src/op_bilateral_filter.cpp b/src/op_bilateral_filter.cpp
index dffba8ae..5c079b31 100644
--- a/src/op_bilateral_filter.cpp
+++ b/src/op_bilateral_filter.cpp
@@ -31,7 +31,7 @@ THE SOFTWARE.
 #include "common/validation_helpers.hpp"
 #include "core/detail/casting.hpp"
 #include "core/wrappers/border_wrapper.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/bilateral_filter_device.hpp"
 #include "kernels/host/bilateral_filter_host.hpp"
 
@@ -43,8 +43,8 @@ BilateralFilter::~BilateralFilter() {}
 template <typename T, eBorderType B>
 void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &input, const Tensor &output, int diameter,
                                            float sigmaColor, float sigmaSpace, T borderValue, eDeviceType device) {
-    BorderWrapper<T, B> inputWrapper(input, borderValue);
-    ImageWrapper<T> outputWrapper(output);
+    auto inputWrapper = MakeBorderWrapper<B>(TensorWrapper<T>(input), borderValue);
+    TensorWrapper<T> outputWrapper(output);
 
     if (outputWrapper.channels() > 4 || outputWrapper.channels() < 1) {
         throw Exception("Invalid channel size: cannot be greater than 4 or less than 1.", eStatusType::OUT_OF_BOUNDS);
@@ -61,8 +61,7 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp
         sigmaSpace = 1.0f;
     }
 
-    const int radius =
-        (diameter <= 0) ? static_cast<int>(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1);
+    const int radius = (diameter <= 0) ? static_cast<int>(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1);
 
     float spaceCoeff = -1 / (2 * sigmaSpace * sigmaSpace);
     float colorCoeff = -1 / (2 * sigmaColor * sigmaColor);
@@ -89,9 +88,9 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp
 
         for (int j = 0; j < divisor; j++) {
             for (int i = 0; i < dividend; i++) {
-                threads.push_back(std::thread(Kernels::Host::bilateral_filter<T, BorderWrapper<T, B>, ImageWrapper<T>>,
-                                              inputWrapper, outputWrapper, radius, rollingHeight, rollingWidth,
-                                              prevHeight, prevWidth, spaceCoeff, colorCoeff));
+                threads.push_back(std::thread(
+                    Kernels::Host::bilateral_filter<T, decltype(inputWrapper), TensorWrapper<T>>, inputWrapper,
+                    outputWrapper, radius, rollingHeight, rollingWidth, prevHeight, prevWidth, spaceCoeff, colorCoeff));
                 prevWidth = rollingWidth;
                 rollingWidth += factorW;
             }
diff --git a/src/op_bnd_box.cpp b/src/op_bnd_box.cpp
index 3b2443e0..abaff32d 100644
--- a/src/op_bnd_box.cpp
+++ b/src/op_bnd_box.cpp
@@ -32,7 +32,7 @@ THE SOFTWARE.
 #include "common/validation_helpers.hpp"
 #include "core/detail/hip_utils.hpp"
 #include "core/tensor.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/bnd_box_device.hpp"
 #include "kernels/host/bnd_box_host.hpp"
 
@@ -44,8 +44,8 @@ BndBox::~BndBox() {}
 template <bool has_alpha, typename T>
 void dispatch_bnd_box_dtype(hipStream_t stream, const Tensor &input, const Tensor &output,
                             std::shared_ptr<std::vector<Rect_t>> rects, eDeviceType device) {
-    ImageWrapper<T> inputWrapper(input);
-    ImageWrapper<T> outputWrapper(output);
+    TensorWrapper<T> inputWrapper(input);
+    TensorWrapper<T> outputWrapper(output);
 
     auto width = inputWrapper.width();
     auto height = inputWrapper.height();
diff --git a/src/op_composite.cpp b/src/op_composite.cpp
index 650a7bf1..c73ac51f 100644
--- a/src/op_composite.cpp
+++ b/src/op_composite.cpp
@@ -24,7 +24,7 @@
 #include <functional>
 
 #include "common/validation_helpers.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/composite_device.hpp"
 #include "kernels/host/composite_host.hpp"
 
@@ -33,10 +33,10 @@ namespace roccv {
 template <typename SrcType, typename DstType, typename MaskType>
 void dispatch_composite_masktype(hipStream_t stream, const Tensor& foreground, const Tensor& background,
                                  const Tensor& mask, const Tensor& output, eDeviceType device) {
-    ImageWrapper<SrcType> fgWrapper(foreground);
-    ImageWrapper<SrcType> bgWrapper(background);
-    ImageWrapper<MaskType> maskWrapper(mask);
-    ImageWrapper<DstType> outputWrapper(output);
+    TensorWrapper<SrcType> fgWrapper(foreground);
+    TensorWrapper<SrcType> bgWrapper(background);
+    TensorWrapper<MaskType> maskWrapper(mask);
+    TensorWrapper<DstType> outputWrapper(output);
 
     switch (device) {
         case eDeviceType::GPU: {
diff --git a/src/op_convert_to.cpp b/src/op_convert_to.cpp
index 3baa4577..2265a73e 100644
--- a/src/op_convert_to.cpp
+++ b/src/op_convert_to.cpp
@@ -24,7 +24,7 @@ THE SOFTWARE.
 #include <functional>
 
 #include <hip/hip_runtime.h>
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "common/validation_helpers.hpp"
 #include "core/detail/casting.hpp"
 #include "core/detail/type_traits.hpp"
@@ -40,8 +40,8 @@ void dispatch_convert_to_channels(hipStream_t stream, const Tensor &input, const
     using SRC_DT_NC = detail::MakeType<SRC_DT, NC>;
     using DST_DT_NC = detail::MakeType<DST_DT, NC>;
 
-    ImageWrapper<SRC_DT_NC> inputWrapper(input);
-    ImageWrapper<DST_DT_NC> outputWrapper(output);
+    TensorWrapper<SRC_DT_NC> inputWrapper(input);
+    TensorWrapper<DST_DT_NC> outputWrapper(output);
 
     using SRC_BT = detail::BaseType<SRC_DT>;
     using DST_BT = detail::BaseType<DST_DT>;
diff --git a/src/op_copy_make_border.cpp b/src/op_copy_make_border.cpp
index feacfbd9..ffc3e0a7 100644
--- a/src/op_copy_make_border.cpp
+++ b/src/op_copy_make_border.cpp
@@ -25,7 +25,7 @@
 
 #include "common/validation_helpers.hpp"
 #include "core/wrappers/border_wrapper.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "core/wrappers/interpolation_wrapper.hpp"
 #include "kernels/device/copy_make_border_device.hpp"
 #include "kernels/host/copy_make_border_host.hpp"
@@ -38,8 +38,8 @@ namespace roccv {
 template <typename T, eBorderType BorderMode>
 void dispatch_copy_make_border_border_mode(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top,
                                            int32_t left, T border_value, eDeviceType device) {
-    BorderWrapper<T, BorderMode> in_desc(input, border_value);
-    ImageWrapper<T> out_desc(output);
+    auto in_desc = MakeBorderWrapper<BorderMode>(TensorWrapper<T>(input), border_value);
+    TensorWrapper<T> out_desc(output);
 
     switch (device) {
         case eDeviceType::GPU: {
@@ -83,8 +83,7 @@ void dispatch_copy_make_border(hipStream_t stream, const Tensor& input, const Te
 }
 
 void CopyMakeBorder::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top,
-                                int32_t left, eBorderType border_mode, float4 border_value,
-                                eDeviceType device) const {
+                                int32_t left, eBorderType border_mode, float4 border_value, eDeviceType device) const {
     CHECK_TENSOR_DEVICE(input, device);
     CHECK_TENSOR_LAYOUT(input, eTensorLayout::TENSOR_LAYOUT_NHWC, eTensorLayout::TENSOR_LAYOUT_HWC);
     CHECK_TENSOR_DATATYPES(input, eDataType::DATA_TYPE_U8, eDataType::DATA_TYPE_S8, eDataType::DATA_TYPE_U16,
diff --git a/src/op_custom_crop.cpp b/src/op_custom_crop.cpp
index 3d1a7056..13e5017c 100644
--- a/src/op_custom_crop.cpp
+++ b/src/op_custom_crop.cpp
@@ -26,7 +26,7 @@ THE SOFTWARE.
 #include <functional>
 
 #include "common/validation_helpers.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/custom_crop_device.hpp"
 #include "kernels/host/custom_crop_host.hpp"
 
@@ -35,8 +35,8 @@ namespace roccv {
 template <typename T>
 void dispatch_custom_crop_dtype(hipStream_t stream, const Tensor& input, const Tensor& output, Box_t cropRect,
                                 eDeviceType device) {
-    ImageWrapper<T> inputWrapper(input);
-    ImageWrapper<T> outputWrapper(output);
+    TensorWrapper<T> inputWrapper(input);
+    TensorWrapper<T> outputWrapper(output);
 
     switch (device) {
         case eDeviceType::GPU: {
diff --git a/src/op_cvt_color.cpp b/src/op_cvt_color.cpp
index 31abd103..7785b465 100644
--- a/src/op_cvt_color.cpp
+++ b/src/op_cvt_color.cpp
@@ -27,7 +27,7 @@ THE SOFTWARE.
 
 #include "common/validation_helpers.hpp"
 #include "core/tensor.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/cvt_color_device.hpp"
 #include "kernels/host/cvt_color_host.hpp"
 
@@ -87,38 +87,38 @@ void CvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &outpu
         switch (conversionCode) {
             case eColorConversionCode::COLOR_BGR2GRAY:
                 Kernels::Device::rgb_or_bgr_to_grayscale<uchar3, eSwizzle::ZYXW>
-                    <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar1>(output));
+                    <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar1>(output));
                 break;
 
             case eColorConversionCode::COLOR_RGB2GRAY:
                 Kernels::Device::rgb_or_bgr_to_grayscale<uchar3, eSwizzle::XYZW>
-                    <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar1>(output));
+                    <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar1>(output));
                 break;
 
             case eColorConversionCode::COLOR_BGR2RGB:
             case eColorConversionCode::COLOR_RGB2BGR:
                 Kernels::Device::reorder<uchar3, eSwizzle::ZYXW>
-                    <<<gridSize, blockSize, 0, stream>>>(ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output));
+                    <<<gridSize, blockSize, 0, stream>>>(TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output));
                 break;
 
             case eColorConversionCode::COLOR_BGR2YUV:
                 Kernels::Device::rgb_or_bgr_to_yuv<uchar3, eSwizzle::ZYXW><<<gridSize, blockSize, 0, stream>>>(
-                    ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output), 128.0f);
+                    TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             case eColorConversionCode::COLOR_RGB2YUV:
                 Kernels::Device::rgb_or_bgr_to_yuv<uchar3, eSwizzle::XYZW><<<gridSize, blockSize, 0, stream>>>(
-                    ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output), 128.0f);
+                    TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             case eColorConversionCode::COLOR_YUV2BGR:
                 Kernels::Device::yuv_to_rgb_or_bgr<uchar3, eSwizzle::ZYXW><<<gridSize, blockSize, 0, stream>>>(
-                    ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output), 128.0f);
+                    TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             case eColorConversionCode::COLOR_YUV2RGB:
                 Kernels::Device::yuv_to_rgb_or_bgr<uchar3, eSwizzle::XYZW><<<gridSize, blockSize, 0, stream>>>(
-                    ImageWrapper<uchar3>(input), ImageWrapper<uchar3>(output), 128.0f);
+                    TensorWrapper<uchar3>(input), TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             default:
@@ -129,39 +129,39 @@ void CvtColor::operator()(hipStream_t stream, const Tensor &input, Tensor &outpu
 
         switch (conversionCode) {
             case eColorConversionCode::COLOR_BGR2GRAY:
-                Kernels::Host::rgb_or_bgr_to_grayscale<uchar3, eSwizzle::ZYXW>(ImageWrapper<uchar3>(input),
-                                                                               ImageWrapper<uchar1>(output));
+                Kernels::Host::rgb_or_bgr_to_grayscale<uchar3, eSwizzle::ZYXW>(TensorWrapper<uchar3>(input),
+                                                                               TensorWrapper<uchar1>(output));
                 break;
 
             case eColorConversionCode::COLOR_RGB2GRAY:
-                Kernels::Host::rgb_or_bgr_to_grayscale<uchar3, eSwizzle::XYZW>(ImageWrapper<uchar3>(input),
-                                                                               ImageWrapper<uchar1>(output));
+                Kernels::Host::rgb_or_bgr_to_grayscale<uchar3, eSwizzle::XYZW>(TensorWrapper<uchar3>(input),
+                                                                               TensorWrapper<uchar1>(output));
                 break;
 
             case eColorConversionCode::COLOR_BGR2RGB:
             case eColorConversionCode::COLOR_RGB2BGR:
-                Kernels::Host::reorder<uchar3, eSwizzle::ZYXW>(ImageWrapper<uchar3>(input),
-                                                               ImageWrapper<uchar3>(output));
+                Kernels::Host::reorder<uchar3, eSwizzle::ZYXW>(TensorWrapper<uchar3>(input),
+                                                               TensorWrapper<uchar3>(output));
                 break;
 
             case eColorConversionCode::COLOR_BGR2YUV:
-                Kernels::Host::rgb_or_bgr_to_yuv<uchar3, eSwizzle::ZYXW>(ImageWrapper<uchar3>(input),
-                                                                         ImageWrapper<uchar3>(output), 128.0f);
+                Kernels::Host::rgb_or_bgr_to_yuv<uchar3, eSwizzle::ZYXW>(TensorWrapper<uchar3>(input),
+                                                                         TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             case eColorConversionCode::COLOR_RGB2YUV:
-                Kernels::Host::rgb_or_bgr_to_yuv<uchar3, eSwizzle::XYZW>(ImageWrapper<uchar3>(input),
-                                                                         ImageWrapper<uchar3>(output), 128.0f);
+                Kernels::Host::rgb_or_bgr_to_yuv<uchar3, eSwizzle::XYZW>(TensorWrapper<uchar3>(input),
+                                                                         TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             case eColorConversionCode::COLOR_YUV2BGR:
-                Kernels::Host::yuv_to_rgb_or_bgr<uchar3, eSwizzle::ZYXW>(ImageWrapper<uchar3>(input),
-                                                                         ImageWrapper<uchar3>(output), 128.0f);
+                Kernels::Host::yuv_to_rgb_or_bgr<uchar3, eSwizzle::ZYXW>(TensorWrapper<uchar3>(input),
+                                                                         TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             case eColorConversionCode::COLOR_YUV2RGB:
-                Kernels::Host::yuv_to_rgb_or_bgr<uchar3, eSwizzle::XYZW>(ImageWrapper<uchar3>(input),
-                                                                         ImageWrapper<uchar3>(output), 128.0f);
+                Kernels::Host::yuv_to_rgb_or_bgr<uchar3, eSwizzle::XYZW>(TensorWrapper<uchar3>(input),
+                                                                         TensorWrapper<uchar3>(output), 128.0f);
                 break;
 
             default:
diff --git a/src/op_flip.cpp b/src/op_flip.cpp
index 5566a0b6..d28ab8fa 100644
--- a/src/op_flip.cpp
+++ b/src/op_flip.cpp
@@ -29,7 +29,7 @@ THE SOFTWARE.
 #include "common/validation_helpers.hpp"
 #include "core/exception.hpp"
 #include "core/status_type.h"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/flip_device.hpp"
 #include "kernels/host/flip_host.hpp"
 
@@ -37,8 +37,8 @@ namespace roccv {
 
 template <typename T, eAxis FlipType>
 void dispatch_flip_axis(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) {
-    ImageWrapper<T> inputWrapper(input);
-    ImageWrapper<T> outputWrapper(output);
+    TensorWrapper<T> inputWrapper(input);
+    TensorWrapper<T> outputWrapper(output);
 
     switch (device) {
         case eDeviceType::GPU: {
diff --git a/src/op_gamma_contrast.cpp b/src/op_gamma_contrast.cpp
index d6c78690..e4ca3520 100644
--- a/src/op_gamma_contrast.cpp
+++ b/src/op_gamma_contrast.cpp
@@ -34,7 +34,7 @@ THE SOFTWARE.
 #include "common/math_vector.hpp"
 #include "common/validation_helpers.hpp"
 #include "core/tensor.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/gamma_contrast_device.hpp"
 #include "kernels/host/gamma_contrast_host.hpp"
 
@@ -43,8 +43,8 @@ namespace roccv {
 template <typename T>
 void dispatch_gamma_contrast_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, float gamma,
                                    eDeviceType device) {
-    ImageWrapper<T> inputWrapper(input);
-    ImageWrapper<T> outputWrapper(output);
+    TensorWrapper<T> inputWrapper(input);
+    TensorWrapper<T> outputWrapper(output);
 
     if (device == eDeviceType::GPU) {
         dim3 block(64, 16);
diff --git a/src/op_histogram.cpp b/src/op_histogram.cpp
index 24a2ef79..0627a732 100644
--- a/src/op_histogram.cpp
+++ b/src/op_histogram.cpp
@@ -31,7 +31,7 @@ THE SOFTWARE.
 #include "common/array_wrapper.hpp"
 #include "common/validation_helpers.hpp"
 #include "core/wrappers/generic_tensor_wrapper.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/histogram_device.hpp"
 #include "kernels/host/histogram_host.hpp"
 
@@ -44,7 +44,7 @@ template <typename T>
 void dispatch_histogram_dtype(hipStream_t stream, const Tensor& input,
                               std::optional<std::reference_wrapper<const Tensor>> mask, const Tensor& histogram,
                               eDeviceType device) {
-    ImageWrapper<uchar1> inputWrapper(input);
+    TensorWrapper<uchar1> inputWrapper(input);
 
     const auto o_height = histogram.shape()[histogram.shape().layout().height_index()];
     const auto o_width = histogram.shape()[histogram.shape().layout().width_index()];
@@ -91,7 +91,7 @@ void dispatch_histogram_dtype(hipStream_t stream, const Tensor& input,
                 std::reference_wrapper<const Tensor> mask_ref = mask.value();
                 const Tensor& actual_mask = mask_ref.get();
                 CHECK_TENSOR_COMPARISON(input.shape() == actual_mask.shape());
-                ImageWrapper<uchar1> maskWrapper(actual_mask);
+                TensorWrapper<uchar1> maskWrapper(actual_mask);
                 Kernels::Device::histogram_kernel<T><<<grid_size, threads_block, shared_mem_size, stream>>>(
                     inputWrapper, maskWrapper, GenericTensorWrapper<T>(histogram));
             } else {
@@ -115,7 +115,7 @@ void dispatch_histogram_dtype(hipStream_t stream, const Tensor& input,
                 std::reference_wrapper<const Tensor> mask_ref = mask.value();
                 const Tensor& actual_mask = mask_ref.get();
                 CHECK_TENSOR_COMPARISON(input.shape() == actual_mask.shape());
-                ImageWrapper<uchar1> maskWrapper(actual_mask);
+                TensorWrapper<uchar1> maskWrapper(actual_mask);
                 Kernels::Host::histogram_kernel(inputWrapper, maskWrapper, GenericTensorWrapper<T>(histogram));
             } else {
                 Kernels::Host::histogram_kernel(inputWrapper, GenericTensorWrapper<T>(histogram));
diff --git a/src/op_normalize.cpp b/src/op_normalize.cpp
index 529638b8..ca2fde96 100644
--- a/src/op_normalize.cpp
+++ b/src/op_normalize.cpp
@@ -28,7 +28,7 @@ THE SOFTWARE.
 #include "common/validation_helpers.hpp"
 #include "core/detail/type_traits.hpp"
 #include "core/tensor.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/normalize_device.hpp"
 #include "kernels/host/normalize_host.hpp"
 
@@ -42,10 +42,10 @@ void dispatch_normalize_stddev(hipStream_t stream, const Tensor& input, const Te
     // tensors.
     using work_type = detail::MakeType<float, detail::NumComponents<T>>;
 
-    ImageWrapper<T> inputWrap(input);
-    ImageWrapper<T> outputWrap(output);
-    ImageWrapper<work_type> scaleWrap(scale);
-    ImageWrapper<work_type> baseWrap(base);
+    TensorWrapper<T> inputWrap(input);
+    TensorWrapper<T> outputWrap(output);
+    TensorWrapper<work_type> scaleWrap(scale);
+    TensorWrapper<work_type> baseWrap(base);
 
     switch (device) {
         case eDeviceType::GPU: {
diff --git a/src/op_reformat.cpp b/src/op_reformat.cpp
index 4a79145b..cda3c809 100644
--- a/src/op_reformat.cpp
+++ b/src/op_reformat.cpp
@@ -26,7 +26,7 @@ THE SOFTWARE.
 #include <unordered_map>
 
 #include "common/validation_helpers.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/reformat_device.hpp"
 #include "kernels/host/reformat_host.hpp"
 
@@ -35,8 +35,8 @@ namespace roccv {
 namespace {
 template <int Channels, typename T>
 void DispatchReformatChannels(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) {
-    ImageWrapper<T> inputWrap(input);
-    ImageWrapper<T> outputWrap(output);
+    TensorWrapper<T> inputWrap(input);
+    TensorWrapper<T> outputWrap(output);
 
     switch (device) {
         case eDeviceType::GPU: {
diff --git a/src/op_remap.cpp b/src/op_remap.cpp
index 0992cf44..40d14427 100644
--- a/src/op_remap.cpp
+++ b/src/op_remap.cpp
@@ -29,7 +29,7 @@ THE SOFTWARE.
 #include "core/detail/internal_structs.hpp"
 #include "core/detail/math/math.hpp"
 #include "core/detail/type_traits.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "core/wrappers/interpolation_wrapper.hpp"
 #include "kernels/device/remap_device.hpp"
 #include "kernels/host/remap_host.hpp"
@@ -76,9 +76,10 @@ template <typename T, eBorderType B, eInterpolationType I, eInterpolationType M>
 void dispatch_remap_mapInterp(hipStream_t stream, const Tensor &input, const Tensor &output, const Tensor &map,
                               const eRemapType mapValueType, const bool alignCorners, const T borderValue,
                               const eDeviceType device) {
-    ImageWrapper<T> outputWrapper(output);
-    InterpolationWrapper<float2, B, M> wrappedMapTensor(map, make_float2(0, 0));
-    InterpolationWrapper<T, B, I> inputWrapper(input, borderValue);
+    TensorWrapper<T> outputWrapper(output);
+    auto wrappedMapTensor =
+        MakeInterpolationWrapper<M>(MakeBorderWrapper<B>(TensorWrapper<float2>(map), make_float2(0, 0)));
+    auto inputWrapper = MakeInterpolationWrapper<I>(MakeBorderWrapper<B>(TensorWrapper<T>(input), borderValue));
 
     int mapBatchSize = wrappedMapTensor.batches();
 
diff --git a/src/op_resize.cpp b/src/op_resize.cpp
index d7cd0b61..7275727f 100644
--- a/src/op_resize.cpp
+++ b/src/op_resize.cpp
@@ -36,9 +36,10 @@ namespace roccv {
 
 template <typename T, eInterpolationType I>
 void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) {
-    ImageWrapper<T> outputWrapper(output);
+    TensorWrapper<T> outputWrapper(output);
     // Resize operation should clamp values at the border (REPLICATE border mode)
-    InterpolationWrapper<T, eBorderType::BORDER_TYPE_REPLICATE, I> inputWrapper(input, T{});
+    auto inputWrapper =
+        MakeInterpolationWrapper<I>(MakeBorderWrapper<eBorderType::BORDER_TYPE_REPLICATE>(TensorWrapper<T>(input), T{}));
 
     float scaleX = inputWrapper.width() / static_cast<float>(outputWrapper.width());
     float scaleY = inputWrapper.height() / static_cast<float>(outputWrapper.height());
@@ -62,13 +63,13 @@ void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tenso
 template <typename T>
 void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor& output,
                            eInterpolationType interpolation, eDeviceType device) {
-    static const std::unordered_map<
-        eInterpolationType,
-        std::function<void(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device)>>
-        funcs = {{eInterpolationType::INTERP_TYPE_NEAREST, dispatch_resize_interp<T, eInterpolationType::INTERP_TYPE_NEAREST>},
-                 {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp<T, eInterpolationType::INTERP_TYPE_LINEAR>},
-                 {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp<T, eInterpolationType::INTERP_TYPE_CUBIC>}
-                };
+    static const std::unordered_map<eInterpolationType, std::function<void(hipStream_t stream, const Tensor& input,
+                                                                           const Tensor& output, eDeviceType device)>>
+        funcs = {
+            {eInterpolationType::INTERP_TYPE_NEAREST,
+             dispatch_resize_interp<T, eInterpolationType::INTERP_TYPE_NEAREST>},
+            {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp<T, eInterpolationType::INTERP_TYPE_LINEAR>},
+            {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp<T, eInterpolationType::INTERP_TYPE_CUBIC>}};
 
     if (!funcs.contains(interpolation)) {
         throw Exception("Operation does not support the given interpolation mode.", eStatusType::NOT_IMPLEMENTED);
@@ -78,8 +79,8 @@ void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor
     func(stream, input, output, device);
 }
 
-void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output,
-                        eInterpolationType interpolation, eDeviceType device) const {
+void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, eInterpolationType interpolation,
+                        eDeviceType device) const {
     CHECK_TENSOR_DEVICE(input, device);
     CHECK_TENSOR_DEVICE(output, device);
 
diff --git a/src/op_rotate.cpp b/src/op_rotate.cpp
index 28806779..508de16c 100644
--- a/src/op_rotate.cpp
+++ b/src/op_rotate.cpp
@@ -54,8 +54,9 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso
 
     T borderVal = detail::SaturateCast<T>(make_float4(0.0f, 0.0f, 0.0f, 0.0f));
 
-    ImageWrapper<T> outputWrap(output);
-    InterpolationWrapper<T, eBorderType::BORDER_TYPE_CONSTANT, InterpType> inputWrap(input, borderVal);
+    TensorWrapper<T> outputWrap(output);
+    auto inputWrap = MakeInterpolationWrapper<InterpType>(
+        MakeBorderWrapper<eBorderType::BORDER_TYPE_CONSTANT>(TensorWrapper<T>(input), borderVal));
 
     switch (device) {
         case eDeviceType::GPU: {
@@ -74,8 +75,8 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso
 }
 
 template <typename T>
-void dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg,
-                          double2 shift, eInterpolationType interpolation, eDeviceType device) {
+void dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, double2 shift,
+                          eInterpolationType interpolation, eDeviceType device) {
     // clang-format off
     static const std::unordered_map<eInterpolationType,
                                     std::function<void(hipStream_t, const Tensor &, const Tensor &, double,
@@ -94,8 +95,8 @@ void dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor
     func(stream, input, output, angleDeg, shift, device);
 }
 
-void Rotate::operator()(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg,
-                        double2 shift, eInterpolationType interpolation, eDeviceType device) const {
+void Rotate::operator()(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, double2 shift,
+                        eInterpolationType interpolation, eDeviceType device) const {
     CHECK_TENSOR_DEVICE(input, device);
     CHECK_TENSOR_CHANNELS(input, 1, 3, 4);
     CHECK_TENSOR_DATATYPES(input, eDataType::DATA_TYPE_U8, eDataType::DATA_TYPE_S8, eDataType::DATA_TYPE_U16,
diff --git a/src/op_thresholding.cpp b/src/op_thresholding.cpp
index 420ac84a..bda21c7c 100644
--- a/src/op_thresholding.cpp
+++ b/src/op_thresholding.cpp
@@ -27,7 +27,7 @@ THE SOFTWARE.
 
 #include "common/validation_helpers.hpp"
 #include "core/wrappers/generic_tensor_wrapper.hpp"
-#include "core/wrappers/image_wrapper.hpp"
+#include "core/wrappers/tensor_wrapper.hpp"
 #include "kernels/device/thresholding_device.hpp"
 #include "kernels/host/thresholding_host.hpp"
 
@@ -43,8 +43,8 @@ Threshold::~Threshold() {}
 template <typename T>
 void dispatch_threshold_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, const Tensor &thresh,
                               const Tensor &maxVal, eThresholdType m_threshType, eDeviceType device) {
-    ImageWrapper<T> inputWrapper(input);
-    ImageWrapper<T> outputWrapper(output);
+    TensorWrapper<T> inputWrapper(input);
+    TensorWrapper<T> outputWrapper(output);
 
     const auto height = input.shape()[input.shape().layout().height_index()];
     const auto width = input.shape()[input.shape().layout().width_index()];
diff --git a/src/op_warp_perspective.cpp b/src/op_warp_perspective.cpp
index ca77fc8c..1a7e12f2 100644
--- a/src/op_warp_perspective.cpp
+++ b/src/op_warp_perspective.cpp
@@ -36,8 +36,8 @@ template <typename T, eBorderType B, eInterpolationType I>
 void dispatch_warp_perspective_interp(hipStream_t stream, const Tensor &input, const Tensor &output,
                                       const PerspectiveTransform transMatrix, T borderValue, eDeviceType device) {
     ArrayWrapper<float, 9> transform(transMatrix);
-    ImageWrapper<T> outputWrapper(output);
-    InterpolationWrapper<T, B, I> inputWrapper(input, borderValue);
+    TensorWrapper<T> outputWrapper(output);
+    auto inputWrapper = MakeInterpolationWrapper<I>(MakeBorderWrapper<B>(TensorWrapper<T>(input), borderValue));
 
     // Launch CPU/GPU kernel depending on requested device type.
     switch (device) {
diff --git a/tests/roccv/cpp/include/image_test_helpers.hpp b/tests/roccv/cpp/include/image_test_helpers.hpp
new file mode 100644
index 00000000..c4613367
--- /dev/null
+++ b/tests/roccv/cpp/include/image_test_helpers.hpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#pragma once
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <core/data_type.hpp>
+#include <core/detail/allocators/i_allocator.hpp>
+#include <core/image.hpp>
+#include <core/image_buffer.hpp>
+#include <core/image_data.hpp>
+#include <core/image_format.hpp>
+
+namespace roccv {
+namespace tests {
+
+// Opaque sentinel pointers used by image-layer tests. ImageData / ImageBatchData
+// carry pointers but never dereference them — the buffer is a metadata snapshot
+// only — so tests use these to verify values flow through without needing real
+// allocations.
+inline void* const FAKE_PTR_A = reinterpret_cast<void*>(0xAAAAAAAAull);
+inline void* const FAKE_PTR_B = reinterpret_cast<void*>(0xBBBBBBBBull);
+inline void* const FAKE_PTR_C = reinterpret_cast<void*>(0xCCCCCCCCull);
+
+/**
+ * @brief Test allocator that backs every allocation kind with malloc and tallies
+ * how many times each entry point is invoked. Pure host-backed; no actual GPU
+ * dependency on the returned pointers — callers that exercise the Hip/pinned
+ * paths must only inspect metadata, never dereference device memory.
+ *
+ * `lastAllocBytes` is updated from every alloc path (hip, host, pinned), so
+ * callers may assert on the most recent allocation regardless of kind.
+ */
+class CountingAllocator : public IAllocator {
+   public:
+    mutable int hipAllocs = 0;
+    mutable int hipFrees = 0;
+    mutable int hostAllocs = 0;
+    mutable int hostFrees = 0;
+    mutable int pinnedAllocs = 0;
+    mutable int pinnedFrees = 0;
+    mutable size_t lastAllocBytes = 0;
+
+    void* allocHipMem(size_t size) const override {
+        ++hipAllocs;
+        lastAllocBytes = size;
+        return std::malloc(size);
+    }
+    void freeHipMem(void* ptr) const noexcept override {
+        ++hipFrees;
+        std::free(ptr);
+    }
+
+    void* allocHostMem(size_t size, int32_t /*alignment*/ = 0) const override {
+        ++hostAllocs;
+        lastAllocBytes = size;
+        return std::malloc(size);
+    }
+    void freeHostMem(void* ptr) const noexcept override {
+        ++hostFrees;
+        std::free(ptr);
+    }
+
+    void* allocHostPinnedMem(size_t size) const override {
+        ++pinnedAllocs;
+        lastAllocBytes = size;
+        return std::malloc(size);
+    }
+    void freeHostPinnedMem(void* ptr) const noexcept override {
+        ++pinnedFrees;
+        std::free(ptr);
+    }
+};
+
+// Single-plane packed-row buffer descriptor around `basePtr`. The pointer is
+// never dereferenced by the consumers (ImageData / ImageBatchVarShape).
+inline ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, int64_t rowStride, void* basePtr) {
+    ImageBufferStrided buf{};
+    buf.numPlanes = 1;
+    buf.planes[0] = {width, height, rowStride, basePtr};
+    return buf;
+}
+
+// Single-plane GPU-resident ImageData snapshot with packed-row stride implied
+// by `fmt`. For tests that need an ImageData but won't touch the pixels.
+inline ImageDataStridedHip MakeFakeHipData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) {
+    const int64_t rowStride = static_cast<int64_t>(width) * fmt.channels() * DataType(fmt.dtype()).size();
+    return ImageDataStridedHip(fmt, MakeSinglePlaneBuffer(width, height, rowStride, basePtr));
+}
+
+// Host counterpart of MakeFakeHipData.
+inline ImageDataStridedHost MakeFakeHostData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) {
+    const int64_t rowStride = static_cast<int64_t>(width) * fmt.channels() * DataType(fmt.dtype()).size();
+    return ImageDataStridedHost(fmt, MakeSinglePlaneBuffer(width, height, rowStride, basePtr));
+}
+
+// Single-plane GPU-resident Image wrapping a sentinel pointer via ImageWrapData.
+// Use for batch tests where pushBack only reads the descriptor.
+inline Image MakeFakeGpuImage(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) {
+    return ImageWrapData(MakeFakeHipData(width, height, basePtr, fmt));
+}
+
+// Host counterpart of MakeFakeGpuImage.
+inline Image MakeFakeHostImage(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) {
+    return ImageWrapData(MakeFakeHostData(width, height, basePtr, fmt));
+}
+
+}  // namespace tests
+}  // namespace roccv
diff --git a/tests/roccv/cpp/include/test_helpers.hpp b/tests/roccv/cpp/include/test_helpers.hpp
index 6c43053b..7ed56309 100644
--- a/tests/roccv/cpp/include/test_helpers.hpp
+++ b/tests/roccv/cpp/include/test_helpers.hpp
@@ -198,6 +198,12 @@ namespace tests {
                                  ". Expected no exceptions, but received the following exception: " + e.what()); \
     }
 
+// EXPECT_EQ pipes through std::to_string, so wrap enums/pointers/bools through
+// these casts before comparing.
+inline auto AsInt = [](auto v) { return static_cast<int>(v); };
+inline auto AsAddr = [](const void* p) { return reinterpret_cast<uintptr_t>(p); };
+inline auto AsSize = [](auto v) { return static_cast<size_t>(v); };
+
 /**
  * @brief Creates a NHWC tensor which contains data loaded from an image.
  *
diff --git a/tests/roccv/cpp/src/tests/core/image/test_image.cpp b/tests/roccv/cpp/src/tests/core/image/test_image.cpp
new file mode 100644
index 00000000..ce6ef69b
--- /dev/null
+++ b/tests/roccv/cpp/src/tests/core/image/test_image.cpp
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include <core/image.hpp>
+#include <core/image_data.hpp>
+#include <core/image_format.hpp>
+#include <typeinfo>
+#include <utility>
+
+#include "image_test_helpers.hpp"
+#include "test_helpers.hpp"
+
+using namespace roccv;
+using namespace roccv::tests;
+
+namespace {
+
+// =============================================================================
+// CalcRequirements
+// =============================================================================
+
+/**
+ * @brief Packed-row stride for a typical 3-channel uint8 image. Other fields
+ * propagate unchanged; remaining plane slots stay zeroed.
+ */
+void TestCalcRequirementsRgb8() {
+    auto reqs = Image::CalcRequirements({320, 240}, FMT_RGB8);
+
+    EXPECT_EQ(reqs.size.w, 320);
+    EXPECT_EQ(reqs.size.h, 240);
+    EXPECT_EQ(reqs.format.channels(), 3);
+    EXPECT_EQ(reqs.planeRowStride[0], static_cast<int64_t>(320 * 3));
+    EXPECT_EQ(reqs.planeRowStride[1], 0);
+    EXPECT_EQ(reqs.planeRowStride[5], 0);
+    EXPECT_EQ(reqs.alignBytes, 0);
+}
+
+/**
+ * @brief Multi-byte dtype is reflected in the per-pixel byte count.
+ */
+void TestCalcRequirementsF32() {
+    auto reqs = Image::CalcRequirements({64, 64}, FMT_F32);
+    EXPECT_EQ(reqs.planeRowStride[0], static_cast<int64_t>(64 * 4));
+}
+
+/**
+ * @brief Single-channel U8 → row stride equals width.
+ */
+void TestCalcRequirementsU8() {
+    auto reqs = Image::CalcRequirements({100, 50}, FMT_U8);
+    EXPECT_EQ(reqs.planeRowStride[0], 100);
+}
+
+/**
+ * @brief Width or height < 1 must throw INVALID_VALUE.
+ */
+void TestCalcRequirementsRejectsInvalidDims() {
+    EXPECT_EXCEPTION(Image::CalcRequirements({0, 100}, FMT_RGB8), eStatusType::INVALID_VALUE);
+    EXPECT_EXCEPTION(Image::CalcRequirements({100, 0}, FMT_RGB8), eStatusType::INVALID_VALUE);
+    EXPECT_EXCEPTION(Image::CalcRequirements({-5, 100}, FMT_RGB8), eStatusType::INVALID_VALUE);
+    EXPECT_EXCEPTION(Image::CalcRequirements({100, -5}, FMT_RGB8), eStatusType::INVALID_VALUE);
+}
+
+/**
+ * @brief Large widths must not overflow during stride math; row stride must
+ * fit in int64.
+ */
+void TestCalcRequirementsLargeDims() {
+    // 8K image, RGBA8 (4 channels * 1 byte = 4 B/pixel) → 8192 * 4 = 32768 B/row.
+    auto reqs = Image::CalcRequirements({8192, 4320}, FMT_RGBA8);
+    EXPECT_EQ(reqs.planeRowStride[0], static_cast<int64_t>(8192 * 4));
+}
+
+// =============================================================================
+// Allocating constructors
+// =============================================================================
+
+/**
+ * @brief GPU-device ctor routes allocation through allocHipMem with the
+ * computed byte count.
+ */
+void TestImageHipAllocation() {
+    CountingAllocator alloc;
+    {
+        Image img({320, 240}, FMT_RGB8, alloc, eDeviceType::GPU);
+
+        EXPECT_EQ(alloc.hipAllocs, 1);
+        EXPECT_EQ(alloc.hostAllocs, 0);
+        EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(320 * 3 * 240));
+
+        EXPECT_EQ(img.size().w, 320);
+        EXPECT_EQ(img.size().h, 240);
+        EXPECT_EQ(AsInt(img.device()), AsInt(eDeviceType::GPU));
+        EXPECT_EQ(img.format().channels(), 3);
+
+        // Image is still alive — buffer not yet freed.
+        EXPECT_EQ(alloc.hipFrees, 0);
+    }
+    // Image dropped — buffer freed exactly once via the matching allocator.
+    EXPECT_EQ(alloc.hipFrees, 1);
+}
+
+/**
+ * @brief Same shape as the Hip test but for CPU residency.
+ */
+void TestImageHostAllocation() {
+    CountingAllocator alloc;
+    {
+        Image img({100, 50}, FMT_U8, alloc, eDeviceType::CPU);
+
+        EXPECT_EQ(alloc.hostAllocs, 1);
+        EXPECT_EQ(alloc.hipAllocs, 0);
+        EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(100 * 50));
+        EXPECT_EQ(AsInt(img.device()), AsInt(eDeviceType::CPU));
+    }
+    EXPECT_EQ(alloc.hostFrees, 1);
+}
+
+/**
+ * @brief Constructing from precomputed Requirements yields observably
+ * identical state to the (Size2D, ImageFormat) sugar form.
+ */
+void TestImageRequirementsCtor() {
+    CountingAllocator alloc;
+    auto reqs = Image::CalcRequirements({64, 32}, FMT_RGBA8);
+
+    Image img(reqs, alloc, eDeviceType::GPU);
+
+    EXPECT_EQ(img.size().w, 64);
+    EXPECT_EQ(img.size().h, 32);
+    EXPECT_EQ(img.format().channels(), 4);
+    EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(64 * 4 * 32));
+}
+
+// =============================================================================
+// Refcount / lifecycle
+// =============================================================================
+
+/**
+ * @brief Copying an Image bumps the refcount: both handles see the same
+ * underlying buffer, and free is deferred until the LAST handle drops.
+ */
+void TestImageCopySharesBuffer() {
+    CountingAllocator alloc;
+    void* buf = nullptr;
+    {
+        Image first({16, 16}, FMT_U8, alloc, eDeviceType::GPU);
+        buf = first.exportData().cast<ImageDataStrided>()->plane(0).basePtr;
+
+        Image second = first;           // refcount bump
+        EXPECT_EQ(alloc.hipAllocs, 1);  // No new allocation.
+        EXPECT_EQ(AsAddr(second.exportData().cast<ImageDataStrided>()->plane(0).basePtr), AsAddr(buf));
+
+        // Drop `first`; buffer must NOT be freed yet — `second` still holds it.
+        {
+            Image sink = std::move(first);
+        }
+        EXPECT_EQ(alloc.hipFrees, 0);
+    }
+    // All handles dropped — exactly one free.
+    EXPECT_EQ(alloc.hipFrees, 1);
+}
+
+/**
+ * @brief Move-construction transfers the buffer; the source is left empty.
+ * The buffer must still free exactly once (when the destination drops).
+ */
+void TestImageMoveSemantics() {
+    CountingAllocator alloc;
+    {
+        Image src({8, 8}, FMT_U8, alloc, eDeviceType::CPU);
+        void* srcBuf = src.exportData().cast<ImageDataStrided>()->plane(0).basePtr;
+
+        Image dst = std::move(src);
+        EXPECT_EQ(AsAddr(dst.exportData().cast<ImageDataStrided>()->plane(0).basePtr), AsAddr(srcBuf));
+        EXPECT_EQ(alloc.hostFrees, 0);
+    }
+    EXPECT_EQ(alloc.hostFrees, 1);
+}
+
+// =============================================================================
+// exportData / exportData<DATA>()
+// =============================================================================
+
+/**
+ * @brief exportData() returns an ImageData snapshot that mirrors the Image's
+ * size, format, device, and base pointer.
+ */
+void TestImageExportData() {
+    CountingAllocator alloc;
+    Image img({80, 60}, FMT_RGBA8, alloc, eDeviceType::GPU);
+    ImageData data = img.exportData();
+
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU));
+    EXPECT_EQ(data.format().channels(), 4);
+
+    auto strided = data.cast<ImageDataStrided>();
+    EXPECT_EQ(AsInt(strided.has_value()), 1);
+    EXPECT_EQ(strided->plane(0).width, 80);
+    EXPECT_EQ(strided->plane(0).height, 60);
+    EXPECT_EQ(strided->plane(0).rowStride, static_cast<int64_t>(80 * 4));
+}
+
+/**
+ * @brief Templated exportData<T>() returns the matching subclass directly.
+ */
+void TestImageExportDataTypedSuccess() {
+    CountingAllocator alloc;
+    Image img({4, 4}, FMT_U8, alloc, eDeviceType::GPU);
+
+    auto hip = img.exportData<ImageDataStridedHip>();
+    EXPECT_EQ(AsInt(hip.device()), AsInt(eDeviceType::GPU));
+    EXPECT_EQ(hip.plane(0).width, 4);
+}
+
+/**
+ * @brief Templated exportData<T>() throws std::bad_cast when the requested
+ * subclass does not match the underlying buffer kind.
+ */
+void TestImageExportDataTypedMismatch() {
+    CountingAllocator alloc;
+    Image img({4, 4}, FMT_U8, alloc, eDeviceType::GPU);
+
+    bool threw = false;
+    try {
+        (void)img.exportData<ImageDataStridedHost>();
+    } catch (const std::bad_cast&) {
+        threw = true;
+    }
+    EXPECT_EQ(AsInt(threw), 1);
+}
+
+// =============================================================================
+// ImageWrapData
+// =============================================================================
+
+/**
+ * @brief View-only wrap (no cleanup callback) round-trips metadata and must
+ * not crash when the Image is destroyed (no free attempt on the sentinel ptr).
+ */
+void TestImageWrapDataViewOnly() {
+    Image wrapped = ImageWrapData(MakeFakeHipData(640, 480, FAKE_PTR_A));
+    EXPECT_EQ(wrapped.size().w, 640);
+    EXPECT_EQ(wrapped.size().h, 480);
+    EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU));
+    EXPECT_EQ(AsAddr(wrapped.exportData().cast<ImageDataStrided>()->plane(0).basePtr), AsAddr(FAKE_PTR_A));
+}
+
+/**
+ * @brief Wrap with a cleanup callback: the callback fires exactly once when
+ * the last Image handle goes out of scope.
+ */
+void TestImageWrapDataCleanupFires() {
+    int callbackInvocations = 0;
+    {
+        Image wrapped =
+            ImageWrapData(MakeFakeHipData(100, 100, FAKE_PTR_A), [&](const ImageData&) { ++callbackInvocations; });
+        EXPECT_EQ(callbackInvocations, 0);  // Not fired during normal use.
+    }
+    EXPECT_EQ(callbackInvocations, 1);
+}
+
+/**
+ * @brief Cleanup callback receives the original wrapped ImageData snapshot —
+ * the captured base pointer must match what was passed to ImageWrapData.
+ */
+void TestImageWrapDataCleanupReceivesData() {
+    void* receivedBasePtr = nullptr;
+    {
+        Image wrapped = ImageWrapData(MakeFakeHipData(50, 50, FAKE_PTR_A), [&](const ImageData& d) {
+            receivedBasePtr = d.cast<ImageDataStrided>()->plane(0).basePtr;
+        });
+    }
+    EXPECT_EQ(AsAddr(receivedBasePtr), AsAddr(FAKE_PTR_A));
+}
+
+/**
+ * @brief Cleanup must fire only on LAST handle drop — copies bump the
+ * refcount, intermediate drops do nothing.
+ */
+void TestImageWrapDataCleanupFiresOnce() {
+    int callbackInvocations = 0;
+    {
+        Image first =
+            ImageWrapData(MakeFakeHipData(10, 10, FAKE_PTR_A), [&](const ImageData&) { ++callbackInvocations; });
+        Image second = first;  // refcount = 2
+        Image third = first;   // refcount = 3
+        {
+            Image fourth = third;
+            (void)fourth;
+        }  // dropped → refcount = 3
+        EXPECT_EQ(callbackInvocations, 0);
+        // first, second, third still alive at scope exit
+    }
+    EXPECT_EQ(callbackInvocations, 1);
+}
+
+/**
+ * @brief Wrapped Image's accessors mirror the wrapped ImageData verbatim —
+ * size, format, device, and base pointer all round-trip unchanged.
+ */
+void TestImageWrapDataAccessors() {
+    auto fake = MakeFakeHipData(123, 45, FAKE_PTR_A, FMT_RGBA8);
+    Image wrapped = ImageWrapData(fake);
+
+    EXPECT_EQ(wrapped.size().w, 123);
+    EXPECT_EQ(wrapped.size().h, 45);
+    EXPECT_EQ(wrapped.format().channels(), 4);
+    EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU));
+
+    auto strided = wrapped.exportData().cast<ImageDataStrided>();
+    EXPECT_EQ(AsInt(strided.has_value()), 1);
+    EXPECT_EQ(strided->plane(0).width, 123);
+    EXPECT_EQ(strided->plane(0).height, 45);
+    EXPECT_EQ(AsAddr(strided->plane(0).basePtr), AsAddr(FAKE_PTR_A));
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+    TEST_CASES_BEGIN();
+
+    // CalcRequirements
+    TEST_CASE(TestCalcRequirementsRgb8());
+    TEST_CASE(TestCalcRequirementsF32());
+    TEST_CASE(TestCalcRequirementsU8());
+    TEST_CASE(TestCalcRequirementsRejectsInvalidDims());
+    TEST_CASE(TestCalcRequirementsLargeDims());
+
+    // Allocating constructors
+    TEST_CASE(TestImageHipAllocation());
+    TEST_CASE(TestImageHostAllocation());
+    TEST_CASE(TestImageRequirementsCtor());
+
+    // Refcount / lifecycle
+    TEST_CASE(TestImageCopySharesBuffer());
+    TEST_CASE(TestImageMoveSemantics());
+
+    // exportData
+    TEST_CASE(TestImageExportData());
+    TEST_CASE(TestImageExportDataTypedSuccess());
+    TEST_CASE(TestImageExportDataTypedMismatch());
+
+    // ImageWrapData
+    TEST_CASE(TestImageWrapDataViewOnly());
+    TEST_CASE(TestImageWrapDataCleanupFires());
+    TEST_CASE(TestImageWrapDataCleanupReceivesData());
+    TEST_CASE(TestImageWrapDataCleanupFiresOnce());
+    TEST_CASE(TestImageWrapDataAccessors());
+
+    TEST_CASES_END();
+}
diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp
new file mode 100644
index 00000000..31e449e6
--- /dev/null
+++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include <core/image_batch_buffer.hpp>
+#include <core/image_batch_data.hpp>
+#include <core/image_buffer.hpp>
+#include <core/image_format.hpp>
+
+#include "image_test_helpers.hpp"
+#include "test_helpers.hpp"
+
+using namespace roccv;
+using namespace roccv::tests;
+
+namespace {
+
+// Static descriptor/format storage for the batch buffer. These are real host
+// allocations (so the pointers are valid) but the batch tests only read
+// metadata back out of them; nothing dereferences the per-image basePtr fields.
+ImageBufferStrided g_imageList[2];
+ImageFormat g_formatList[2] = {FMT_RGB8, FMT_RGB8};
+ImageFormat g_hostFormatList[2] = {FMT_RGB8, FMT_RGB8};
+
+// Builds a homogeneous two-image varshape descriptor with a known bounding box
+// and uniqueFormat. The returned struct's pointers reference module-static
+// arrays so addresses remain stable across calls within a test.
+ImageBatchVarShapeBufferStrided MakeHomogeneousBuffer() {
+    g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A);
+    g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 3, FAKE_PTR_B);
+    g_formatList[0] = FMT_RGB8;
+    g_formatList[1] = FMT_RGB8;
+    g_hostFormatList[0] = FMT_RGB8;
+    g_hostFormatList[1] = FMT_RGB8;
+
+    ImageBatchVarShapeBufferStrided buf{};
+    buf.uniqueFormat = FMT_RGB8;
+    buf.maxWidth = 640;
+    buf.maxHeight = 480;
+    buf.formatList = g_formatList;
+    buf.hostFormatList = g_hostFormatList;
+    buf.imageList = g_imageList;
+    return buf;
+}
+
+/**
+ * @brief Verifies HIP-strided varshape construction populates all observable
+ * state and tags itself as GPU-resident.
+ */
+void TestImageBatchVarShapeDataStridedHipConstruction() {
+    auto buf = MakeHomogeneousBuffer();
+    ImageBatchVarShapeDataStridedHip data(2, buf);
+
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU));
+    EXPECT_EQ(data.numImages(), 2);
+    EXPECT_EQ(data.maxSize().w, 640);
+    EXPECT_EQ(data.maxSize().h, 480);
+    EXPECT_EQ(data.uniqueFormat().channels(), 3);
+    EXPECT_EQ(AsAddr(data.formatList()), AsAddr(g_formatList));
+    EXPECT_EQ(AsAddr(data.hostFormatList()), AsAddr(g_hostFormatList));
+    EXPECT_EQ(AsAddr(data.imageList()), AsAddr(g_imageList));
+    EXPECT_EQ(data.imageList()[0].planes[0].width, 640);
+    EXPECT_EQ(data.imageList()[1].planes[0].width, 320);
+}
+
+/**
+ * @brief Same shape as the Hip test but for Host-resident varshape data.
+ */
+void TestImageBatchVarShapeDataStridedHostConstruction() {
+    auto buf = MakeHomogeneousBuffer();
+    ImageBatchVarShapeDataStridedHost data(2, buf);
+
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU));
+    EXPECT_EQ(data.numImages(), 2);
+    EXPECT_EQ(data.maxSize().w, 640);
+    EXPECT_EQ(data.maxSize().h, 480);
+    EXPECT_EQ(data.uniqueFormat().channels(), 3);
+    EXPECT_EQ(AsAddr(data.imageList()), AsAddr(g_imageList));
+}
+
+/**
+ * @brief Empty batch: maxSize collapses to 0x0 and uniqueFormat is FMT_NONE.
+ * Producers signal "no images" via numImages == 0; the buffer fields stay
+ * valid pointers but get ignored.
+ */
+void TestImageBatchVarShapeDataEmpty() {
+    ImageBatchVarShapeBufferStrided buf{};
+    buf.uniqueFormat = FMT_NONE;
+    buf.maxWidth = 0;
+    buf.maxHeight = 0;
+    buf.formatList = g_formatList;
+    buf.hostFormatList = g_hostFormatList;
+    buf.imageList = g_imageList;
+
+    ImageBatchVarShapeDataStridedHip data(0, buf);
+
+    EXPECT_EQ(data.numImages(), 0);
+    EXPECT_EQ(data.maxSize().w, 0);
+    EXPECT_EQ(data.maxSize().h, 0);
+    EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_NONE), 1);
+}
+
+/**
+ * @brief Heterogeneous formats: per-image formatList carries each entry
+ * verbatim; uniqueFormat is FMT_NONE since no single format spans the batch.
+ */
+void TestImageBatchVarShapeDataHeterogeneousFormats() {
+    g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A);
+    g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 4, FAKE_PTR_B);
+    g_formatList[0] = FMT_RGB8;
+    g_formatList[1] = FMT_RGBA8;
+    g_hostFormatList[0] = FMT_RGB8;
+    g_hostFormatList[1] = FMT_RGBA8;
+
+    ImageBatchVarShapeBufferStrided buf{};
+    buf.uniqueFormat = FMT_NONE;
+    buf.maxWidth = 640;
+    buf.maxHeight = 480;
+    buf.formatList = g_formatList;
+    buf.hostFormatList = g_hostFormatList;
+    buf.imageList = g_imageList;
+
+    ImageBatchVarShapeDataStridedHip data(2, buf);
+
+    EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_NONE), 1);
+    EXPECT_EQ(AsInt(data.hostFormatList()[0] == FMT_RGB8), 1);
+    EXPECT_EQ(AsInt(data.hostFormatList()[1] == FMT_RGBA8), 1);
+}
+
+/**
+ * @brief The two leaf ctors (taking ImageBatchBuffer vs the concrete strided
+ * buffer directly) must produce observably identical state.
+ */
+void TestImageBatchVarShapeDataSugarCtor() {
+    auto buf = MakeHomogeneousBuffer();
+
+    ImageBatchVarShapeDataStridedHip wide(2, ImageBatchBuffer{.varShapeStrided = buf});
+    ImageBatchVarShapeDataStridedHip sugar(2, buf);
+
+    EXPECT_EQ(AsInt(wide.device()), AsInt(sugar.device()));
+    EXPECT_EQ(wide.numImages(), sugar.numImages());
+    EXPECT_EQ(wide.maxSize().w, sugar.maxSize().w);
+    EXPECT_EQ(wide.maxSize().h, sugar.maxSize().h);
+    EXPECT_EQ(AsAddr(wide.imageList()), AsAddr(sugar.imageList()));
+
+    ImageBatchVarShapeDataStridedHost wideHost(2, ImageBatchBuffer{.varShapeStrided = buf});
+    ImageBatchVarShapeDataStridedHost sugarHost(2, buf);
+    EXPECT_EQ(AsInt(wideHost.device()), AsInt(sugarHost.device()));
+    EXPECT_EQ(AsAddr(wideHost.imageList()), AsAddr(sugarHost.imageList()));
+}
+
+/**
+ * @brief IsCompatibleKind on each level discriminates the buffer kinds it
+ * accepts. Base accepts anything-but-NONE; VarShape and VarShapeStrided accept
+ * both Hip and Host varshape; leaves accept only their own.
+ */
+void TestImageBatchDataIsCompatibleKind() {
+    EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              1);
+    EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              1);
+
+    EXPECT_EQ(AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0);
+    EXPECT_EQ(
+        AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+        1);
+    EXPECT_EQ(
+        AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+        1);
+
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              1);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              1);
+
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)),
+              0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              1);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              0);
+
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)),
+              0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              1);
+}
+
+/**
+ * @brief Round-trip a derived ImageBatchData through the base reference and
+ * back via cast<>(). Successful casts must preserve every observable field;
+ * casts to incompatible kinds must return std::nullopt.
+ */
+void TestImageBatchDataCast() {
+    auto buf = MakeHomogeneousBuffer();
+
+    // Hip → base → Hip should round-trip; intermediate VarShape/Strided also
+    // succeed; Hip → Host fails.
+    {
+        ImageBatchVarShapeDataStridedHip hip(2, buf);
+        const ImageBatchData& base = hip;
+
+        auto asHip = base.cast<ImageBatchVarShapeDataStridedHip>();
+        EXPECT_EQ(AsInt(asHip.has_value()), 1);
+        EXPECT_EQ(AsInt(asHip->device()), AsInt(eDeviceType::GPU));
+        EXPECT_EQ(asHip->numImages(), 2);
+        EXPECT_EQ(asHip->maxSize().w, 640);
+        EXPECT_EQ(AsAddr(asHip->imageList()), AsAddr(g_imageList));
+
+        auto asStrided = base.cast<ImageBatchVarShapeDataStrided>();
+        EXPECT_EQ(AsInt(asStrided.has_value()), 1);
+        EXPECT_EQ(AsInt(asStrided->device()), AsInt(eDeviceType::GPU));
+
+        auto asVar = base.cast<ImageBatchVarShapeData>();
+        EXPECT_EQ(AsInt(asVar.has_value()), 1);
+        EXPECT_EQ(asVar->maxSize().h, 480);
+
+        auto asHost = base.cast<ImageBatchVarShapeDataStridedHost>();
+        EXPECT_EQ(AsInt(asHost.has_value()), 0);
+    }
+
+    // Symmetrically: Host → base → Host succeeds, Host → Hip fails.
+    {
+        ImageBatchVarShapeDataStridedHost host(2, buf);
+        const ImageBatchData& base = host;
+
+        auto asHost = base.cast<ImageBatchVarShapeDataStridedHost>();
+        EXPECT_EQ(AsInt(asHost.has_value()), 1);
+        EXPECT_EQ(AsInt(asHost->device()), AsInt(eDeviceType::CPU));
+        EXPECT_EQ(asHost->numImages(), 2);
+
+        auto asHip = base.cast<ImageBatchVarShapeDataStridedHip>();
+        EXPECT_EQ(AsInt(asHip.has_value()), 0);
+    }
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+    TEST_CASES_BEGIN();
+
+    TEST_CASE(TestImageBatchVarShapeDataStridedHipConstruction());
+    TEST_CASE(TestImageBatchVarShapeDataStridedHostConstruction());
+    TEST_CASE(TestImageBatchVarShapeDataEmpty());
+    TEST_CASE(TestImageBatchVarShapeDataHeterogeneousFormats());
+    TEST_CASE(TestImageBatchVarShapeDataSugarCtor());
+    TEST_CASE(TestImageBatchDataIsCompatibleKind());
+    TEST_CASE(TestImageBatchDataCast());
+
+    TEST_CASES_END();
+}
diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp
new file mode 100644
index 00000000..ab4b9e1b
--- /dev/null
+++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <hip/hip_runtime.h>
+#include <stdint.h>
+
+#include <core/image.hpp>
+#include <core/image_batch_data.hpp>
+#include <core/image_batch_var_shape.hpp>
+#include <core/image_format.hpp>
+#include <utility>
+#include <vector>
+
+#include "image_test_helpers.hpp"
+#include "test_helpers.hpp"
+
+using namespace roccv;
+using namespace roccv::tests;
+
+namespace {
+
+// =============================================================================
+// Construction
+// =============================================================================
+
+void TestConstruction() {
+    CountingAllocator alloc;
+    {
+        ImageBatchVarShape batch(8, alloc);
+        EXPECT_EQ(batch.capacity(), 8);
+        EXPECT_EQ(batch.numImages(), 0);
+        EXPECT_EQ(AsInt(batch.begin() == batch.end()), 1);
+    }
+    EXPECT_EQ(alloc.hipAllocs, 2);
+    EXPECT_EQ(alloc.pinnedAllocs, 2);
+    EXPECT_EQ(alloc.hipFrees, 2);
+    EXPECT_EQ(alloc.pinnedFrees, 2);
+}
+
+void TestConstructionCpu() {
+    CountingAllocator alloc;
+    {
+        ImageBatchVarShape batch(8, alloc, eDeviceType::CPU);
+        EXPECT_EQ(batch.capacity(), 8);
+        EXPECT_EQ(batch.numImages(), 0);
+        EXPECT_EQ(AsInt(batch.device()), AsInt(eDeviceType::CPU));
+    }
+    // A CPU batch allocates only its two host descriptor buffers — no device
+    // memory, no pinned memory, no fence.
+    EXPECT_EQ(alloc.hostAllocs, 2);
+    EXPECT_EQ(alloc.hostFrees, 2);
+    EXPECT_EQ(alloc.hipAllocs, 0);
+    EXPECT_EQ(alloc.pinnedAllocs, 0);
+}
+
+void TestConstructionRejectsBadCapacity() {
+    CountingAllocator alloc;
+    EXPECT_EXCEPTION(ImageBatchVarShape(0, alloc), eStatusType::INVALID_VALUE);
+    EXPECT_EXCEPTION(ImageBatchVarShape(-3, alloc), eStatusType::INVALID_VALUE);
+}
+
+// =============================================================================
+// pushBack — basic
+// =============================================================================
+
+void TestPushBackSingle() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    Image img = MakeFakeGpuImage(640, 480, FAKE_PTR_A);
+    batch.pushBack(img);
+
+    EXPECT_EQ(batch.numImages(), 1);
+    EXPECT_EQ(batch[0].size().w, 640);
+    EXPECT_EQ(batch[0].size().h, 480);
+    EXPECT_EQ(AsInt(batch[0].format() == FMT_RGB8), 1);
+}
+
+void TestPushBackMultipleHeterogeneousSizes() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    batch.pushBack(MakeFakeGpuImage(640, 480, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(320, 240, FAKE_PTR_B));
+    batch.pushBack(MakeFakeGpuImage(800, 200, FAKE_PTR_C));
+
+    EXPECT_EQ(batch.numImages(), 3);
+    EXPECT_EQ(batch.maxSize().w, 800);
+    EXPECT_EQ(batch.maxSize().h, 480);
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1);
+}
+
+void TestPushBackIteratorRange() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(8, alloc);
+
+    std::vector<Image> imgs;
+    imgs.push_back(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+    imgs.push_back(MakeFakeGpuImage(200, 200, FAKE_PTR_B));
+    imgs.push_back(MakeFakeGpuImage(300, 300, FAKE_PTR_C));
+
+    batch.pushBack(imgs.begin(), imgs.end());
+
+    EXPECT_EQ(batch.numImages(), 3);
+    EXPECT_EQ(batch.maxSize().w, 300);
+}
+
+// =============================================================================
+// pushBack — validation
+// =============================================================================
+
+void TestPushBackCapacityOverflow() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(2, alloc);
+
+    batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_B));
+
+    EXPECT_EXCEPTION(batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_C)), eStatusType::OUT_OF_BOUNDS);
+}
+
+void TestPushBackHostImageRejected() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    Image cpuImg = MakeFakeHostImage(64, 64, FAKE_PTR_A, FMT_U8);
+    EXPECT_EXCEPTION(batch.pushBack(cpuImg), eStatusType::INVALID_VALUE);
+}
+
+void TestPushBackGpuImageRejectedOnCpuBatch() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc, eDeviceType::CPU);
+
+    Image gpuImg = MakeFakeGpuImage(64, 64, FAKE_PTR_A);
+    EXPECT_EXCEPTION(batch.pushBack(gpuImg), eStatusType::INVALID_VALUE);
+}
+
+// Note: pushBack's single-plane validation is defense-in-depth — Image's own
+// exportData() (image.cpp:118) currently hardcodes numPlanes=1 regardless of
+// the underlying buffer, so the public API can't construct a multi-plane Image
+// for this guard to fire on. The test would need to be revisited when planar
+// formats land in Image itself.
+
+void TestPushBackRangeRollbackOnFailure() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(8, alloc);
+
+    // Pre-populate so we can confirm the rollback restores exactly the
+    // pre-call state, not just back to zero.
+    batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+    EXPECT_EQ(batch.numImages(), 1);
+
+    // Mid-range CPU image — should rollback the partially-pushed entries.
+    std::vector<Image> imgs;
+    imgs.push_back(MakeFakeGpuImage(200, 200, FAKE_PTR_B));
+    imgs.push_back(MakeFakeHostImage(300, 300, FAKE_PTR_C));  // Will throw.
+
+    EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::INVALID_VALUE);
+
+    // Pre-call state is intact: 1 image, original maxSize.
+    EXPECT_EQ(batch.numImages(), 1);
+    EXPECT_EQ(batch.maxSize().w, 100);
+}
+
+void TestPushBackRangeOverflowPrechecked() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(2, alloc);
+
+    std::vector<Image> imgs;
+    imgs.push_back(MakeFakeGpuImage(10, 10, FAKE_PTR_A));
+    imgs.push_back(MakeFakeGpuImage(20, 20, FAKE_PTR_B));
+    imgs.push_back(MakeFakeGpuImage(30, 30, FAKE_PTR_C));  // 3rd overflows capacity 2.
+
+    EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::OUT_OF_BOUNDS);
+    // Pre-checked: nothing was pushed.
+    EXPECT_EQ(batch.numImages(), 0);
+}
+
+// =============================================================================
+// popBack / clear
+// =============================================================================
+
+void TestPopBack() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B));
+    batch.popBack();
+
+    EXPECT_EQ(batch.numImages(), 1);
+    // maxSize was reset on pop; the rescan should drop back to 100.
+    EXPECT_EQ(batch.maxSize().w, 100);
+}
+
+void TestPopBackMultiple() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B));
+    batch.pushBack(MakeFakeGpuImage(300, 300, FAKE_PTR_C));
+    batch.popBack(2);
+
+    EXPECT_EQ(batch.numImages(), 1);
+    EXPECT_EQ(batch.maxSize().w, 100);
+}
+
+void TestPopBackUnderflow() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+    batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+
+    EXPECT_EXCEPTION(batch.popBack(2), eStatusType::OUT_OF_BOUNDS);
+    // State preserved.
+    EXPECT_EQ(batch.numImages(), 1);
+}
+
+void TestClearAndReuse() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B));
+    batch.clear();
+
+    EXPECT_EQ(batch.numImages(), 0);
+    EXPECT_EQ(batch.maxSize().w, 0);
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1);
+
+    // Reuse after clear.
+    batch.pushBack(MakeFakeGpuImage(50, 50, FAKE_PTR_C, FMT_U8));
+    EXPECT_EQ(batch.numImages(), 1);
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_U8), 1);
+}
+
+// =============================================================================
+// uniqueFormat / maxSize cache
+// =============================================================================
+
+void TestUniqueFormatHomogeneous() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+    batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(128, 128, FAKE_PTR_B));
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1);
+}
+
+void TestUniqueFormatHeterogeneous() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+    batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_B, FMT_RGBA8));
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1);
+}
+
+void TestUniqueFormatEmptyBatch() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1);
+    EXPECT_EQ(batch.maxSize().w, 0);
+    EXPECT_EQ(batch.maxSize().h, 0);
+}
+
+// =============================================================================
+// exportData
+// =============================================================================
+
+// exportData tests use the default allocator instead of CountingAllocator
+// because they exercise the real H2D hipMemcpyAsync, which requires the
+// device-side buffer to be a real hipMalloc'd pointer.
+
+void TestExportDataEmpty() {
+    ImageBatchVarShape batch(4);
+
+    auto data = batch.exportData(0);
+    EXPECT_EQ(data.numImages(), 0);
+    EXPECT_EQ(data.maxSize().w, 0);
+    EXPECT_EQ(data.maxSize().h, 0);
+    EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_NONE), 1);
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU));
+}
+
+void TestExportDataMetadata() {
+    ImageBatchVarShape batch(4);
+    batch.pushBack(MakeFakeGpuImage(640, 480, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(320, 240, FAKE_PTR_B));
+
+    auto data = batch.exportData(0);
+    EXPECT_EQ(data.numImages(), 2);
+    EXPECT_EQ(data.maxSize().w, 640);
+    EXPECT_EQ(data.maxSize().h, 480);
+    EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_RGB8), 1);
+    EXPECT_EQ(AsInt(data.imageList() != nullptr), 1);
+    EXPECT_EQ(AsInt(data.formatList() != nullptr), 1);
+    EXPECT_EQ(AsInt(data.hostFormatList() != nullptr), 1);
+    // Pinned host mirror format entries are immediately host-readable.
+    EXPECT_EQ(AsInt(data.hostFormatList()[0] == FMT_RGB8), 1);
+    EXPECT_EQ(AsInt(data.hostFormatList()[1] == FMT_RGB8), 1);
+}
+
+void TestExportDataCastRoundTrip() {
+    ImageBatchVarShape batch(4);
+    batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A));
+
+    auto hipData = batch.exportData<ImageBatchVarShapeDataStridedHip>(0);
+    EXPECT_EQ(hipData.numImages(), 1);
+    EXPECT_EQ(AsInt(hipData.device()), AsInt(eDeviceType::GPU));
+
+    // Cast through the base reference: succeeds for compatible kinds, nullopt
+    // for the host-resident leaf.
+    const ImageBatchData& base = hipData;
+    EXPECT_EQ(AsInt(base.cast<ImageBatchVarShapeDataStridedHip>().has_value()), 1);
+    EXPECT_EQ(AsInt(base.cast<ImageBatchVarShapeDataStridedHost>().has_value()), 0);
+}
+
+// =============================================================================
+// CPU path
+// =============================================================================
+
+void TestCpuPushBackAndCaches() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc, eDeviceType::CPU);
+
+    batch.pushBack(MakeFakeHostImage(640, 480, FAKE_PTR_A));
+    batch.pushBack(MakeFakeHostImage(320, 240, FAKE_PTR_B));
+    batch.pushBack(MakeFakeHostImage(800, 200, FAKE_PTR_C));
+
+    EXPECT_EQ(batch.numImages(), 3);
+    EXPECT_EQ(batch.maxSize().w, 800);
+    EXPECT_EQ(batch.maxSize().h, 480);
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1);
+
+    batch.popBack(2);
+    EXPECT_EQ(batch.numImages(), 1);
+    EXPECT_EQ(batch.maxSize().w, 640);
+
+    batch.clear();
+    EXPECT_EQ(batch.numImages(), 0);
+    EXPECT_EQ(batch.maxSize().w, 0);
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1);
+}
+
+// CPU exportData performs no H2D copy, so it runs against the CountingAllocator's
+// malloc-backed host buffers without needing a real device.
+void TestExportDataHost() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc, eDeviceType::CPU);
+    batch.pushBack(MakeFakeHostImage(640, 480, FAKE_PTR_A));
+    batch.pushBack(MakeFakeHostImage(320, 240, FAKE_PTR_B));
+
+    auto data = batch.exportData<ImageBatchVarShapeDataStridedHost>(0);
+    EXPECT_EQ(data.numImages(), 2);
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU));
+    EXPECT_EQ(data.maxSize().w, 640);
+    EXPECT_EQ(data.maxSize().h, 480);
+    EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_RGB8), 1);
+    EXPECT_EQ(AsInt(data.imageList() != nullptr), 1);
+    EXPECT_EQ(AsInt(data.formatList() != nullptr), 1);
+    // formatList and hostFormatList alias the same host allocation for CPU batches.
+    EXPECT_EQ(AsInt(data.formatList() == data.hostFormatList()), 1);
+    EXPECT_EQ(AsInt(data.hostFormatList()[0] == FMT_RGB8), 1);
+    // Host descriptor table is directly readable: per-image dimensions match.
+    EXPECT_EQ(static_cast<int32_t>(data.imageList()[0].planes[0].width), 640);
+    EXPECT_EQ(static_cast<int32_t>(data.imageList()[1].planes[0].width), 320);
+    EXPECT_EQ(AsInt(data.imageList()[0].planes[0].basePtr == FAKE_PTR_A), 1);
+}
+
+void TestExportDataHostCastRoundTrip() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc, eDeviceType::CPU);
+    batch.pushBack(MakeFakeHostImage(64, 64, FAKE_PTR_A));
+
+    ImageBatchVarShapeDataStrided data = batch.exportData(0);
+    // A CPU snapshot casts to the host leaf but not the device leaf.
+    EXPECT_EQ(AsInt(data.cast<ImageBatchVarShapeDataStridedHost>().has_value()), 1);
+    EXPECT_EQ(AsInt(data.cast<ImageBatchVarShapeDataStridedHip>().has_value()), 0);
+}
+
+// =============================================================================
+// Move semantics
+// =============================================================================
+
+void TestMoveConstruction() {
+    CountingAllocator alloc;
+    {
+        ImageBatchVarShape src(4, alloc);
+        src.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+        src.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B));
+
+        ImageBatchVarShape dst(std::move(src));
+        EXPECT_EQ(dst.numImages(), 2);
+        EXPECT_EQ(dst.maxSize().w, 200);
+
+        // Source is valid-but-empty; destructor must not double-free.
+        EXPECT_EQ(src.numImages(), 0);
+        EXPECT_EQ(src.capacity(), 0);
+    }
+    // Exactly one set of allocations should have been freed.
+    EXPECT_EQ(alloc.hipAllocs, alloc.hipFrees);
+    EXPECT_EQ(alloc.pinnedAllocs, alloc.pinnedFrees);
+}
+
+// =============================================================================
+// Iterator
+// =============================================================================
+
+void TestIteratorRangeFor() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+    batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A));
+    batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B));
+    batch.pushBack(MakeFakeGpuImage(300, 300, FAKE_PTR_C));
+
+    int32_t expectedW = 100;
+    int32_t count = 0;
+    for (const Image& img : batch) {
+        EXPECT_EQ(img.size().w, expectedW);
+        expectedW += 100;
+        ++count;
+    }
+    EXPECT_EQ(count, 3);
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+    TEST_CASES_BEGIN();
+
+    TEST_CASE(TestConstruction());
+    TEST_CASE(TestConstructionCpu());
+    TEST_CASE(TestConstructionRejectsBadCapacity());
+
+    TEST_CASE(TestPushBackSingle());
+    TEST_CASE(TestPushBackMultipleHeterogeneousSizes());
+    TEST_CASE(TestPushBackIteratorRange());
+
+    TEST_CASE(TestPushBackCapacityOverflow());
+    TEST_CASE(TestPushBackHostImageRejected());
+    TEST_CASE(TestPushBackGpuImageRejectedOnCpuBatch());
+    TEST_CASE(TestPushBackRangeRollbackOnFailure());
+    TEST_CASE(TestPushBackRangeOverflowPrechecked());
+
+    TEST_CASE(TestPopBack());
+    TEST_CASE(TestPopBackMultiple());
+    TEST_CASE(TestPopBackUnderflow());
+    TEST_CASE(TestClearAndReuse());
+
+    TEST_CASE(TestUniqueFormatHomogeneous());
+    TEST_CASE(TestUniqueFormatHeterogeneous());
+    TEST_CASE(TestUniqueFormatEmptyBatch());
+
+    TEST_CASE(TestExportDataEmpty());
+    TEST_CASE(TestExportDataMetadata());
+    TEST_CASE(TestExportDataCastRoundTrip());
+
+    TEST_CASE(TestCpuPushBackAndCaches());
+    TEST_CASE(TestExportDataHost());
+    TEST_CASE(TestExportDataHostCastRoundTrip());
+
+    TEST_CASE(TestMoveConstruction());
+
+    TEST_CASE(TestIteratorRangeFor());
+
+    TEST_CASES_END();
+}
diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp
new file mode 100644
index 00000000..8a7945fd
--- /dev/null
+++ b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include <core/image_buffer.hpp>
+#include <core/image_data.hpp>
+#include <core/image_format.hpp>
+
+#include "image_test_helpers.hpp"
+#include "test_helpers.hpp"
+
+using namespace roccv;
+using namespace roccv::tests;
+
+namespace {
+
+ImageBufferStrided MakeThreePlaneBuffer() {
+    // Mimics a planar layout (e.g. YUV420-style) with sub-sampled chroma — three
+    // planes of differing dimensions and strides backed by distinct buffers.
+    ImageBufferStrided buf{};
+    buf.numPlanes = 3;
+    buf.planes[0] = {1920, 1080, 1920, FAKE_PTR_A};  // Y full-resolution
+    buf.planes[1] = {960, 540, 960, FAKE_PTR_B};     // U sub-sampled
+    buf.planes[2] = {960, 540, 960, FAKE_PTR_C};     // V sub-sampled
+    return buf;
+}
+
+/**
+ * @brief Verifies HIP-strided construction populates all observable state and
+ * tags itself as GPU-resident.
+ */
+void TestImageDataStridedHipConstruction() {
+    auto buf = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A);
+    ImageDataStridedHip data(FMT_RGB8, buf);
+
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU));
+    EXPECT_EQ(data.numPlanes(), 1);
+    EXPECT_EQ(data.size().w, 640);
+    EXPECT_EQ(data.size().h, 480);
+    EXPECT_EQ(data.plane(0).width, 640);
+    EXPECT_EQ(data.plane(0).height, 480);
+    EXPECT_EQ(data.plane(0).rowStride, static_cast<int64_t>(640 * 3));
+    EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_A));
+    EXPECT_EQ(data.format().channels(), 3);
+}
+
+/**
+ * @brief Same shape as the Hip test but for Host-resident strided data.
+ */
+void TestImageDataStridedHostConstruction() {
+    auto buf = MakeSinglePlaneBuffer(320, 240, 320, FAKE_PTR_B);
+    ImageDataStridedHost data(FMT_U8, buf);
+
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU));
+    EXPECT_EQ(data.numPlanes(), 1);
+    EXPECT_EQ(data.size().w, 320);
+    EXPECT_EQ(data.size().h, 240);
+    EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_B));
+    EXPECT_EQ(data.format().channels(), 1);
+}
+
+/**
+ * @brief Multi-plane buffers must round-trip per-plane dimensions and pointers
+ * unchanged. size() reports plane 0 by convention; planes 1..N may be smaller.
+ */
+void TestImageDataStridedMultiPlane() {
+    auto buf = MakeThreePlaneBuffer();
+    ImageDataStridedHip data(FMT_U8, buf);
+
+    EXPECT_EQ(data.numPlanes(), 3);
+    EXPECT_EQ(data.size().w, 1920);
+    EXPECT_EQ(data.size().h, 1080);
+
+    EXPECT_EQ(data.plane(0).width, 1920);
+    EXPECT_EQ(data.plane(0).height, 1080);
+    EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_A));
+
+    EXPECT_EQ(data.plane(1).width, 960);
+    EXPECT_EQ(data.plane(1).height, 540);
+    EXPECT_EQ(AsAddr(data.plane(1).basePtr), AsAddr(FAKE_PTR_B));
+
+    EXPECT_EQ(data.plane(2).width, 960);
+    EXPECT_EQ(data.plane(2).height, 540);
+    EXPECT_EQ(AsAddr(data.plane(2).basePtr), AsAddr(FAKE_PTR_C));
+}
+
+/**
+ * @brief The two leaf ctors (taking ImageBuffer vs ImageBufferStrided directly)
+ * must produce observably identical state.
+ */
+void TestImageDataStridedSugarCtor() {
+    auto buf = MakeSinglePlaneBuffer(100, 200, 400, FAKE_PTR_A);
+
+    ImageDataStridedHip wide(FMT_RGBA8, ImageBuffer{.strided = buf});
+    ImageDataStridedHip sugar(FMT_RGBA8, buf);
+
+    EXPECT_EQ(AsInt(wide.device()), AsInt(sugar.device()));
+    EXPECT_EQ(wide.numPlanes(), sugar.numPlanes());
+    EXPECT_EQ(AsAddr(wide.plane(0).basePtr), AsAddr(sugar.plane(0).basePtr));
+    EXPECT_EQ(wide.plane(0).rowStride, sugar.plane(0).rowStride);
+
+    ImageDataStridedHost wideHost(FMT_U8, ImageBuffer{.strided = buf});
+    ImageDataStridedHost sugarHost(FMT_U8, buf);
+    EXPECT_EQ(AsInt(wideHost.device()), AsInt(sugarHost.device()));
+    EXPECT_EQ(AsAddr(wideHost.plane(0).basePtr), AsAddr(sugarHost.plane(0).basePtr));
+}
+
+/**
+ * @brief IsCompatibleKind on each level discriminates the buffer kinds it
+ * accepts. Base accepts anything-but-NONE; Strided accepts both Hip and Host;
+ * leaves accept only their own.
+ */
+void TestImageDataIsCompatibleKind() {
+    EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1);
+    EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1);
+
+    EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1);
+    EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1);
+
+    EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1);
+    EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 0);
+
+    EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 0);
+    EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1);
+}
+
+/**
+ * @brief Round-trip a derived ImageData through the base reference and back
+ * via cast<>(). Successful casts must preserve every observable field; casts
+ * to incompatible kinds must return std::nullopt.
+ */
+void TestImageDataCast() {
+    auto buf = MakeSinglePlaneBuffer(800, 600, 800 * 4, FAKE_PTR_A);
+
+    // Hip → base → Hip should round-trip, Hip → Host should fail.
+    {
+        ImageDataStridedHip hip(FMT_RGBA8, buf);
+        const ImageData& base = hip;
+
+        auto asHip = base.cast<ImageDataStridedHip>();
+        EXPECT_EQ(AsInt(asHip.has_value()), 1);
+        EXPECT_EQ(AsInt(asHip->device()), AsInt(eDeviceType::GPU));
+        EXPECT_EQ(AsAddr(asHip->plane(0).basePtr), AsAddr(FAKE_PTR_A));
+        EXPECT_EQ(asHip->plane(0).width, 800);
+
+        auto asStrided = base.cast<ImageDataStrided>();
+        EXPECT_EQ(AsInt(asStrided.has_value()), 1);
+        EXPECT_EQ(AsInt(asStrided->device()), AsInt(eDeviceType::GPU));
+
+        auto asHost = base.cast<ImageDataStridedHost>();
+        EXPECT_EQ(AsInt(asHost.has_value()), 0);
+    }
+
+    // Symmetrically: Host → base → Host succeeds, Host → Hip fails.
+    {
+        ImageDataStridedHost host(FMT_RGBA8, buf);
+        const ImageData& base = host;
+
+        auto asHost = base.cast<ImageDataStridedHost>();
+        EXPECT_EQ(AsInt(asHost.has_value()), 1);
+        EXPECT_EQ(AsInt(asHost->device()), AsInt(eDeviceType::CPU));
+
+        auto asHip = base.cast<ImageDataStridedHip>();
+        EXPECT_EQ(AsInt(asHip.has_value()), 0);
+    }
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+    TEST_CASES_BEGIN();
+
+    TEST_CASE(TestImageDataStridedHipConstruction());
+    TEST_CASE(TestImageDataStridedHostConstruction());
+    TEST_CASE(TestImageDataStridedMultiPlane());
+    TEST_CASE(TestImageDataStridedSugarCtor());
+    TEST_CASE(TestImageDataIsCompatibleKind());
+    TEST_CASE(TestImageDataCast());
+
+    TEST_CASES_END();
+}
diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp
index 873f05dc..6dd736ad 100644
--- a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp
+++ b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp
@@ -103,7 +103,7 @@ int64_t GetCoordOfBorderPel(int64_t u, int64_t dimSize, eBorderType borderMode)
  *
  * @tparam T The underlying datatype of the image. (e.g. uchar3)
  * @tparam BT The base datatype of the image (e.g. unsigned char)
- * @param[in] input The input ImageWrapper referencing the underlying image data.
+ * @param[in] input The input TensorWrapper referencing the underlying image data.
  * @param[in] borderMode The border mode used to handle out of bounds coordinates.
  * @param[in] borderValue The value to fallback to when handling out of bounds coordinates with the CONSTANT border
  * mode.
@@ -115,8 +115,8 @@ int64_t GetCoordOfBorderPel(int64_t u, int64_t dimSize, eBorderType borderMode)
  * coordinates fall out of bounds.
  */
 template <typename T, typename BT = detail::BaseType<T>>
-BT GoldenBorderAt(ImageWrapper<T>& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y,
-                  int64_t x, int64_t channel) {
+BT GoldenBorderAt(TensorWrapper<T>& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y, int64_t x,
+                  int64_t channel) {
     int64_t outX = x, outY = y;
 
     if (borderMode == eBorderType::BORDER_TYPE_CONSTANT) {
@@ -130,7 +130,7 @@ BT GoldenBorderAt(ImageWrapper<T>& input, eBorderType borderMode, T borderValue,
         outY = GetCoordOfBorderPel(y, input.height(), borderMode);
     }
 
-    // Return the value at the modified outX, outY coordinates using the passed in ImageWrapper.
+    // Return the value at the modified outX, outY coordinates using the passed in TensorWrapper.
     return detail::GetElement(input.at(sample, outY, outX, 0), channel);
 }
 
@@ -161,7 +161,8 @@ void TestCorrectness(float4 borderValue, int32_t batchSize, Size2D imageSize, in
     FillVector(inputData);
 
     // BorderWrapper to calculate the actual calculated values.
-    BorderWrapper<T, BorderType> borderWrap(ImageWrapper<T>(inputData, batchSize, imageSize.w, imageSize.h), borderVal);
+    auto borderWrap =
+        MakeBorderWrapper<BorderType>(TensorWrapper<T>(inputData, batchSize, imageSize.w, imageSize.h), borderVal);
     std::vector<BT> actualOutput(numElementsWithBorder);
     int actualIndex = 0;
     for (int batch = 0; batch < batchSize; ++batch) {
@@ -176,9 +177,9 @@ void TestCorrectness(float4 borderValue, int32_t batchSize, Size2D imageSize, in
         }
     }
 
-    // ImageWrapper for use in the golden output generator. ImageWrapper is unit tested separately, and is
+    // TensorWrapper for use in the golden output generator. TensorWrapper is unit tested separately, and is
     // considered working at this point in the dependency chain.
-    ImageWrapper<T> imageWrap(inputData, batchSize, imageSize.w, imageSize.h);
+    TensorWrapper<T> imageWrap(inputData, batchSize, imageSize.w, imageSize.h);
     std::vector<BT> goldenOutput(numElementsWithBorder);
     int goldenIndex = 0;
     for (int batch = 0; batch < batchSize; ++batch) {
diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_image_batch_var_shape_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_image_batch_var_shape_wrapper.cpp
new file mode 100644
index 00000000..12d5000e
--- /dev/null
+++ b/tests/roccv/cpp/src/tests/core/wrappers/test_image_batch_var_shape_wrapper.cpp
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <core/hip_assert.h>
+#include <hip/hip_runtime.h>
+
+#include <core/detail/type_traits.hpp>
+#include <core/image.hpp>
+#include <core/image_batch_var_shape.hpp>
+#include <core/image_data.hpp>
+#include <core/image_format.hpp>
+#include <core/wrappers/border_wrapper.hpp>
+#include <core/wrappers/image_batch_var_shape_wrapper.hpp>
+#include <core/wrappers/interpolation_wrapper.hpp>
+#include <cstring>
+#include <vector>
+
+#include "test_helpers.hpp"
+
+using namespace roccv;
+using namespace roccv::tests;
+
+namespace {
+
+// Per-image copy kernel: writes dst[n,y,x,c] = src[n,y,x,c] for the image at batch index n.
+// Each launch covers exactly one image — the host loop steps through n and resizes the
+// grid to that image's dimensions, which avoids needing a max-bounds check inside the kernel.
+template <typename T>
+__global__ void VarShapeCopyKernel(ImageBatchVarShapeWrapper<T> src, ImageBatchVarShapeWrapper<T> dst, int32_t n) {
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= dst.width(n) || y >= dst.height(n)) return;
+    dst.at(n, y, x, 0) = src.at(n, y, x, 0);
+}
+
+// Roundtrip test: build a varshape batch from heterogeneous host pixel data, run the copy kernel from src varshape
+// wrapper to dst varshape wrapper, read pixels back, and verify byte-equality with the original input.
+template <typename T, typename BT = detail::BaseType<T>>
+void TestRoundtripCopy(const std::vector<Size2D>& sizes, ImageFormat fmt) {
+    const int channels = detail::NumElements<T>;
+    const int32_t numImages = static_cast<int32_t>(sizes.size());
+
+    // Generate per-image host pixel data.
+    std::vector<std::vector<BT>> hostPixels(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        hostPixels[i].resize(static_cast<size_t>(sizes[i].w) * sizes[i].h * channels);
+        FillVector(hostPixels[i], /*seed=*/static_cast<uint32_t>(0x1000 + i));
+    }
+
+    hipStream_t stream = nullptr;
+
+    // Build source batch and copy host pixels in.
+    ImageBatchVarShape srcBatch(numImages);
+    std::vector<Image> srcImages;
+    srcImages.reserve(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        srcImages.emplace_back(sizes[i], fmt);
+        auto sd = srcImages[i].exportData<ImageDataStridedHip>();
+        const ImagePlaneStrided& sp = sd.plane(0);
+        const size_t rowBytes = static_cast<size_t>(sizes[i].w) * channels * sizeof(BT);
+        HIP_VALIDATE_NO_ERRORS(hipMemcpy2DAsync(sp.basePtr, sp.rowStride, hostPixels[i].data(), rowBytes, rowBytes,
+                                                sizes[i].h, hipMemcpyHostToDevice, stream));
+        srcBatch.pushBack(srcImages[i]);
+    }
+
+    // Build destination batch with matching shapes.
+    ImageBatchVarShape dstBatch(numImages);
+    std::vector<Image> dstImages;
+    dstImages.reserve(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        dstImages.emplace_back(sizes[i], fmt);
+        dstBatch.pushBack(dstImages[i]);
+    }
+
+    auto srcData = srcBatch.exportData(stream);
+    auto dstData = dstBatch.exportData(stream);
+    ImageBatchVarShapeWrapper<T> srcWrap(srcData);
+    ImageBatchVarShapeWrapper<T> dstWrap(dstData);
+
+    // Launch one kernel per image (sizes vary so a single 3D launch can't bound y to per-image height cleanly).
+    for (int32_t i = 0; i < numImages; ++i) {
+        dim3 block(16, 16);
+        dim3 grid((sizes[i].w + block.x - 1) / block.x, (sizes[i].h + block.y - 1) / block.y);
+        VarShapeCopyKernel<T><<<grid, block, 0, stream>>>(srcWrap, dstWrap, i);
+    }
+
+    HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream));
+
+    // Read back dst pixels and verify byte-for-byte against the original host input.
+    for (int32_t i = 0; i < numImages; ++i) {
+        std::vector<BT> dstHost(static_cast<size_t>(sizes[i].w) * sizes[i].h * channels);
+        auto dd = dstImages[i].exportData<ImageDataStridedHip>();
+        const ImagePlaneStrided& dp = dd.plane(0);
+        const size_t rowBytes = static_cast<size_t>(sizes[i].w) * channels * sizeof(BT);
+        HIP_VALIDATE_NO_ERRORS(hipMemcpy2D(dstHost.data(), rowBytes, dp.basePtr, dp.rowStride, rowBytes, sizes[i].h,
+                                           hipMemcpyDeviceToHost));
+        CompareVectors(dstHost, hostPixels[i]);
+    }
+}
+
+// Border-composition test: write the BORDER_TYPE_CONSTANT fallback for every output pixel by reading from a coordinate
+// that is guaranteed to be out of bounds for every image (-1, -1). Confirms BorderWrapper<B,
+// ImageBatchVarShapeWrapper<T>> correctly delegates to width(n) / height(n) for per-image bounds; otherwise it would
+// dereference invalid memory.
+template <typename T>
+__global__ void VarShapeBorderConstantKernel(
+    BorderWrapper<eBorderType::BORDER_TYPE_CONSTANT, ImageBatchVarShapeWrapper<T>> src,
+    ImageBatchVarShapeWrapper<T> dst, int32_t n) {
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= dst.width(n) || y >= dst.height(n)) return;
+    dst.at(n, y, x, 0) = src.at(n, -1, -1, 0);
+}
+
+template <typename T, typename BT = detail::BaseType<T>>
+void TestBorderConstantComposition(const std::vector<Size2D>& sizes, ImageFormat fmt, T borderValue) {
+    const int channels = detail::NumElements<T>;
+    const int32_t numImages = static_cast<int32_t>(sizes.size());
+
+    // Source pixels content doesn't matter — every read is forced OOB.
+    ImageBatchVarShape srcBatch(numImages);
+    ImageBatchVarShape dstBatch(numImages);
+    std::vector<Image> srcImages, dstImages;
+    srcImages.reserve(numImages);
+    dstImages.reserve(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        srcImages.emplace_back(sizes[i], fmt);
+        dstImages.emplace_back(sizes[i], fmt);
+        srcBatch.pushBack(srcImages[i]);
+        dstBatch.pushBack(dstImages[i]);
+    }
+
+    hipStream_t stream = nullptr;
+    auto srcData = srcBatch.exportData(stream);
+    auto dstData = dstBatch.exportData(stream);
+
+    auto srcWrap =
+        MakeBorderWrapper<eBorderType::BORDER_TYPE_CONSTANT>(ImageBatchVarShapeWrapper<T>(srcData), borderValue);
+    ImageBatchVarShapeWrapper<T> dstWrap(dstData);
+
+    for (int32_t i = 0; i < numImages; ++i) {
+        dim3 block(16, 16);
+        dim3 grid((sizes[i].w + block.x - 1) / block.x, (sizes[i].h + block.y - 1) / block.y);
+        VarShapeBorderConstantKernel<T><<<grid, block, 0, stream>>>(srcWrap, dstWrap, i);
+    }
+    HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream));
+
+    // Expect every output pixel of every image to equal borderValue.
+    std::vector<BT> borderBytes(channels);
+    for (int c = 0; c < channels; ++c) borderBytes[c] = detail::GetElement(borderValue, c);
+
+    for (int32_t i = 0; i < numImages; ++i) {
+        const size_t pixels = static_cast<size_t>(sizes[i].w) * sizes[i].h;
+        std::vector<BT> dstHost(pixels * channels);
+        auto dd = dstImages[i].exportData<ImageDataStridedHip>();
+        const ImagePlaneStrided& dp = dd.plane(0);
+        const size_t rowBytes = static_cast<size_t>(sizes[i].w) * channels * sizeof(BT);
+        HIP_VALIDATE_NO_ERRORS(hipMemcpy2D(dstHost.data(), rowBytes, dp.basePtr, dp.rowStride, rowBytes, sizes[i].h,
+                                           hipMemcpyDeviceToHost));
+        std::vector<BT> expected(pixels * channels);
+        for (size_t p = 0; p < pixels; ++p) {
+            for (int c = 0; c < channels; ++c) expected[p * channels + c] = borderBytes[c];
+        }
+        CompareVectors(dstHost, expected);
+    }
+}
+
+// Interpolation-composition test: NEAREST interpolation at integer coordinates is the identity, so a roundtrip copy
+// via InterpolationWrapper<NEAREST, REPLICATE, VarShape> must equal the source. Confirms the full wrapper chain
+// composes correctly over a VarShape backing.
+template <typename T>
+__global__ void VarShapeInterpNearestKernel(
+    InterpolationWrapper<eInterpolationType::INTERP_TYPE_NEAREST,
+                         BorderWrapper<eBorderType::BORDER_TYPE_REPLICATE, ImageBatchVarShapeWrapper<T>>>
+        src,
+    ImageBatchVarShapeWrapper<T> dst, int32_t n) {
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= dst.width(n) || y >= dst.height(n)) return;
+    dst.at(n, y, x, 0) = src.at(n, static_cast<float>(y), static_cast<float>(x), 0);
+}
+
+template <typename T, typename BT = detail::BaseType<T>>
+void TestInterpolationNearestComposition(const std::vector<Size2D>& sizes, ImageFormat fmt) {
+    const int channels = detail::NumElements<T>;
+    const int32_t numImages = static_cast<int32_t>(sizes.size());
+
+    std::vector<std::vector<BT>> hostPixels(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        hostPixels[i].resize(static_cast<size_t>(sizes[i].w) * sizes[i].h * channels);
+        FillVector(hostPixels[i], static_cast<uint32_t>(0x2000 + i));
+    }
+
+    hipStream_t stream = nullptr;
+    ImageBatchVarShape srcBatch(numImages);
+    ImageBatchVarShape dstBatch(numImages);
+    std::vector<Image> srcImages, dstImages;
+    srcImages.reserve(numImages);
+    dstImages.reserve(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        srcImages.emplace_back(sizes[i], fmt);
+        auto sd = srcImages[i].exportData<ImageDataStridedHip>();
+        const ImagePlaneStrided& sp = sd.plane(0);
+        const size_t rowBytes = static_cast<size_t>(sizes[i].w) * channels * sizeof(BT);
+        HIP_VALIDATE_NO_ERRORS(hipMemcpy2DAsync(sp.basePtr, sp.rowStride, hostPixels[i].data(), rowBytes, rowBytes,
+                                                sizes[i].h, hipMemcpyHostToDevice, stream));
+        srcBatch.pushBack(srcImages[i]);
+
+        dstImages.emplace_back(sizes[i], fmt);
+        dstBatch.pushBack(dstImages[i]);
+    }
+
+    auto srcData = srcBatch.exportData(stream);
+    auto dstData = dstBatch.exportData(stream);
+
+    auto srcWrap = MakeInterpolationWrapper<eInterpolationType::INTERP_TYPE_NEAREST>(
+        MakeBorderWrapper<eBorderType::BORDER_TYPE_REPLICATE>(ImageBatchVarShapeWrapper<T>(srcData), T{}));
+    ImageBatchVarShapeWrapper<T> dstWrap(dstData);
+
+    for (int32_t i = 0; i < numImages; ++i) {
+        dim3 block(16, 16);
+        dim3 grid((sizes[i].w + block.x - 1) / block.x, (sizes[i].h + block.y - 1) / block.y);
+        VarShapeInterpNearestKernel<T><<<grid, block, 0, stream>>>(srcWrap, dstWrap, i);
+    }
+    HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream));
+
+    for (int32_t i = 0; i < numImages; ++i) {
+        std::vector<BT> dstHost(static_cast<size_t>(sizes[i].w) * sizes[i].h * channels);
+        auto dd = dstImages[i].exportData<ImageDataStridedHip>();
+        const ImagePlaneStrided& dp = dd.plane(0);
+        const size_t rowBytes = static_cast<size_t>(sizes[i].w) * channels * sizeof(BT);
+        HIP_VALIDATE_NO_ERRORS(hipMemcpy2D(dstHost.data(), rowBytes, dp.basePtr, dp.rowStride, rowBytes, sizes[i].h,
+                                           hipMemcpyDeviceToHost));
+        CompareVectors(dstHost, hostPixels[i]);
+    }
+}
+
+// Verify accessor surface: width(n), height(n), batches(), channels().
+template <typename T>
+void TestAccessors(const std::vector<Size2D>& sizes, ImageFormat fmt) {
+    const int32_t numImages = static_cast<int32_t>(sizes.size());
+    ImageBatchVarShape batch(numImages);
+    std::vector<Image> handles;
+    handles.reserve(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        handles.emplace_back(sizes[i], fmt);
+        batch.pushBack(handles[i]);
+    }
+    auto data = batch.exportData(0);
+    ImageBatchVarShapeWrapper<T> wrap(data);
+
+    EXPECT_EQ(wrap.batches(), static_cast<int64_t>(numImages));
+    EXPECT_EQ(wrap.channels(), static_cast<int64_t>(detail::NumElements<T>));
+    // width/height are device pointers under the hood; reading them on host post-sync is safe because exportData
+    // recorded a hipEvent that hipStreamSynchronize on the null stream above (implicit) drains. The descriptor table
+    // lives in device memory though, so we round-trip the lookups through a tiny D->H pull via the wrapper's host
+    // mirror path — here we just check that the construction succeeded; per-image width/height behavior is exercised
+    // end-to-end by TestRoundtripCopy.
+    HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(0));
+}
+
+// Host-path roundtrip: a CPU-resident batch exports a host snapshot, and the wrapper reads/writes
+// it directly from host code — no kernel launch, no stream. Mirrors TestRoundtripCopy for the GPU
+// path and confirms the residency-agnostic constructor handles the ...Host leaf.
+template <typename T, typename BT = detail::BaseType<T>>
+void TestHostRoundtripCopy(const std::vector<Size2D>& sizes, ImageFormat fmt) {
+    const int channels = detail::NumElements<T>;
+    const int32_t numImages = static_cast<int32_t>(sizes.size());
+
+    std::vector<std::vector<BT>> hostPixels(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        hostPixels[i].resize(static_cast<size_t>(sizes[i].w) * sizes[i].h * channels);
+        FillVector(hostPixels[i], /*seed=*/static_cast<uint32_t>(0x2000 + i));
+    }
+
+    // CPU-resident batches; fill each source image's host buffer directly.
+    ImageBatchVarShape srcBatch(numImages, eDeviceType::CPU);
+    ImageBatchVarShape dstBatch(numImages, eDeviceType::CPU);
+    std::vector<Image> srcImages, dstImages;
+    srcImages.reserve(numImages);
+    dstImages.reserve(numImages);
+    for (int32_t i = 0; i < numImages; ++i) {
+        srcImages.emplace_back(sizes[i], fmt, eDeviceType::CPU);
+        dstImages.emplace_back(sizes[i], fmt, eDeviceType::CPU);
+
+        auto sd = srcImages[i].exportData<ImageDataStridedHost>();
+        const ImagePlaneStrided& sp = sd.plane(0);
+        const size_t rowBytes = static_cast<size_t>(sizes[i].w) * channels * sizeof(BT);
+        for (int32_t y = 0; y < sizes[i].h; ++y) {
+            std::memcpy(static_cast<unsigned char*>(sp.basePtr) + y * sp.rowStride,
+                        hostPixels[i].data() + static_cast<size_t>(y) * sizes[i].w * channels, rowBytes);
+        }
+
+        srcBatch.pushBack(srcImages[i]);
+        dstBatch.pushBack(dstImages[i]);
+    }
+
+    auto srcData = srcBatch.exportData(0);
+    auto dstData = dstBatch.exportData(0);
+    ImageBatchVarShapeWrapper<T> srcWrap(srcData);
+    ImageBatchVarShapeWrapper<T> dstWrap(dstData);
+
+    // Copy whole pixels through the wrapper entirely on the host (same at(n,y,x,0) semantics as
+    // VarShapeCopyKernel, just without a device launch).
+    for (int32_t n = 0; n < numImages; ++n) {
+        for (int64_t y = 0; y < srcWrap.height(n); ++y) {
+            for (int64_t x = 0; x < srcWrap.width(n); ++x) {
+                dstWrap.at(n, y, x, 0) = srcWrap.at(n, y, x, 0);
+            }
+        }
+    }
+
+    // Read dst host buffers back and verify byte-for-byte against the original input.
+    for (int32_t i = 0; i < numImages; ++i) {
+        std::vector<BT> dstHost(static_cast<size_t>(sizes[i].w) * sizes[i].h * channels);
+        auto dd = dstImages[i].exportData<ImageDataStridedHost>();
+        const ImagePlaneStrided& dp = dd.plane(0);
+        const size_t rowBytes = static_cast<size_t>(sizes[i].w) * channels * sizeof(BT);
+        for (int32_t y = 0; y < sizes[i].h; ++y) {
+            std::memcpy(dstHost.data() + static_cast<size_t>(y) * sizes[i].w * channels,
+                        static_cast<const unsigned char*>(dp.basePtr) + y * dp.rowStride, rowBytes);
+        }
+        CompareVectors(dstHost, hostPixels[i]);
+    }
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+    TEST_CASES_BEGIN();
+
+    // Single-channel, heterogeneous sizes.
+    TEST_CASE(TestRoundtripCopy<uchar1>({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_U8));
+    TEST_CASE(TestRoundtripCopy<float1>({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_F32));
+
+    // Multi-channel interleaved.
+    TEST_CASE(TestRoundtripCopy<uchar3>({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_RGB8));
+    TEST_CASE(TestRoundtripCopy<uchar4>({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_RGBA8));
+
+    // Homogeneous batch — the wrapper should still work when all images share the same shape.
+    TEST_CASE(TestRoundtripCopy<uchar4>({{64, 64}, {64, 64}, {64, 64}}, FMT_RGBA8));
+
+    // Single image, large.
+    TEST_CASE(TestRoundtripCopy<uchar3>({{128, 96}}, FMT_RGB8));
+
+    // CPU path: the same roundtrip driven entirely from host code over a host-resident snapshot.
+    TEST_CASE(TestHostRoundtripCopy<uchar1>({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_U8));
+    TEST_CASE(TestHostRoundtripCopy<float1>({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_F32));
+    TEST_CASE(TestHostRoundtripCopy<uchar3>({{16, 12}, {32, 24}, {7, 5}, {48, 9}}, FMT_RGB8));
+    TEST_CASE(TestHostRoundtripCopy<uchar4>({{64, 64}, {64, 64}}, FMT_RGBA8));
+
+    TEST_CASE(TestAccessors<uchar3>({{16, 12}, {32, 24}, {7, 5}}, FMT_RGB8));
+
+    // BorderWrapper composed over ImageBatchVarShapeWrapper: constant-fill via guaranteed-OOB read.
+    TEST_CASE(
+        TestBorderConstantComposition<uchar3>({{16, 12}, {32, 24}, {7, 5}}, FMT_RGB8, make_uchar3(0xAB, 0xCD, 0xEF)));
+    TEST_CASE(TestBorderConstantComposition<uchar4>({{16, 12}, {32, 24}, {7, 5}}, FMT_RGBA8,
+                                                    make_uchar4(0x12, 0x34, 0x56, 0x78)));
+
+    // InterpolationWrapper<NEAREST> composed over ImageBatchVarShapeWrapper: integer-coord roundtrip is identity.
+    TEST_CASE(TestInterpolationNearestComposition<uchar1>({{16, 12}, {32, 24}, {7, 5}}, FMT_U8));
+    TEST_CASE(TestInterpolationNearestComposition<uchar3>({{16, 12}, {32, 24}, {7, 5}}, FMT_RGB8));
+    TEST_CASE(TestInterpolationNearestComposition<uchar4>({{16, 12}, {32, 24}, {7, 5}}, FMT_RGBA8));
+
+    TEST_CASES_END();
+}
diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp
index a4466530..43e07329 100644
--- a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp
+++ b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp
@@ -21,9 +21,9 @@
 
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include "core/detail/vector_utils.hpp"
 #include <core/wrappers/interpolation_wrapper.hpp>
 
+#include "core/detail/vector_utils.hpp"
 #include "test_helpers.hpp"
 
 using namespace roccv;
@@ -45,7 +45,7 @@ namespace {
  * @return T The interpolated pixel.
  */
 template <typename T, eBorderType BorderType>
-T GoldenLinear(BorderWrapper<T, BorderType> input, int64_t sample, float y, float x) {
+T GoldenLinear(BorderWrapper<BorderType, TensorWrapper<T>> input, int64_t sample, float y, float x) {
     // Defines the vectorized float type for intermediate calculations.
     using WorkType = detail::MakeType<float, detail::NumComponents<T>>;
 
@@ -86,7 +86,7 @@ T GoldenLinear(BorderWrapper<T, BorderType> input, int64_t sample, float y, floa
  * @return T The interpolated pixel.
  */
 template <typename T, eBorderType BorderType>
-T GoldenNearest(BorderWrapper<T, BorderType> input, int64_t sample, float y, float x) {
+T GoldenNearest(BorderWrapper<BorderType, TensorWrapper<T>> input, int64_t sample, float y, float x) {
     // Nearest neighbor interpolation. Rounds given floating point values to the nearest integer.
     return input.at(sample, lroundf(y), lroundf(x), 0);
 }
@@ -98,7 +98,7 @@ T GoldenNearest(BorderWrapper<T, BorderType> input, int64_t sample, float y, flo
  * @return None.
  */
 void CalBicubicWeights(float dist, float* weight) {
-    const float A = -0.5f; // Note OpenCV sets alpha to -0.75f
+    const float A = -0.5f;  // Note OpenCV sets alpha to -0.75f
 
     weight[0] = ((A * (dist + 1) - 5 * A) * (dist + 1) + 8 * A) * (dist + 1) - 4 * A;
     weight[1] = ((A + 2) * dist - (A + 3)) * dist * dist + 1;
@@ -107,7 +107,8 @@ void CalBicubicWeights(float dist, float* weight) {
 }
 
 /**
- * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV libraries.
+ * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV
+ * libraries.
  *
  * @tparam T Image datatype.
  * @tparam BorderType Border type for boundary conditions.
@@ -118,7 +119,7 @@ void CalBicubicWeights(float dist, float* weight) {
  * @return T The interpolated pixel.
  */
 template <typename T, eBorderType BorderType>
-T GoldenBicubic(BorderWrapper<T, BorderType> input, int64_t sample, float y, float x) {
+T GoldenBicubic(BorderWrapper<BorderType, TensorWrapper<T>> input, int64_t sample, float y, float x) {
     // Defines the vectorized float type for intermediate calculations.
     using WorkType = detail::MakeType<float, detail::NumComponents<T>>;
 
@@ -135,7 +136,8 @@ T GoldenBicubic(BorderWrapper<T, BorderType> input, int64_t sample, float y, flo
     WorkType sum = SetAll<WorkType>(0.0f);
     for (int indexY = -1; indexY <= 2; indexY++) {
         for (int indexX = -1; indexX <= 2; indexX++) {
-            sum += detail::RangeCast<WorkType>(input.at(sample, intY + indexY, intX + indexX, 0)) * (weightX[indexX + 1] * weightY[indexY + 1]);
+            sum += detail::RangeCast<WorkType>(input.at(sample, intY + indexY, intX + indexX, 0)) *
+                   (weightX[indexX + 1] * weightY[indexY + 1]);
         }
     }
 
@@ -156,7 +158,7 @@ T GoldenBicubic(BorderWrapper<T, BorderType> input, int64_t sample, float y, flo
  * @return T The interpolated pixel.
  */
 template <typename T, eBorderType BorderType>
-T GoldenInterpolationAt(BorderWrapper<T, BorderType> input, int64_t sample, float y, float x,
+T GoldenInterpolationAt(BorderWrapper<BorderType, TensorWrapper<T>> input, int64_t sample, float y, float x,
                         eInterpolationType interp) {
     switch (interp) {
         case eInterpolationType::INTERP_TYPE_NEAREST:
@@ -202,9 +204,10 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl
     std::vector<detail::BaseType<T>> goldenOutput;
 
     // Use roccv::InterpolationWrapper to get actual output
-    InterpolationWrapper<T, BorderType, InterpType> actualWrap(
-        (BorderWrapper<T, BorderType>(ImageWrapper<T>(input, batchSize, imageSize.w, imageSize.h), borderVal)));
-    BorderWrapper<T, BorderType> goldenWrap(ImageWrapper<T>(input, batchSize, imageSize.w, imageSize.h), borderVal);
+    auto actualWrap = MakeInterpolationWrapper<InterpType>(
+        MakeBorderWrapper<BorderType>(TensorWrapper<T>(input, batchSize, imageSize.w, imageSize.h), borderVal));
+    auto goldenWrap =
+        MakeBorderWrapper<BorderType>(TensorWrapper<T>(input, batchSize, imageSize.w, imageSize.h), borderVal);
 
     for (int b = 0; b < batchSize; b++) {
         for (float y = 0; y < imageSize.h; y += idxDelta) {
@@ -220,7 +223,8 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl
             }
         }
     }
-    if constexpr (std::is_integral_v<detail::BaseType<T>> && std::is_signed_v<detail::BaseType<T>> && sizeof(detail::BaseType<T>) == 4) {
+    if constexpr (std::is_integral_v<detail::BaseType<T>> && std::is_signed_v<detail::BaseType<T>> &&
+                  sizeof(detail::BaseType<T>) == 4) {
         CompareVectorsNear(actualOutput, goldenOutput, NEAR_EQUAL_THRESHOLD * 2);
     } else {
         CompareVectorsNear(actualOutput, goldenOutput);
@@ -228,7 +232,7 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl
 }
 }  // namespace
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
     (void)argc;
     (void)argv;
     TEST_CASES_BEGIN();
@@ -322,7 +326,7 @@ int main(int argc, char **argv) {
     TEST_CASE((TestCorrectness<float1, eBorderType::BORDER_TYPE_REFLECT, eInterpolationType::INTERP_TYPE_CUBIC>(1, {20, 53}, make_float4(0, 0, 0, 1), 0.1f)));
     TEST_CASE((TestCorrectness<float3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_CUBIC>(3, {38, 10}, make_float4(0, 0, 0, 1), 0.1f)));
     TEST_CASE((TestCorrectness<float4, eBorderType::BORDER_TYPE_WRAP, eInterpolationType::INTERP_TYPE_CUBIC>(5, {65, 21}, make_float4(1, 0.5, 0.5, 1), 0.1f)));
-     // clang-format on
+    // clang-format on
 
     TEST_CASES_END();
 }
\ No newline at end of file
diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_image_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_tensor_wrapper.cpp
similarity index 89%
rename from tests/roccv/cpp/src/tests/core/wrappers/test_image_wrapper.cpp
rename to tests/roccv/cpp/src/tests/core/wrappers/test_tensor_wrapper.cpp
index 70f03b0b..28a1acca 100644
--- a/tests/roccv/cpp/src/tests/core/wrappers/test_image_wrapper.cpp
+++ b/tests/roccv/cpp/src/tests/core/wrappers/test_tensor_wrapper.cpp
@@ -20,7 +20,7 @@
  */
 
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 
 #include "test_helpers.hpp"
 
@@ -37,11 +37,11 @@ void TestCorrectness(int numImages, Size2D size) {
     std::vector<BT> ref(numElements);
     FillVector(ref);
 
-    ImageWrapper<T> input(ref, numImages, size.w, size.h);
+    TensorWrapper<T> input(ref, numImages, size.w, size.h);
     std::vector<BT> actual;
 
     // To determine if coordinates are pointing to the proper values in memory, iterate through the reference vector
-    // element-by-element and iterate through the ImageWrapper coordinate-wise. All values should be the same if
+    // element-by-element and iterate through the TensorWrapper coordinate-wise. All values should be the same if
     // everything lines up.
 
     for (int b = 0; b < numImages; ++b) {
@@ -58,9 +58,9 @@ void TestCorrectness(int numImages, Size2D size) {
 }
 
 template <typename T>
-void TestImageWrapperConstructor(int imageCount, Size2D imageSize, ImageFormat format) {
+void TestTensorWrapperConstructor(int imageCount, Size2D imageSize, ImageFormat format) {
     Tensor input(imageCount, imageSize, format);
-    ImageWrapper<T> wrapper(input);
+    TensorWrapper<T> wrapper(input);
 
     EXPECT_EQ(wrapper.batches(), imageCount);
     EXPECT_EQ(wrapper.channels(), format.channels());
@@ -74,7 +74,7 @@ int main(int argc, char** argv) {
     (void)argv;
     TEST_CASES_BEGIN();
 
-    TEST_CASE(TestImageWrapperConstructor<uchar3>(2, {54, 67}, FMT_RGB8));
+    TEST_CASE(TestTensorWrapperConstructor<uchar3>(2, {54, 67}, FMT_RGB8));
 
     TEST_CASE(TestCorrectness<uchar1>(1, {10, 10}));
     TEST_CASE(TestCorrectness<uchar3>(2, {43, 9}));
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp
index f208962c..d34a5187 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp
@@ -24,7 +24,7 @@ THE SOFTWARE.
 #include <core/detail/type_traits.hpp>
 #include <core/detail/vector_utils.hpp>
 #include <core/wrappers/border_wrapper.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <op_bilateral_filter.hpp>
 
 #include "test_helpers.hpp"
@@ -51,8 +51,8 @@ namespace {
 template <typename T, eBorderType borderMode, typename BT = detail::BaseType<T>>
 void GenerateGoldenBilateral(std::vector<BT>& input, std::vector<BT>& output, int32_t batchSize, Size2D imageSize,
                              int diameter, float sigmaColor, float sigmaSpace, T borderValue) {
-    BorderWrapper<T, borderMode> src(ImageWrapper<T>(input, batchSize, imageSize.w, imageSize.h), borderValue);
-    ImageWrapper<T> dst(output, batchSize, imageSize.w, imageSize.h);
+    auto src = MakeBorderWrapper<borderMode>(TensorWrapper<T>(input, batchSize, imageSize.w, imageSize.h), borderValue);
+    TensorWrapper<T> dst(output, batchSize, imageSize.w, imageSize.h);
     using namespace roccv::detail;
     using Worktype = MakeType<float, NumElements<T>>;
 
@@ -179,9 +179,9 @@ int main(int argc, char** argv) {
     TEST_CASE((TestCorrectness<uchar, BORDER_TYPE_CONSTANT>(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0},
                                                             eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar3, BORDER_TYPE_REPLICATE>(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f,
-                                                             {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU)));
-    TEST_CASE((TestCorrectness<float1, BORDER_TYPE_WRAP>(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f,
-                                                         {500.0, 500.0, 0.0, 0.0}, eDeviceType::GPU)));
+                                                              {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU)));
+    TEST_CASE((TestCorrectness<float1, BORDER_TYPE_WRAP>(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0},
+                                                         eDeviceType::GPU)));
 
     TEST_CASE((TestCorrectness<uchar3, BORDER_TYPE_CONSTANT>(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0},
                                                              eDeviceType::GPU)));
@@ -288,9 +288,9 @@ int main(int argc, char** argv) {
     TEST_CASE((TestCorrectness<uchar, BORDER_TYPE_CONSTANT>(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0},
                                                             eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar3, BORDER_TYPE_REPLICATE>(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f,
-                                                             {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU)));
-    TEST_CASE((TestCorrectness<float1, BORDER_TYPE_WRAP>(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f,
-                                                         {500.0, 500.0, 0.0, 0.0}, eDeviceType::CPU)));
+                                                              {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU)));
+    TEST_CASE((TestCorrectness<float1, BORDER_TYPE_WRAP>(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0},
+                                                         eDeviceType::CPU)));
 
     TEST_CASE((TestCorrectness<uchar3, BORDER_TYPE_CONSTANT>(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0},
                                                              eDeviceType::CPU)));
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp b/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp
index 3b5b5284..3121d7d9 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_bnd_box.cpp
@@ -23,7 +23,7 @@ THE SOFTWARE.
 #include <algorithm>
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <iostream>
 #include <op_bnd_box.hpp>
 
@@ -85,8 +85,8 @@ template <typename T, typename BT = detail::BaseType<T>>
 void GenerateGoldenBndBox(std::vector<BT> &input, std::vector<BT> &output, int32_t batchSize, int32_t width,
                           int32_t height, const BndBoxes &bboxes) {
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
 
     // Working type for internal pixel format, which has 4 channels.
     using WorkType = detail::MakeType<BT, 4>;
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp b/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp
index f145b1b4..e4e9aa95 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_center_crop.cpp
@@ -23,7 +23,7 @@ THE SOFTWARE.
 #include <cstdint>
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <op_center_crop.hpp>
 #include "test_helpers.hpp"
 
@@ -48,8 +48,8 @@ namespace {
 template <typename T, typename BT = detail::BaseType<T>>
 void GenerateGoldenCrop(std::vector<BT>& input, std::vector<BT>& output, int32_t batchSize, int32_t width, int32_t height, Size2D cropSize) {
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, cropSize.w, cropSize.h);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, cropSize.w, cropSize.h);
 
     int topLeftX = (width >> 1) - (cropSize.w >> 1);
     int topLeftY = (height >> 1) - (cropSize.h >> 1);
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp b/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp
index 43abf6c1..f332016f 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_composite.cpp
@@ -21,7 +21,7 @@
 
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <op_composite.hpp>
 
 #include "test_helpers.hpp"
@@ -56,17 +56,17 @@ std::vector<detail::BaseType<OutputType>> GoldenComposite(std::vector<detail::Ba
     // number of elements as the InputType. For example, if input type is uchar3, working type is float3.
     using WorkType = detail::MakeType<float, detail::NumElements<InputType>>;
 
-    // Wrap input data into ImageWrappers for easy data access
-    ImageWrapper<InputType> fgWrap(foreground, batchSize, width, height);
-    ImageWrapper<InputType> bgWrap(background, batchSize, width, height);
-    ImageWrapper<MaskType> maskWrap(mask, batchSize, width, height);
+    // Wrap input data into TensorWrappers for easy data access
+    TensorWrapper<InputType> fgWrap(foreground, batchSize, width, height);
+    TensorWrapper<InputType> bgWrap(background, batchSize, width, height);
+    TensorWrapper<MaskType> maskWrap(mask, batchSize, width, height);
 
     // Size of the output depends on the requested number of output channels. If it is 3, then the output images will
     // have 3 channels. If it is 4, then an additional alpha channel is added to the output. This alpha channel is
     // always fully on.
     int numOutElements = batchSize * width * height * detail::NumElements<OutputType>;
     std::vector<detail::BaseType<OutputType>> output(numOutElements);
-    ImageWrapper<OutputType> outWrap(output, batchSize, width, height);
+    TensorWrapper<OutputType> outWrap(output, batchSize, width, height);
 
     for (int b = 0; b < batchSize; b++) {
         for (int y = 0; y < height; y++) {
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp b/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp
index eaa714a9..726b33d1 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_convert_to.cpp
@@ -22,7 +22,7 @@ THE SOFTWARE.
 
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <op_convert_to.hpp>
 
 #include "test_helpers.hpp"
@@ -55,8 +55,8 @@ std::vector<BT_DEST> GoldenConvertTo(std::vector<BT_SRC>& input, int32_t batchSi
     std::vector<BT_DEST> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<SRC_DT> src(input, batchSize, width, height);
-    ImageWrapper<DEST_DT> dst(output, batchSize, width, height);
+    TensorWrapper<SRC_DT> src(input, batchSize, width, height);
+    TensorWrapper<DEST_DT> dst(output, batchSize, width, height);
 
     using AB_DT = decltype(float() * BT_SRC() * BT_DEST());
     using work_type = detail::MakeType<AB_DT, detail::NumElements<DEST_DT>>;
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp
index 4320f04e..34a0191a 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp
@@ -57,10 +57,11 @@ std::vector<BT> GoldenCopyMakeBorder(std::vector<BT> input, int batchSize, Size2
 
     // Wrap the input images in a BorderWrapper to handle out of bounds image behavior. The BorderWrapper has already
     // been tested in another test so it can be used reliably.
-    BorderWrapper<T, BorderType> inputWrap(ImageWrapper<T>(input, batchSize, inputSize.w, inputSize.h), borderVal);
+    auto inputWrap =
+        MakeBorderWrapper<BorderType>(TensorWrapper<T>(input, batchSize, inputSize.w, inputSize.h), borderVal);
 
     std::vector<BT> output(batchSize * outputSize.h * outputSize.w * channels);
-    ImageWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
+    TensorWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
 
     for (int b = 0; b < batchSize; b++) {
         for (int y = 0; y < outputSize.h; y++) {
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp b/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp
index 03cbf9b4..a161cea7 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_custom_crop.cpp
@@ -22,7 +22,7 @@ THE SOFTWARE.
 #include <algorithm>
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <cstdint>
 #include <op_custom_crop.hpp>
 
@@ -50,8 +50,8 @@ template <typename T, typename BT = detail::BaseType<T>>
 void GenerateGoldenCrop(std::vector<BT>& input, std::vector<BT>& output, int32_t batchSize, int32_t width,
                         int32_t height, Box_t cropRect) {
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, cropRect.width, cropRect.height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, cropRect.width, cropRect.height);
 
     for (int b = 0; b < batchSize; b++) {
         for (int y = 0; y < cropRect.height; y++) {
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp b/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp
index 52c3735d..3c06c397 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_cvt_color.cpp
@@ -22,7 +22,7 @@ THE SOFTWARE.
 
 #include <core/detail/swizzling.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <op_cvt_color.hpp>
 
 #include "test_helpers.hpp"
@@ -34,9 +34,9 @@ namespace {
 
 template <typename T, eSwizzle S, typename BT = detail::BaseType<T>>
 std::vector<BT> GoldenReorder(std::vector<BT>& input, int samples, int width, int height) {
-    ImageWrapper<T> inputWrap(input, samples, width, height);
+    TensorWrapper<T> inputWrap(input, samples, width, height);
     std::vector<BT> output(samples * width * height * detail::NumElements<T>);
-    ImageWrapper<T> outputWrap(output, samples, width, height);
+    TensorWrapper<T> outputWrap(output, samples, width, height);
 
     for (int b = 0; b < samples; b++) {
         for (int y = 0; y < height; y++) {
@@ -51,9 +51,9 @@ std::vector<BT> GoldenReorder(std::vector<BT>& input, int samples, int width, in
 
 template <typename T, eSwizzle S, typename BT = detail::BaseType<T>>
 std::vector<BT> GoldenYUVToRGB(std::vector<BT>& input, int samples, int width, int height, float delta) {
-    ImageWrapper<T> inputWrap(input, samples, width, height);
+    TensorWrapper<T> inputWrap(input, samples, width, height);
     std::vector<BT> output(samples * width * height * detail::NumElements<T>);
-    ImageWrapper<T> outputWrap(output, samples, width, height);
+    TensorWrapper<T> outputWrap(output, samples, width, height);
 
     for (int b = 0; b < samples; b++) {
         for (int y = 0; y < height; y++) {
@@ -77,9 +77,9 @@ std::vector<BT> GoldenYUVToRGB(std::vector<BT>& input, int samples, int width, i
 
 template <typename T, eSwizzle S, typename BT = detail::BaseType<T>>
 std::vector<BT> GoldenRGBToYUV(std::vector<BT>& input, int samples, int width, int height, float delta) {
-    ImageWrapper<T> inputWrap(input, samples, width, height);
+    TensorWrapper<T> inputWrap(input, samples, width, height);
     std::vector<BT> output(samples * width * height * detail::NumElements<T>);
-    ImageWrapper<T> outputWrap(output, samples, width, height);
+    TensorWrapper<T> outputWrap(output, samples, width, height);
 
     for (int b = 0; b < samples; b++) {
         for (int y = 0; y < height; y++) {
@@ -104,11 +104,11 @@ std::vector<BT> GoldenRGBToYUV(std::vector<BT>& input, int samples, int width, i
 
 template <typename T, eSwizzle S, typename BT = detail::BaseType<T>>
 std::vector<BT> GoldenRGBToGrayscale(std::vector<BT>& input, int samples, int width, int height) {
-    ImageWrapper<T> inputWrap(input, samples, width, height);
+    TensorWrapper<T> inputWrap(input, samples, width, height);
     std::vector<BT> output(samples * width * height);
 
     // Output must always be uchar1 for grayscale
-    ImageWrapper<uchar1> outputWrap(output, samples, width, height);
+    TensorWrapper<uchar1> outputWrap(output, samples, width, height);
 
     for (int b = 0; b < samples; b++) {
         for (int y = 0; y < height; y++) {
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp b/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp
index 36bfbcc4..1a83da71 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_flip.cpp
@@ -23,7 +23,7 @@ THE SOFTWARE.
 #include <algorithm>
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <op_flip.hpp>
 #include "test_helpers.hpp"
 
@@ -52,8 +52,8 @@ std::vector<BT> GoldenFlip(std::vector<BT>& input, int32_t batchSize, int32_t wi
     std::vector<BT> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
 
     for (int b = 0; b < batchSize; ++b) {
         for (int y = 0; y < height; ++y) {
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp b/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp
index d24df72e..71b1014e 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_gamma_contrast.cpp
@@ -24,7 +24,7 @@ THE SOFTWARE.
 #include "core/detail/casting.hpp"
 #include "core/detail/type_traits.hpp"
 #include "core/detail/math/vectorized_type_math.hpp"
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <iostream>
 #include <op_gamma_contrast.hpp>
 #include "operator_types.h"
@@ -56,8 +56,8 @@ std::vector<BT> GoldenGammaContrast(std::vector<BT>& input, int32_t batchSize, i
     std::vector<BT> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
     using work_type = detail::MakeType<float, detail::NumElements<T>>;
 
     for (int b = 0; b < batchSize; ++b) {
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp b/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp
index 05a5009c..513df448 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_histogram.cpp
@@ -21,7 +21,7 @@ THE SOFTWARE.
 */
 
 #include <algorithm>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <iostream>
 #include <op_histogram.hpp>
 #include <optional>
@@ -57,7 +57,7 @@ std::vector<BT> GoldenHistogram(std::vector<uchar>& input, int32_t batchSize, in
     std::vector<BT> local_histogram(256);
 
     // Wrap the input vector for simplified data access
-    ImageWrapper<uchar> src(input, batchSize, width, height);
+    TensorWrapper<uchar> src(input, batchSize, width, height);
 
     for (int b = 0; b < batchSize; ++b) {
         std::fill(local_histogram.begin(), local_histogram.end(), 0);
@@ -94,8 +94,8 @@ std::vector<BT> GoldenHistogramMask(std::vector<uchar>& input, std::vector<uchar
     std::vector<BT> local_histogram(256);
 
     // Wrap input/mask vectors for simplified data access
-    ImageWrapper<uchar> src(input, batchSize, width, height);
-    ImageWrapper<uchar> maskWrap(mask, batchSize, width, height);
+    TensorWrapper<uchar> src(input, batchSize, width, height);
+    TensorWrapper<uchar> maskWrap(mask, batchSize, width, height);
 
     for (int b = 0; b < batchSize; ++b) {
         std::fill(local_histogram.begin(), local_histogram.end(), 0);
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp b/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp
index fd850ddb..3b077b36 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_normalize.cpp
@@ -22,7 +22,7 @@ THE SOFTWARE.
 
 #include <core/detail/casting.hpp>
 #include <core/detail/type_traits.hpp>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <op_normalize.hpp>
 
 #include "test_helpers.hpp"
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp
index 634344a4..18847056 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp
@@ -21,12 +21,13 @@ THE SOFTWARE.
 */
 
 #include <algorithm>
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <core/wrappers/interpolation_wrapper.hpp>
 #include <iostream>
 #include <op_remap.hpp>
-#include "core/detail/internal_structs.hpp"
+
 #include "core/detail/casting.hpp"
+#include "core/detail/internal_structs.hpp"
 #include "core/detail/math/vectorized_type_math.hpp"
 #include "core/detail/type_traits.hpp"
 #include "operator_types.h"
@@ -39,11 +40,11 @@ using namespace roccv::detail;
 // Keep all non-entrypoint functions in an anonymous namespace to prevent redefinition errors across translation units.
 namespace {
 
-RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 &mapSize, bool alignCorners, eRemapType mapValueType)
-{
+RemapParams GetRemapParams(const int2& srcSize, const int2& dstSize, const int2& mapSize, bool alignCorners,
+                           eRemapType mapValueType) {
     RemapParams params;
 
-    switch(mapValueType) {
+    switch (mapValueType) {
         case REMAP_ABSOLUTE:
             params.srcScale = make_float2(0.f, 0.f);
             params.mapScale = StaticCast<float2>(mapSize) / StaticCast<float2>(dstSize);
@@ -54,7 +55,7 @@ RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2
         case REMAP_ABSOLUTE_NORMALIZED:
             params.srcScale = make_float2(0.f, 0.f);
             params.mapScale = StaticCast<float2>(mapSize) / StaticCast<float2>(dstSize);
-            params.valScale  = (StaticCast<float2>(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f;
+            params.valScale = (StaticCast<float2>(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f;
             params.srcOffset = params.valScale - (alignCorners ? 0.f : .5f);
             params.dstOffset = 0.f;
             break;
@@ -87,24 +88,24 @@ RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2
  */
 template <typename T, eBorderType BorderType, eInterpolationType InterpType, eInterpolationType MapInterpType,
           typename BT = detail::BaseType<T>>
-std::vector<BT> GoldenRemap(std::vector<BT>& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth, int32_t inHeight, int32_t outWidth, 
-                            int32_t outHeight, int32_t mapWidth, int32_t mapHeight, std::vector<float2>& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) {
-    
+std::vector<BT> GoldenRemap(std::vector<BT>& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth,
+                            int32_t inHeight, int32_t outWidth, int32_t outHeight, int32_t mapWidth, int32_t mapHeight,
+                            std::vector<float2>& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) {
     int channels = detail::NumElements<T>;
     int outputSize = batchSize * outWidth * outHeight * channels;
     std::vector<BT> output(outputSize);
 
     // Create interpolation wrapper for input vector
-    InterpolationWrapper<T, BorderType, InterpType> src((BorderWrapper<T, BorderType>(
-        ImageWrapper<T>(input, batchSize, inWidth, inHeight), detail::SaturateCast<T>(borderValue))));
+    auto src = MakeInterpolationWrapper<InterpType>(MakeBorderWrapper<BorderType>(
+        TensorWrapper<T>(input, batchSize, inWidth, inHeight), detail::SaturateCast<T>(borderValue)));
 
     // Wrap the output vector for simplified data access
-    ImageWrapper<T> dst(output, batchSize, outWidth, outHeight);
+    TensorWrapper<T> dst(output, batchSize, outWidth, outHeight);
 
     // Create an interpolation wrapper for the map tensor
-    // InterpolationWrapper<float2, BorderType, MapInterpType> wrappedMapTensor(map, make_float2(0, 0));
-    InterpolationWrapper<float2, BorderType, MapInterpType> map((BorderWrapper<float2, BorderType>(
-        ImageWrapper<float2>(mapData.data(), mapBatchSize, mapWidth, mapHeight), detail::SaturateCast<float2>(borderValue))));
+    auto map = MakeInterpolationWrapper<MapInterpType>(
+        MakeBorderWrapper<BorderType>(TensorWrapper<float2>(mapData.data(), mapBatchSize, mapWidth, mapHeight),
+                                      detail::SaturateCast<float2>(borderValue)));
 
     int2 srcSize = make_int2(src.width(), src.height());
     int2 dstSize = make_int2(dst.width(), dst.height());
@@ -119,13 +120,12 @@ std::vector<BT> GoldenRemap(std::vector<BT>& input, int32_t batchSize, int32_t m
     for (int b = 0; b < dst.batches(); b++) {
         for (int y = 0; y < dst.height(); y++) {
             for (int x = 0; x < dst.width(); x++) {
-                
                 dstCoord.x = static_cast<float>(x);
                 dstCoord.y = static_cast<float>(y);
-                
+
                 mapCoord.x = (dstCoord.x + params.dstOffset) * params.mapScale.x;
                 mapCoord.y = (dstCoord.y + params.dstOffset) * params.mapScale.y;
-                
+
                 float2 mapValue = map.at((mapBatchSize == 1 ? 0 : b), mapCoord.y, mapCoord.x, 0);
 
                 srcCoord.x = dstCoord.x * params.srcScale.x + mapValue.x * params.valScale.x + params.srcOffset.x;
@@ -162,7 +162,8 @@ std::vector<BT> GoldenRemap(std::vector<BT>& input, int32_t batchSize, int32_t m
  */
 template <typename T, eBorderType BorderType, eInterpolationType InterpType, eInterpolationType MapInterpType,
           typename BT = detail::BaseType<T>>
-void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight, int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType,
+void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight,
+                     int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType,
                      bool alignCorners, eDeviceType device) {
     // Create input and output tensor based on test parameters
     Tensor input(batchSize, {inWidth, inHeight}, format, device);
@@ -174,7 +175,7 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight,
 
     // Copy generated input data into input tensor
     CopyVectorIntoTensor(input, inputData);
-    
+
     int mapSize = mapBatchSize * mapWidth * mapHeight;
 
     std::vector<float2> mapData(mapSize);
@@ -188,11 +189,10 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight,
                 }
             }
         }
-    }
-    else if (mapType == REMAP_ABSOLUTE_NORMALIZED) {
+    } else if (mapType == REMAP_ABSOLUTE_NORMALIZED) {
         for (int b = 0; b < mapBatchSize; b++) {
-            for (int y = 0; y < mapHeight; y++){
-                for (int x = 0; x < mapWidth; x++){
+            for (int y = 0; y < mapHeight; y++) {
+                for (int x = 0; x < mapWidth; x++) {
                     float normX = ((2.0f * static_cast<float>(x)) / static_cast<float>(mapWidth - 1)) - 1.0f;
                     float normY = ((2.0f * static_cast<float>(y)) / static_cast<float>(mapHeight - 1)) - 1.0f;
 
@@ -204,11 +204,10 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight,
                 }
             }
         }
-    }
-    else if (mapType == REMAP_RELATIVE_NORMALIZED) {
+    } else if (mapType == REMAP_RELATIVE_NORMALIZED) {
         for (int b = 0; b < mapBatchSize; b++) {
-            for (int y = 0; y < mapHeight; y++){
-                for (int x = 0; x < mapWidth; x++){
+            for (int y = 0; y < mapHeight; y++) {
+                for (int x = 0; x < mapWidth; x++) {
                     // Generate normalized coordinates in [-1, 1] range
                     float normX = ((2.0f * static_cast<float>(x)) / static_cast<float>(mapWidth - 1)) - 1.0f;
                     float normY = ((2.0f * static_cast<float>(y)) / static_cast<float>(mapHeight - 1)) - 1.0f;
@@ -235,7 +234,8 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight,
     hipStream_t stream;
     HIP_VALIDATE_NO_ERRORS(hipStreamCreate(&stream));
     Remap op;
-    op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue, device);
+    op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue,
+       device);
     HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream));
     HIP_VALIDATE_NO_ERRORS(hipStreamDestroy(stream));
 
@@ -243,9 +243,9 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight,
     std::vector<BT> result(output.shape().size());
     CopyTensorIntoVector(result, output);
 
-    std::vector<BT> ref = GoldenRemap<T, BorderType, InterpType, MapInterpType>(inputData, batchSize, mapBatchSize, inWidth,
-                                                                                        inHeight, outWidth, outHeight, 
-                                                                                        mapWidth, mapHeight, mapData, mapType, alignCorners, borderValue);
+    std::vector<BT> ref = GoldenRemap<T, BorderType, InterpType, MapInterpType>(
+        inputData, batchSize, mapBatchSize, inWidth, inHeight, outWidth, outHeight, mapWidth, mapHeight, mapData,
+        mapType, alignCorners, borderValue);
 
     // Compare data in actual output versus the generated golden reference image
     CompareVectors(result, ref);
@@ -258,144 +258,186 @@ int main(int argc, char** argv) {
     TEST_CASES_BEGIN();
 
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU)));
-    
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        false, eDeviceType::GPU)));
+
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        true, eDeviceType::GPU)));
 
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::GPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::GPU)));
 
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        false, eDeviceType::CPU)));
 
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar3, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED,
+        true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar4, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
                                eInterpolationType::INTERP_TYPE_NEAREST>(
-        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU)));
+        1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED,
+        true, eDeviceType::CPU)));
 
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, false, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU)));
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::CPU)));
     TEST_CASE((TestCorrectness<uchar1, eBorderType::BORDER_TYPE_CONSTANT, eInterpolationType::INTERP_TYPE_NEAREST,
-                               eInterpolationType::INTERP_TYPE_NEAREST>(
-        2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU)));
-
-
+                               eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8,
+                                                                        make_float4(0.0f, 0.0f, 0.0f, 1.0f),
+                                                                        REMAP_ABSOLUTE, true, eDeviceType::CPU)));
 
     TEST_CASES_END();
 }
\ No newline at end of file
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp
index d7c385d0..02f567cf 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp
@@ -50,13 +50,12 @@ std::vector<BT> GoldenResize(std::vector<detail::BaseType<T>> &input, int batchS
     size_t numOutputElements = batchSize * outputSize.w * outputSize.h * detail::NumElements<T>;
 
     std::vector<detail::BaseType<T>> output(numOutputElements);
-    ImageWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
+    TensorWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
 
     // Use the replicate (or clamping) border mode by default to handle out of bounds conditions with certain
     // interpolation modes.
-    InterpolationWrapper<T, eBorderType::BORDER_TYPE_REPLICATE, InterpType> inputWrap(
-        BorderWrapper<T, eBorderType::BORDER_TYPE_REPLICATE>(
-            ImageWrapper<T>(input, batchSize, inputSize.w, inputSize.h), T{}));
+    auto inputWrap = MakeInterpolationWrapper<InterpType>(MakeBorderWrapper<eBorderType::BORDER_TYPE_REPLICATE>(
+        TensorWrapper<T>(input, batchSize, inputSize.w, inputSize.h), T{}));
 
     // Determine the scaling factor required to map from the output coordinates to the corresponding input coordinates
     // on both the x and y axes.
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp
index 56deeabb..fb9fa55e 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp
@@ -67,10 +67,9 @@ std::vector<detail::BaseType<T>> GoldenRotate(std::vector<detail::BaseType<T>>&
 
     T borderVal = detail::SaturateCast<T>(make_float4(0.0f, 0.0f, 0.0f, 0.0f));
 
-    ImageWrapper<T> outputWrapper(output, batchSize, imageSize.w, imageSize.h);
-    InterpolationWrapper<T, eBorderType::BORDER_TYPE_CONSTANT, InterpType> inputWrapper(
-        BorderWrapper<T, eBorderType::BORDER_TYPE_CONSTANT>(ImageWrapper<T>(input, batchSize, imageSize.w, imageSize.h),
-                                                            borderVal));
+    TensorWrapper<T> outputWrapper(output, batchSize, imageSize.w, imageSize.h);
+    auto inputWrapper = MakeInterpolationWrapper<InterpType>(MakeBorderWrapper<eBorderType::BORDER_TYPE_CONSTANT>(
+        TensorWrapper<T>(input, batchSize, imageSize.w, imageSize.h), borderVal));
 
     /**
      * Affine warp for a combined rotation and translate looks like the following when in its inverse representation:
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp b/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp
index 09ef3262..279959e0 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_thresholding.cpp
@@ -24,7 +24,7 @@ THE SOFTWARE.
 #include "core/detail/casting.hpp"
 #include "core/detail/type_traits.hpp"
 #include "core/detail/math/vectorized_type_math.hpp"
-#include <core/wrappers/image_wrapper.hpp>
+#include <core/wrappers/tensor_wrapper.hpp>
 #include <iostream>
 #include <op_thresholding.hpp>
 #include "operator_types.h"
@@ -59,8 +59,8 @@ std::vector<BT> GoldenBinaryThreshold(std::vector<BT>& input, int32_t batchSize,
     std::vector<BT> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
     
     for (int b = 0; b < batchSize; ++b) {
         double th = thresh[b];
@@ -88,8 +88,8 @@ std::vector<BT> GoldenBinaryInvThreshold(std::vector<BT>& input, int32_t batchSi
     std::vector<BT> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
     
     for (int b = 0; b < batchSize; ++b) {
         double th = thresh[b];
@@ -117,8 +117,8 @@ std::vector<BT> GoldenTruncThreshold(std::vector<BT>& input, int32_t batchSize,
     std::vector<BT> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
     
     for (int b = 0; b < batchSize; ++b) {
         double th = thresh[b];
@@ -145,8 +145,8 @@ std::vector<BT> GoldenToZeroThreshold(std::vector<BT>& input, int32_t batchSize,
     std::vector<BT> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
     
     for (int b = 0; b < batchSize; ++b) {
         double th = thresh[b];
@@ -173,8 +173,8 @@ std::vector<BT> GoldenToZeroInvThreshold(std::vector<BT>& input, int32_t batchSi
     std::vector<BT> output(input.size());
 
     // Wrap input/output vectors for simplified data access
-    ImageWrapper<T> src(input, batchSize, width, height);
-    ImageWrapper<T> dst(output, batchSize, width, height);
+    TensorWrapper<T> src(input, batchSize, width, height);
+    TensorWrapper<T> dst(output, batchSize, width, height);
     
     for (int b = 0; b < batchSize; ++b) {
         double th = thresh[b];
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp
index 93c91ae9..5af99149 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp
@@ -55,12 +55,12 @@ std::vector<detail::BaseType<T>> GoldenWarpAffine(std::vector<detail::BaseType<T
                                                   const std::array<float, 6>& mat, bool isInverted, int batchSize,
                                                   Size2D inputSize, Size2D outputSize, float4 borderValue) {
     // Create interpolation wrapper for input vector
-    InterpolationWrapper<T, BorderType, InterpType> inputWrap((BorderWrapper<T, BorderType>(
-        ImageWrapper<T>(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast<T>(borderValue))));
+    auto inputWrap = MakeInterpolationWrapper<InterpType>(MakeBorderWrapper<BorderType>(
+        TensorWrapper<T>(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast<T>(borderValue)));
 
-    // Create ImageWrapper for output vector. We also need to create said output vector.
+    // Create TensorWrapper for output vector. We also need to create said output vector.
     std::vector<detail::BaseType<T>> output(batchSize * outputSize.w * outputSize.h * detail::NumElements<T>);
-    ImageWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
+    TensorWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
 
     // Prepare the transformation matrix. An affine transform is effectively a 3x3 perspective transform with its last
     // row set to [0, 0, 1].
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp
index 1461365c..2c7559cb 100644
--- a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp
+++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp
@@ -52,12 +52,12 @@ std::vector<detail::BaseType<T>> GoldenWarpPerspective(std::vector<detail::BaseT
                                                        const std::array<float, 9>& mat, bool isInverted, int batchSize,
                                                        Size2D inputSize, Size2D outputSize, float4 borderValue) {
     // Create interpolation wrapper for input vector
-    InterpolationWrapper<T, BorderType, InterpType> inputWrap((BorderWrapper<T, BorderType>(
-        ImageWrapper<T>(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast<T>(borderValue))));
+    auto inputWrap = MakeInterpolationWrapper<InterpType>(MakeBorderWrapper<BorderType>(
+        TensorWrapper<T>(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast<T>(borderValue)));
 
-    // Create ImageWrapper for output vector. We also need to create said output vector.
+    // Create TensorWrapper for output vector. We also need to create said output vector.
     std::vector<detail::BaseType<T>> output(batchSize * outputSize.w * outputSize.h * detail::NumElements<T>);
-    ImageWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
+    TensorWrapper<T> outputWrap(output, batchSize, outputSize.w, outputSize.h);
 
     // If given matrix is not the inverted representation of the transformation, we have to invert it first (since we
     // transform from output -> input).