From b6a9c242a4b4bb81c84df33e63b1ba8e09a2fd45 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Wed, 10 Jun 2026 21:20:04 +0200 Subject: [PATCH 1/6] sec(parser): wrap untrusted MemorySegment.asSlice calls in MemorySegments helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SECURITY.md promises that malformed input throws VortexException, never an IndexOutOfBoundsException or other unchecked JDK exception. Several call sites in the file-open and scan paths violated this when fed adversarial offsets/lengths read from the on-disk schema: - VortexReader.parse: file shorter than the 8-byte trailer made `bodyBytes = size - TRAILER_SIZE` negative, then `seg.asSlice(negative, 8)` threw IOOBE. - VortexReader.parse: `trailer.postscriptLen()` greater than the body made `postscriptOffset` negative, same outcome. - VortexReader.slice (public, exposed via VortexHandle) forwarded any caller-supplied offset/length straight to asSlice with no bounds check. - VortexReader.readFlatStats / ScanIterator.readFlatStats: segment offsets taken from footer.segmentSpecs() were sliced unchecked. - VortexHttpReader: trailer + postscript offsets in the prefetched HTTP tail were sliced unchecked. - FlatSegmentDecoder: per-buffer dataOffset accumulated from attacker-controlled `Buffer.padding()` and `Buffer.length()` was sliced unchecked. - PostscriptParser.slice: local helper called asSlice without converting IOOBE to VortexException (checkBlobBounds already runs upstream, but the helper should defend the same contract on its own). Introduces `io.github.dfa1.vortex.core.MemorySegments.slice(MemorySegment, long, long, String)`. The static helper throws VortexException — with the caller-supplied context label baked into the message — for negative offsets, negative lengths, and overflow-prone offset+length combinations. Replaces eight asSlice call sites across reader/scan/encoding. 
Each surviving raw asSlice call now operates only on already-validated internal offsets (already-decoded array buffer projection in VarBinArray, ArraySegments slicing of post-decode buffers in ScanIterator's narrowToRows, ProtoReader's own bounds-checked cursor, the constant-offset magic check in Trailer, and the decoded-output buffer in ZstdEncoding). A Checkstyle ban on raw asSlice in io/scan/encoding packages is a planned follow-up so future regressions are caught at build time. MemorySegmentsTest covers in-range slicing, the zero-length-at-end edge, negative offsets, negative lengths, offset+length past segment size, length alone larger than the segment, and an overflow-prone (Long.MAX_VALUE - 1) + 100 input that would defeat a naive `off + len > segSize` check. ./mvnw verify — all unit + integration tests pass (incl. Rust round-trips). Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/MemorySegments.java | 47 ++++++++++ .../vortex/encoding/FlatSegmentDecoder.java | 3 +- .../dfa1/vortex/core/MemorySegmentsTest.java | 89 +++++++++++++++++++ .../dfa1/vortex/io/PostscriptParser.java | 3 +- .../dfa1/vortex/io/VortexHttpReader.java | 5 +- .../github/dfa1/vortex/io/VortexReader.java | 15 ++-- .../github/dfa1/vortex/scan/ScanIterator.java | 3 +- 7 files changed, 154 insertions(+), 11 deletions(-) create mode 100644 core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java create mode 100644 core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java diff --git a/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java b/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java new file mode 100644 index 00000000..5a6941da --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java @@ -0,0 +1,47 @@ +package io.github.dfa1.vortex.core; + +import java.lang.foreign.MemorySegment; + +/// Bounds-checked wrappers for {@link MemorySegment} slicing on untrusted input. +/// +///

Every call site in {@code io}, {@code scan}, and {@code encoding} that slices a +/// memory-mapped file region by an offset or length read from the on-disk schema must +/// route through {@link #slice(MemorySegment, long, long, String)} instead of calling +/// {@link MemorySegment#asSlice(long, long)} directly. The contract: malformed input +/// throws {@link VortexException}, never {@link IndexOutOfBoundsException}, +/// {@link IllegalArgumentException}, or any other unchecked JDK exception. +public final class MemorySegments { + + private MemorySegments() { + } + + /// Returns a slice of {@code seg} starting at {@code off} for {@code len} bytes, + /// rejecting out-of-range or overflow-prone input with a {@link VortexException} + /// labelled by {@code context}. + /// + /// @param seg backing segment + /// @param off start offset in bytes; must be {@code >= 0} and {@code <= seg.byteSize() - len} + /// @param len slice length in bytes; must be {@code >= 0} and {@code <= seg.byteSize() - off} + /// @param context short label used in the exception message (e.g. {@code "footer blob"}, + /// {@code "segment spec data"}) so malformed-input errors point at the + /// specific on-disk structure rather than a generic offset + /// @return the bounds-checked slice + /// @throws VortexException if {@code off} or {@code len} is negative, or if + /// {@code off + len > seg.byteSize()} + public static MemorySegment slice(MemorySegment seg, long off, long len, String context) { + long segSize = seg.byteSize(); + if (off < 0) { + throw new VortexException("malformed " + context + ": negative offset " + off); + } + if (len < 0) { + throw new VortexException("malformed " + context + ": negative length " + len); + } + // Overflow-safe form of `off + len > segSize`. The subtraction can't underflow because + // `len > segSize` is tested first and short-circuits, so `segSize - len` is never negative.
+ if (len > segSize || off > segSize - len) { + throw new VortexException("malformed " + context + ": offset+length " + + off + "+" + len + " exceeds segment size " + segSize); + } + return seg.asSlice(off, len); + } +} diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java b/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java index a5ffff87..41dad3bb 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java @@ -2,6 +2,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.ArrayStats; +import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.array.Array; import io.github.dfa1.vortex.fbs.Buffer; @@ -55,7 +56,7 @@ public Array decode(MemorySegment seg, List encodingSpecs, for (int i = 0; i < numBuffers; i++) { Buffer bufDesc = fbArray.buffers(i); dataOffset += bufDesc.padding(); - bufs[i] = seg.asSlice(dataOffset, bufDesc.length()); + bufs[i] = MemorySegments.slice(seg, dataOffset, bufDesc.length(), "encoded buffer " + i); dataOffset += bufDesc.length(); } diff --git a/core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java new file mode 100644 index 00000000..ab00e0fe --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java @@ -0,0 +1,89 @@ +package io.github.dfa1.vortex.core; + +import org.junit.jupiter.api.Test; + +import java.lang.foreign.MemorySegment; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class MemorySegmentsTest { + + private final MemorySegment sut = MemorySegment.ofArray(new byte[16]); + + @Test + void inRangeSliceReturnsExpectedRegion() { + // Given valid offset+length inside the 16-byte backing array. 
+ + // When + MemorySegment slice = MemorySegments.slice(sut, 4, 8, "test region"); + + // Then + assertThat(slice.byteSize()).isEqualTo(8); + } + + @Test + void zeroLengthAtEndIsAllowed() { + // Given — offset at the end, zero-length. The JDK permits this; we must too. + + // When + Then + assertThat(MemorySegments.slice(sut, 16, 0, "tail").byteSize()).isEqualTo(0); + } + + @Test + void negativeOffsetThrowsVortexException() { + // Given — adversarial offset from a malformed file. + // Without the wrapper, MemorySegment.asSlice throws IndexOutOfBoundsException — + // not VortexException — breaking the contract documented in SECURITY.md. + + // When + Then + assertThatThrownBy(() -> MemorySegments.slice(sut, -1, 4, "region")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("region") + .hasMessageContaining("negative offset"); + } + + @Test + void negativeLengthThrowsVortexException() { + // Given — adversarial length. + + // When + Then + assertThatThrownBy(() -> MemorySegments.slice(sut, 0, -1, "region")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("region") + .hasMessageContaining("negative length"); + } + + @Test + void offsetPlusLengthBeyondSegmentSizeThrows() { + // Given — 16-byte buffer, request 12 bytes starting at offset 8. + + // When + Then + assertThatThrownBy(() -> MemorySegments.slice(sut, 8, 12, "blob")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("blob") + .hasMessageContaining("exceeds segment size 16"); + } + + @Test + void lengthAloneBiggerThanSegmentThrows() { + // Given — len > segSize even with off=0. + + // When + Then + assertThatThrownBy(() -> MemorySegments.slice(sut, 0, 17, "blob")) + .isInstanceOf(VortexException.class); + } + + @Test + void overflowingOffsetPlusLengthRejected() { + // Given — adversarial values designed to overflow a naive `off + len` computation. + // (off + len) wraps to a negative number, which would pass a naive + // `off + len > segSize` check.
The wrapper's overflow-safe form catches it. + long off = Long.MAX_VALUE - 1; + long len = 100; + + // When + Then + assertThatThrownBy(() -> MemorySegments.slice(sut, off, len, "blob")) + .isInstanceOf(VortexException.class); + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java b/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java index 6a6cfd6b..dd59924f 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.core.VortexException; @@ -119,7 +120,7 @@ static ParsedFile parseBlobs(ByteBuffer footerBuf, ByteBuffer layoutBuf, ByteBuf } private static ByteBuffer slice(MemorySegment seg, long offset, long length) { - return seg.asSlice(offset, length).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + return MemorySegments.slice(seg, offset, length, "postscript blob").asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); } static Footer convertFooter(io.github.dfa1.vortex.fbs.Footer f) { diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java index 86c96b39..4d9eb279 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java @@ -3,6 +3,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; import 
io.github.dfa1.vortex.encoding.Registry; @@ -79,7 +80,7 @@ public static VortexHttpReader open(URI uri, Registry registry) throws IOExcepti MemorySegment tailSeg = MemorySegment.ofArray(tail); long trailerOff = tailLen - VortexFormat.TRAILER_SIZE; long bodyBytes = fileSize - VortexFormat.TRAILER_SIZE; - Trailer trailer = Trailer.parse(tailSeg.asSlice(trailerOff, VortexFormat.TRAILER_SIZE), bodyBytes); + Trailer trailer = Trailer.parse(MemorySegments.slice(tailSeg, trailerOff, VortexFormat.TRAILER_SIZE, "http trailer"), bodyBytes); // HTTP-specific: postscript may extend past the prefetched tail and need a larger fetch. long psOffInTail = trailerOff - trailer.postscriptLen(); @@ -89,7 +90,7 @@ public static VortexHttpReader open(URI uri, Registry registry) throws IOExcepti .formatted(trailer.postscriptLen(), TAIL_SIZE)); } - ByteBuffer postscriptBuf = tailSeg.asSlice(psOffInTail, trailer.postscriptLen()) + ByteBuffer postscriptBuf = MemorySegments.slice(tailSeg, psOffInTail, trailer.postscriptLen(), "http postscript") .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); var ps = Postscript.getRootAsPostscript(postscriptBuf); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java index 39c4d0f8..3a139d6d 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; @@ -85,11 +86,11 @@ private static VortexReader parse( MemorySegment seg, long size, Arena arena, Registry registry ) { long bodyBytes = size - VortexFormat.TRAILER_SIZE; - var trailerSeg = 
seg.asSlice(bodyBytes, VortexFormat.TRAILER_SIZE); + var trailerSeg = MemorySegments.slice(seg, bodyBytes, VortexFormat.TRAILER_SIZE, "trailer"); Trailer trailer = Trailer.parse(trailerSeg, bodyBytes); long postscriptOffset = bodyBytes - trailer.postscriptLen(); - var postscriptBuf = seg.asSlice(postscriptOffset, trailer.postscriptLen()) + var postscriptBuf = MemorySegments.slice(seg, postscriptOffset, trailer.postscriptLen(), "postscript blob") .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); PostscriptParser.ParsedFile parsed; @@ -218,15 +219,17 @@ private ArrayStats readFlatStats(Layout flat) { if (segLen < 4) { return ArrayStats.empty(); } - MemorySegment seg = fileSegment.asSlice(spec.offset(), segLen); + MemorySegment seg = MemorySegments.slice(fileSegment, spec.offset(), segLen, "stats segment"); int fbLen = seg.get(LE_INT, segLen - 4); // Reject negative fbLen (signed int from untrusted bytes) or any value that would push - // fbStart below 0 → asSlice(negative, ...) throws IndexOutOfBoundsException without this guard. + // fbStart below 0. MemorySegments.slice would catch this too, but returning empty here keeps + // the older lenient behaviour for files with corrupt stats blobs — MemorySegments is reserved + // for offsets/lengths that must be valid (the data path). if (fbLen < 0 || fbLen > segLen - 4) { return ArrayStats.empty(); } long fbStart = segLen - 4L - fbLen; - var fbBuf = seg.asSlice(fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + var fbBuf = MemorySegments.slice(seg, fbStart, fbLen, "stats flatbuffer").asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); var root = fbArray.root(); if (root == null) { @@ -238,7 +241,7 @@ private ArrayStats readFlatStats(Layout flat) { /// Zero-copy read-only slice of the memory-mapped file. 
@Override public MemorySegment slice(long offset, long length) { - return fileSegment.asSlice(offset, length).asReadOnly(); + return MemorySegments.slice(fileSegment, offset, length, "file segment").asReadOnly(); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java index d62bd284..8017b92e 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java @@ -3,6 +3,7 @@ import io.github.dfa1.vortex.core.ArrayStats; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.core.VortexException; @@ -638,7 +639,7 @@ private ArrayStats readFlatStats(Layout flat) { // segment as a ByteBuffer would fail for segments larger than 2 GB (ByteBuffer cap). int fbLen = seg.get(LE_INT, segLen - 4); long fbStart = segLen - 4L - fbLen; - ByteBuffer fbBuf = seg.asSlice(fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer fbBuf = MemorySegments.slice(seg, fbStart, fbLen, "stats flatbuffer").asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); io.github.dfa1.vortex.fbs.ArrayNode root = fbArray.root(); From b7d583a8653ffd756657217c80d8bf79896a5187 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Wed, 10 Jun 2026 21:53:00 +0200 Subject: [PATCH 2/6] sec(parser): introduce BoundedSegment wrapper at the mmap boundary [Phase 1/4] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds io.github.dfa1.vortex.core.BoundedSegment, a record wrapping a MemorySegment with a context label. 
The type does not expose MemorySegment.asSlice; the only way to derive a sub-region is BoundedSegment.slice(off, len, childContext), which routes through MemorySegments.checkRange and throws VortexException — not IndexOutOfBoundsException — on malformed input. Raw access is exposed only through unwrapForSubParser(String reason), which documents the trust transfer and requires a reason string at every call site (the string is not used at runtime), so trust transfers are greppable for audit. Phase 1 covers the file-open boundary: - VortexReader.parse: wraps the mmap'd segment as `new BoundedSegment(seg, "vortex file")` immediately after `channel.map`. Trailer + postscript slices go through `file.slice(off, len, ctx)`. - Trailer.parse: signature changed from (MemorySegment, long) to (BoundedSegment, long). The single internal `asSlice` on the magic bytes uses `unwrapForSubParser("trailer parser")` since the offset is a compile-time constant (4) on a segment whose length has already been validated to 8 bytes. - PostscriptParser.parse: signature changed from (ByteBuffer, MemorySegment, long) to (ByteBuffer, BoundedSegment). The local `slice` helper goes away; callers use `file.slice(...).asByteBufferLE()`. Explicit checkBlobBounds calls are retained so the per-blob error message ("postscript footer blob out of bounds") is more specific than BoundedSegment's generic "vortex file" parent context. - VortexHttpReader: tail array now wrapped as `BoundedSegment(MemorySegment.ofArray(tail), "http tail")` so the changed Trailer.parse signature is satisfied. Full HTTP-aware refactor lands in Phase 3. MemorySegments.slice is refactored to call a new MemorySegments.checkRange helper, which BoundedSegment's primitive readers (getByte, getIntLE, getLongLE) reuse to share the same bounds-check path without producing a slice. VortexHandle.slice (the public interface method) is NOT touched in this phase.
Its signature change to return BoundedSegment lands in Phase 2 alongside the DecodeContext.buffer migration. BoundedSegmentTest covers in-range slice, bad slice carrying parent context label, bounds-checked primitive reads, and unwrapForSubParser identity. The 7 MemorySegmentsTest cases from PR #27 still pass. ./mvnw verify — all unit + integration tests pass (incl. Rust round-trips). Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/BoundedSegment.java | 108 ++++++++++++++++++ .../dfa1/vortex/core/MemorySegments.java | 16 ++- .../dfa1/vortex/core/BoundedSegmentTest.java | 66 +++++++++++ .../dfa1/vortex/io/PostscriptParser.java | 19 ++- .../io/github/dfa1/vortex/io/Trailer.java | 8 +- .../dfa1/vortex/io/VortexHttpReader.java | 10 +- .../github/dfa1/vortex/io/VortexReader.java | 13 ++- 7 files changed, 216 insertions(+), 24 deletions(-) create mode 100644 core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java create mode 100644 core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java diff --git a/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java b/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java new file mode 100644 index 00000000..304df8c5 --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java @@ -0,0 +1,108 @@ +package io.github.dfa1.vortex.core; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/// A memory-mapped region with built-in bounds-checking for slicing on untrusted input. +/// +///

Sub-regions are meant to be derived through {@link #slice(long, long, String)} rather than +/// {@link MemorySegment#asSlice(long, long)}; it routes the offset/length through +/// {@link MemorySegments#slice} and throws {@link VortexException} on malformed input — never +/// {@link IndexOutOfBoundsException}. Note the record accessor {@code seg()} is still public. +/// +///

The {@code context} label travels with the type; nested slices receive an explicit +/// child label at the {@link #slice} site (a failed slice is reported under the parent's label). +/// Error messages thus name the on-disk structure ({@code "trailer"}, {@code "postscript blob"}, +/// {@code "encoded buffer 3"}) alongside the offending offset and length. +/// +///

The raw segment is exposed only via {@link #unwrapForSubParser(String)}, which both +/// documents the trust transfer and forces a {@code reason} string so every escape-hatch +/// site is greppable for audit. +/// +/// @param seg the backing memory-mapped region; lifetime tied to the {@link +/// java.lang.foreign.Arena Arena} that produced it +/// @param context human-readable label naming the on-disk structure this region represents +public record BoundedSegment(MemorySegment seg, String context) { + + private static final ValueLayout.OfByte BYTE = ValueLayout.JAVA_BYTE; + private static final ValueLayout.OfInt LE_INT = + ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + private static final ValueLayout.OfLong LE_LONG = + ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + /// @return total size of the bounded region in bytes + public long byteSize() { + return seg.byteSize(); + } + + /// Returns a sub-region with a fresh context label. + /// + /// @param off start offset in bytes, relative to this region + /// @param len slice length in bytes + /// @param childContext label for the resulting sub-region + /// @return the bounded sub-region + /// @throws VortexException if {@code off} or {@code len} is negative, or if + /// {@code off + len > this.byteSize()} + public BoundedSegment slice(long off, long len, String childContext) { + return new BoundedSegment( + MemorySegments.slice(seg, off, len, context), + childContext); + } + + /// Bounds-checked single-byte read. + /// + /// @param off byte offset + /// @return the byte at {@code off} + /// @throws VortexException if {@code off} is negative or {@code >= this.byteSize()} + public byte getByte(long off) { + MemorySegments.checkRange(seg, off, 1, context); + return seg.get(BYTE, off); + } + + /// Bounds-checked little-endian 32-bit read. 
+ /// + /// @param off byte offset of the 4-byte word + /// @return the int at {@code off} + /// @throws VortexException if {@code off} is negative or {@code > this.byteSize() - 4} + public int getIntLE(long off) { + MemorySegments.checkRange(seg, off, 4, context); + return seg.get(LE_INT, off); + } + + /// Bounds-checked little-endian 64-bit read. + /// + /// @param off byte offset of the 8-byte word + /// @return the long at {@code off} + /// @throws VortexException if {@code off} is negative or {@code > this.byteSize() - 8} + public long getLongLE(long off) { + MemorySegments.checkRange(seg, off, 8, context); + return seg.get(LE_LONG, off); + } + + /// Little-endian {@link ByteBuffer} view of the whole bounded region, used by the + /// FlatBuffer runtime (which performs its own offset validation against the buffer's + /// capacity). + /// + /// @return a {@link ByteBuffer} view in little-endian order + public ByteBuffer asByteBufferLE() { + return seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + } + + /// Escape hatch returning the raw {@link MemorySegment} for a downstream parser that + /// takes its own bounds-checked cursor (currently {@link + /// io.github.dfa1.vortex.proto.ProtoReader}). The {@code reason} string names the + /// sub-parser for diagnostic attribution at the call site. + /// + ///

Audit point. Every call to this method is a trust transfer + /// across the bounds-checking boundary. New call sites must justify in review why + /// the receiver re-validates the bounds itself. + /// + /// @param reason short label naming the sub-parser ({@code "proto reader"}, + /// {@code "flatbuffer root"}) + /// @return the raw memory segment + public MemorySegment unwrapForSubParser(String reason) { + return seg; + } +} diff --git a/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java b/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java index 5a6941da..508adac5 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java @@ -29,6 +29,21 @@ private MemorySegments() { /// @throws VortexException if {@code off} or {@code len} is negative, or if /// {@code off + len > seg.byteSize()} public static MemorySegment slice(MemorySegment seg, long off, long len, String context) { + checkRange(seg, off, len, context); + return seg.asSlice(off, len); + } + + /// Bounds-check {@code off} and {@code len} against {@code seg} without producing a slice. + /// Used by {@link BoundedSegment}'s primitive readers, which need bounds-checking before + /// a {@link MemorySegment#get} call but do not need to materialise a sub-segment. 
+ /// + /// @param seg backing segment + /// @param off start offset + /// @param len range length + /// @param context label used in the {@link VortexException} message + /// @throws VortexException if {@code off} or {@code len} is negative, or if + /// {@code off + len > seg.byteSize()} + public static void checkRange(MemorySegment seg, long off, long len, String context) { long segSize = seg.byteSize(); if (off < 0) { throw new VortexException("malformed " + context + ": negative offset " + off); @@ -42,6 +57,5 @@ public static MemorySegment slice(MemorySegment seg, long off, long len, String throw new VortexException("malformed " + context + ": offset+length " + off + "+" + len + " exceeds segment size " + segSize); } - return seg.asSlice(off, len); } } diff --git a/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java b/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java new file mode 100644 index 00000000..270611f4 --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java @@ -0,0 +1,66 @@ +package io.github.dfa1.vortex.core; + +import org.junit.jupiter.api.Test; + +import java.lang.foreign.MemorySegment; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class BoundedSegmentTest { + + private final BoundedSegment sut = new BoundedSegment( + MemorySegment.ofArray(new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + "test region"); + + @Test + void inRangeSliceReturnsExpectedRegion() { + // Given the 16-byte test region. + + // When + BoundedSegment child = sut.slice(4, 8, "child"); + + // Then — the slice carries its own context label, used in nested error messages. + assertThat(child.byteSize()).isEqualTo(8); + assertThat(child.context()).isEqualTo("child"); + } + + @Test + void badSliceThrowsVortexExceptionLabelledByParent() { + // Given — adversarial slice on the bounded region. 
The parent's context label + // ("test region") surfaces in the error so the caller knows which structure + // was being parsed when the bad offset arrived. + + // When + Then + assertThatThrownBy(() -> sut.slice(20, 4, "child")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("test region"); + } + + @Test + void primitiveReadsAreBoundsChecked() { + // Given — getIntLE at offset 12 needs 4 bytes (12..16), valid. + + // When + Then + assertThat(sut.getIntLE(12)).isNotZero(); + + // Out-of-range read throws VortexException, not IOOBE. + assertThatThrownBy(() -> sut.getIntLE(13)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("test region"); + } + + @Test + void unwrapForSubParserReturnsRawSegment() { + // Given — explicit trust transfer documented by the reason string. The unwrapped + // segment is the same instance as the backing seg(); callers re-validate bounds + // in their own cursor (e.g. ProtoReader). + + // When + MemorySegment raw = sut.unwrapForSubParser("test sub-parser"); + + // Then + assertThat(raw).isSameAs(sut.seg()); + assertThat(raw.byteSize()).isEqualTo(16); + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java b/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java index dd59924f..b50c7db7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java @@ -1,10 +1,10 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.CompressionScheme; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; -import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.core.VortexException; @@ -20,9 +20,7 @@ import io.github.dfa1.vortex.fbs.Utf8; import 
io.github.dfa1.vortex.fbs.Variant; -import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.util.ArrayList; import java.util.List; @@ -43,7 +41,7 @@ final class PostscriptParser { private PostscriptParser() { } - static ParsedFile parse(ByteBuffer postscriptBuf, MemorySegment fileSegment, long fileSize) { + static ParsedFile parse(ByteBuffer postscriptBuf, BoundedSegment file) { var ps = Postscript.getRootAsPostscript(postscriptBuf); var footerSeg = ps.footer(); @@ -56,16 +54,20 @@ static ParsedFile parse(ByteBuffer postscriptBuf, MemorySegment fileSegment, lon } var dtypeSeg = ps.dtype(); + // BoundedSegment.slice does the bounds check; the explicit checkBlobBounds calls + // below are kept because they produce more specific error messages naming the blob + // ("postscript footer blob out of bounds" vs the generic "vortex file" context label). + long fileSize = file.byteSize(); checkBlobBounds("footer", footerSeg.offset(), footerSeg.length(), fileSize); checkBlobBounds("layout", layoutSeg.offset(), layoutSeg.length(), fileSize); if (dtypeSeg != null && dtypeSeg.length() > 0) { checkBlobBounds("dtype", dtypeSeg.offset(), dtypeSeg.length(), fileSize); } - ByteBuffer footerBuf = slice(fileSegment, footerSeg.offset(), footerSeg.length()); - ByteBuffer layoutBuf = slice(fileSegment, layoutSeg.offset(), layoutSeg.length()); + ByteBuffer footerBuf = file.slice(footerSeg.offset(), footerSeg.length(), "footer blob").asByteBufferLE(); + ByteBuffer layoutBuf = file.slice(layoutSeg.offset(), layoutSeg.length(), "layout blob").asByteBufferLE(); ByteBuffer dtypeBuf = (dtypeSeg != null && dtypeSeg.length() > 0) - ? slice(fileSegment, dtypeSeg.offset(), dtypeSeg.length()) + ? 
file.slice(dtypeSeg.offset(), dtypeSeg.length(), "dtype blob").asByteBufferLE() : null; ParsedFile parsed = parseBlobs(footerBuf, layoutBuf, dtypeBuf); @@ -119,9 +121,6 @@ static ParsedFile parseBlobs(ByteBuffer footerBuf, ByteBuffer layoutBuf, ByteBuf } } - private static ByteBuffer slice(MemorySegment seg, long offset, long length) { - return MemorySegments.slice(seg, offset, length, "postscript blob").asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); - } static Footer convertFooter(io.github.dfa1.vortex.fbs.Footer f) { var arraySpecs = new ArrayList(f.arraySpecsLength()); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java b/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java index ca714a7b..ee8f2172 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; @@ -20,15 +21,16 @@ record Trailer(int version, int postscriptLen) { private static final ValueLayout.OfShort LE_SHORT = ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - /// Parse the 8-byte trailer from a [MemorySegment] view and validate magic, version, and + /// Parse the 8-byte trailer from a [BoundedSegment] view and validate magic, version, and /// postscript length against the body size. /// - /// @param trailerSeg the trailer slice, must be exactly [VortexFormat#TRAILER_SIZE] bytes + /// @param trailer the trailer region, must be exactly [VortexFormat#TRAILER_SIZE] bytes /// @param bodyBytes number of bytes in the file body (i.e. 
`fileSize - TRAILER_SIZE`) /// @return validated [Trailer] /// @throws VortexException if the magic mismatches, the version is unsupported, or /// postscriptLen is zero or exceeds {@code bodyBytes} - static Trailer parse(MemorySegment trailerSeg, long bodyBytes) { + static Trailer parse(BoundedSegment trailer, long bodyBytes) { + MemorySegment trailerSeg = trailer.unwrapForSubParser("trailer parser"); int version = Short.toUnsignedInt(trailerSeg.get(LE_SHORT, 0)); int postscriptLen = Short.toUnsignedInt(trailerSeg.get(LE_SHORT, 2)); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java index 4d9eb279..02eae17c 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java @@ -1,9 +1,9 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; -import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; import io.github.dfa1.vortex.encoding.Registry; @@ -77,10 +77,10 @@ public static VortexHttpReader open(URI uri, Registry registry) throws IOExcepti long fileSize = tf.fileSize(); long tailLen = tail.length; - MemorySegment tailSeg = MemorySegment.ofArray(tail); + BoundedSegment tailRegion = new BoundedSegment(MemorySegment.ofArray(tail), "http tail"); long trailerOff = tailLen - VortexFormat.TRAILER_SIZE; long bodyBytes = fileSize - VortexFormat.TRAILER_SIZE; - Trailer trailer = Trailer.parse(MemorySegments.slice(tailSeg, trailerOff, VortexFormat.TRAILER_SIZE, "http trailer"), bodyBytes); + Trailer trailer = Trailer.parse(tailRegion.slice(trailerOff, VortexFormat.TRAILER_SIZE, "http trailer"), bodyBytes); // HTTP-specific: postscript may 
extend past the prefetched tail and need a larger fetch. long psOffInTail = trailerOff - trailer.postscriptLen(); @@ -90,8 +90,8 @@ public static VortexHttpReader open(URI uri, Registry registry) throws IOExcepti .formatted(trailer.postscriptLen(), TAIL_SIZE)); } - ByteBuffer postscriptBuf = MemorySegments.slice(tailSeg, psOffInTail, trailer.postscriptLen(), "http postscript") - .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer postscriptBuf = tailRegion.slice(psOffInTail, trailer.postscriptLen(), "http postscript") + .asByteBufferLE(); var ps = Postscript.getRootAsPostscript(postscriptBuf); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java index 3a139d6d..af7a1726 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.io; import io.github.dfa1.vortex.core.ArrayStats; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; @@ -16,6 +17,7 @@ import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.Path; @@ -85,17 +87,18 @@ public static VortexReader open(Path path, Registry registry) throws IOException private static VortexReader parse( MemorySegment seg, long size, Arena arena, Registry registry ) { + BoundedSegment file = new BoundedSegment(seg, "vortex file"); long bodyBytes = size - VortexFormat.TRAILER_SIZE; - var trailerSeg = MemorySegments.slice(seg, bodyBytes, VortexFormat.TRAILER_SIZE, "trailer"); - Trailer trailer = Trailer.parse(trailerSeg, bodyBytes); + BoundedSegment trailerRegion = file.slice(bodyBytes, VortexFormat.TRAILER_SIZE, 
"trailer"); + Trailer trailer = Trailer.parse(trailerRegion, bodyBytes); long postscriptOffset = bodyBytes - trailer.postscriptLen(); - var postscriptBuf = MemorySegments.slice(seg, postscriptOffset, trailer.postscriptLen(), "postscript blob") - .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer postscriptBuf = file.slice(postscriptOffset, trailer.postscriptLen(), "postscript blob") + .asByteBufferLE(); PostscriptParser.ParsedFile parsed; try { - parsed = PostscriptParser.parse(postscriptBuf, seg, size); + parsed = PostscriptParser.parse(postscriptBuf, file); } catch (VortexException e) { throw e; } catch (RuntimeException e) { From 413edb8503bef92a0247beb4ff358993a7942bd7 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Wed, 10 Jun 2026 22:07:56 +0200 Subject: [PATCH 3/6] sec(parser): thread BoundedSegment through DecodeContext.buffer() [Phase 2/4] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DecodeContext.segmentBuffers becomes BoundedSegment[]; DecodeContext.buffer(i) returns BoundedSegment instead of MemorySegment. The encoded-buffer arithmetic in FlatSegmentDecoder now wraps the parent flat segment as BoundedSegment and slices each per-buffer region with a "encoded buffer i" context label, so a crafted Buffer.padding()/Buffer.length() pair in the FlatBuffer schema throws VortexException at slice time rather than IndexOutOfBoundsException at first read. Encoder consumers (14 files, ~25 call sites) keep their existing MemorySegment- based decode logic and add an explicit `.unwrapForSubParser(" encoding")` at each `ctx.buffer(i)` site. Trust transfers are greppable: `git grep unwrapForSubParser` lists every encoder boundary. 
A new convenience factory `DecodeContext.ofRawBuffers(...)` wraps raw MemorySegment arrays as BoundedSegments for synthetic test inputs that produce their own trusted buffers; the 71 test sites that previously called `new DecodeContext(..., new MemorySegment[]{buf}, ...)` migrated mechanically to `DecodeContext.ofRawBuffers(..., new MemorySegment[]{buf}, ...)`. Production callers (only FlatSegmentDecoder) keep using the canonical constructor with already-bounded buffers. DictEncoding.java:381 was the one site `ctx.buffer(i)` did not cover — it indexed `ctx.segmentBuffers()[...]` directly. That site now calls `.unwrapForSubParser("dict encoding values")` for symmetry. VortexHandle.slice (the public deprecated escape hatch) is NOT touched in this phase; it still returns MemorySegment. Phase 3 covers VortexHttpReader's HTTP-aware wrapping and ScanIterator.readFlatStats. ./mvnw verify — all 938 unit + 243 integration tests pass (incl. Rust round-trips). Co-Authored-By: Claude Opus 4.7 --- .../vortex/encoding/BitpackedEncoding.java | 2 +- .../dfa1/vortex/encoding/BoolEncoding.java | 2 +- .../vortex/encoding/ByteBoolEncoding.java | 2 +- .../vortex/encoding/ConstantEncoding.java | 2 +- .../dfa1/vortex/encoding/DecimalEncoding.java | 2 +- .../dfa1/vortex/encoding/DecodeContext.java | 29 +++++++++++++++++-- .../dfa1/vortex/encoding/DictEncoding.java | 8 ++--- .../vortex/encoding/FlatSegmentDecoder.java | 9 +++--- .../dfa1/vortex/encoding/FsstEncoding.java | 6 ++-- .../dfa1/vortex/encoding/PcoEncoding.java | 10 +++---- .../vortex/encoding/PrimitiveEncoding.java | 2 +- .../github/dfa1/vortex/encoding/Registry.java | 2 +- .../dfa1/vortex/encoding/SparseEncoding.java | 2 +- .../dfa1/vortex/encoding/VarBinEncoding.java | 2 +- .../vortex/encoding/VarBinViewEncoding.java | 4 +-- .../dfa1/vortex/encoding/ZstdEncoding.java | 6 ++-- .../dfa1/vortex/encoding/AlpEncodingTest.java | 4 +-- .../BitpackedEncodingPatchesTest.java | 2 +- .../vortex/encoding/ByteBoolEncodingTest.java | 2 +- 
.../vortex/encoding/ChunkedEncodingTest.java | 6 ++-- .../encoding/DateTimePartsEncodingTest.java | 10 +++---- .../vortex/encoding/EncodeTestHelper.java | 2 +- .../dfa1/vortex/encoding/ExtEncodingTest.java | 4 +-- .../encoding/FixedSizeListEncodingTest.java | 6 ++-- .../FrameOfReferenceEncodingTest.java | 4 +-- .../vortex/encoding/FsstEncodingTest.java | 6 ++-- .../vortex/encoding/ListEncodingTest.java | 10 +++---- .../vortex/encoding/ListViewEncodingTest.java | 6 ++-- .../vortex/encoding/NullEncodingTest.java | 4 +-- .../vortex/encoding/PatchedEncodingTest.java | 4 +-- .../PatchesBroadcastRegressionTest.java | 2 +- .../dfa1/vortex/encoding/PcoEncodingTest.java | 4 +-- .../encoding/PrimitiveEncodingTest.java | 4 +-- .../dfa1/vortex/encoding/RegistryTest.java | 10 +++---- .../dfa1/vortex/encoding/RleEncodingTest.java | 2 +- .../vortex/encoding/RunEndEncodingTest.java | 2 +- .../vortex/encoding/SequenceEncodingTest.java | 6 ++-- .../vortex/encoding/SparseEncodingTest.java | 8 ++--- .../vortex/encoding/StructEncodingTest.java | 6 ++-- .../vortex/encoding/TestDecodeContexts.java | 2 +- .../vortex/encoding/VarBinEncodingTest.java | 2 +- .../encoding/VarBinViewEncodingTest.java | 4 +-- .../vortex/encoding/VariantEncodingTest.java | 10 +++---- .../vortex/encoding/ZigZagEncodingTest.java | 2 +- .../vortex/encoding/ZstdEncodingTest.java | 8 ++--- 45 files changed, 128 insertions(+), 104 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java index 23af45b3..4f933e96 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java @@ -270,7 +270,7 @@ static Array decode(DecodeContext ctx) { int typeBits = ptype.byteSize() * 8; long rowCount = ctx.rowCount(); - MemorySegment packed = ctx.buffer(0); + MemorySegment packed = 
ctx.buffer(0).unwrapForSubParser("bitpacked encoding"); MemorySegment output = ctx.arena().allocate(rowCount * ptype.byteSize()); fastlanesUnpackToSeg(packed, bitWidth, offset, typeBits, rowCount, output); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java index 19292a07..caf8dd33 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java @@ -68,6 +68,6 @@ public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { @Override public Array decode(DecodeContext ctx) { - return new BoolArray(ctx.dtype(), ctx.rowCount(), ctx.buffer(0)); + return new BoolArray(ctx.dtype(), ctx.rowCount(), ctx.buffer(0).unwrapForSubParser("bool encoding")); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java index 6fa711a4..e98cdcfd 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java @@ -44,7 +44,7 @@ public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { @Override public Array decode(DecodeContext ctx) { long n = ctx.rowCount(); - MemorySegment bytes = ctx.buffer(0); + MemorySegment bytes = ctx.buffer(0).unwrapForSubParser("bytebool encoding"); long packedBytes = (n + 7) >>> 3; MemorySegment packed = ctx.arena().allocate(packedBytes > 0 ? 
packedBytes : 1); for (long i = 0; i < n; i++) { diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java index 77065b73..f6d3e34e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java @@ -133,7 +133,7 @@ private static ScalarValue buildScalar(PType ptype, long rawBits) { private static final class Decoder { private static Array decode(DecodeContext ctx) { - MemorySegment scalarBuf = ctx.buffer(0); + MemorySegment scalarBuf = ctx.buffer(0).unwrapForSubParser("constant encoding"); ScalarValue scalar; try { scalar = ScalarValue.decode(scalarBuf, 0, scalarBuf.byteSize()); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java index 078b7721..4e08f99a 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java @@ -109,7 +109,7 @@ private static Array decode(DecodeContext ctx) { } int valuesType = decoded.values_type(); int byteWidth = decimalTypeByteWidth(valuesType); - MemorySegment buffer = ctx.buffer(0); + MemorySegment buffer = ctx.buffer(0).unwrapForSubParser("decimal encoding"); long expected = ctx.rowCount() * byteWidth; if (buffer.byteSize() < expected) { throw new VortexException(EncodingId.VORTEX_DECIMAL, diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java b/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java index 78540e5b..e90cc950 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.encoding; +import io.github.dfa1.vortex.core.BoundedSegment; import 
io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.array.Array; @@ -24,10 +25,32 @@ public record DecodeContext( ArrayNode node, DType dtype, long rowCount, - MemorySegment[] segmentBuffers, + BoundedSegment[] segmentBuffers, Registry registry, SegmentAllocator arena ) { + /// Convenience factory that wraps raw {@link MemorySegment} buffers as {@link BoundedSegment}s + /// for tests and other callers that produce synthetic, trusted buffer arrays. Production + /// decoders receive their buffers from {@link FlatSegmentDecoder}, which already wraps them + /// against the parent flat segment. + /// + /// @param node array node describing this encoding's tree structure + /// @param dtype logical type expected for the decoded array + /// @param rowCount number of logical rows to decode + /// @param rawBufs raw segment buffers; each wrapped as {@code "test buffer i"} + /// @param registry encoding registry used for recursive child decoding + /// @param arena allocator for decode output + /// @return a {@link DecodeContext} backed by bounded views of {@code rawBufs} + public static DecodeContext ofRawBuffers( + ArrayNode node, DType dtype, long rowCount, + MemorySegment[] rawBufs, Registry registry, SegmentAllocator arena) { + BoundedSegment[] wrapped = new BoundedSegment[rawBufs.length]; + for (int i = 0; i < rawBufs.length; i++) { + wrapped[i] = new BoundedSegment(rawBufs[i], "test buffer " + i); + } + return new DecodeContext(node, dtype, rowCount, wrapped, registry, arena); + } + /// Recursively decode child {@code i} using this context's dtype and row count. /// /// @param i zero-based child index within this node's children array @@ -78,8 +101,8 @@ public MemorySegment decodeChildSegment(int i, DType dtype, long rowCount) { /// Return the buffer at position `i` in this node's bufferIndices. 
/// /// @param i zero-based index into this node's {@code bufferIndices} array - /// @return the {@link MemorySegment} for the referenced segment buffer - public MemorySegment buffer(int i) { + /// @return the {@link BoundedSegment} for the referenced segment buffer + public BoundedSegment buffer(int i) { return segmentBuffers[node.bufferIndices()[i]]; } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java index 8244cc3a..17b4f370 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java @@ -378,7 +378,7 @@ private static Array decodeLegacyJava(DecodeContext ctx, byte codeTypeByte) { long rowCount = ctx.rowCount(); // Values: always VORTEX_PRIMITIVE leaf, read direct - MemorySegment valuesBuf = ctx.segmentBuffers()[ctx.node().children()[0].bufferIndices()[0]]; + MemorySegment valuesBuf = ctx.segmentBuffers()[ctx.node().children()[0].bufferIndices()[0]].unwrapForSubParser("dict encoding values"); // Codes: decode through registry — supports both raw (VORTEX_PRIMITIVE) and cascade (FASTLANES_BITPACKED) children DType codesDtype = new DType.Primitive(codePType, false); @@ -435,9 +435,9 @@ private static Array decodeUtf8DictLegacy(DecodeContext ctx, ByteBuffer meta) { PType codePType = PType.fromOrdinal(Byte.toUnsignedInt(meta.get(0))); long n = ctx.rowCount(); - MemorySegment dictBytes = ctx.buffer(0); - MemorySegment dictOffsets = ctx.buffer(1); - MemorySegment codes = ctx.buffer(2); + MemorySegment dictBytes = ctx.buffer(0).unwrapForSubParser("dict encoding"); + MemorySegment dictOffsets = ctx.buffer(1).unwrapForSubParser("dict encoding"); + MemorySegment codes = ctx.buffer(2).unwrapForSubParser("dict encoding"); return VarBinArray.ofDict(ctx.dtype(), n, dictBytes, dictOffsets, PType.I64, diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java 
b/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java index 41dad3bb..4579ad0e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java @@ -1,8 +1,8 @@ package io.github.dfa1.vortex.encoding; -import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.ArrayStats; -import io.github.dfa1.vortex.core.MemorySegments; +import io.github.dfa1.vortex.core.BoundedSegment; +import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.array.Array; import io.github.dfa1.vortex.fbs.Buffer; @@ -51,12 +51,13 @@ public Array decode(MemorySegment seg, List encodingSpecs, var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); int numBuffers = fbArray.buffersLength(); - MemorySegment[] bufs = new MemorySegment[numBuffers]; + BoundedSegment[] bufs = new BoundedSegment[numBuffers]; + BoundedSegment region = new BoundedSegment(seg, "flat segment"); long dataOffset = 0; for (int i = 0; i < numBuffers; i++) { Buffer bufDesc = fbArray.buffers(i); dataOffset += bufDesc.padding(); - bufs[i] = MemorySegments.slice(seg, dataOffset, bufDesc.length(), "encoded buffer " + i); + bufs[i] = region.slice(dataOffset, bufDesc.length(), "encoded buffer " + i); dataOffset += bufDesc.length(); } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java index 5f4a22bb..015906ac 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java @@ -206,9 +206,9 @@ private static Array decode(DecodeContext ctx) { long n = ctx.rowCount(); - MemorySegment symbolsBuf = ctx.buffer(0); // 8 bytes per symbol (LE u64) - MemorySegment symbolLensBuf = ctx.buffer(1); // 1 byte per symbol - MemorySegment compressedBytes = ctx.buffer(2); // FSST-compressed heap + 
MemorySegment symbolsBuf = ctx.buffer(0).unwrapForSubParser("fsst encoding"); // 8 bytes per symbol (LE u64) + MemorySegment symbolLensBuf = ctx.buffer(1).unwrapForSubParser("fsst encoding"); // 1 byte per symbol + MemorySegment compressedBytes = ctx.buffer(2).unwrapForSubParser("fsst encoding"); // FSST-compressed heap MemorySegment uncompLensSeg = ctx.decodeChildSegment(0, new DType.Primitive(uncompLenPType, false), n); MemorySegment codesOffsetsSeg = ctx.decodeChildSegment(1, new DType.Primitive(codesOffPType, false), n + 1); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java index e9275e77..615d5eb0 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java @@ -141,7 +141,7 @@ static Array decode(DecodeContext ctx) { for (int c = 0; c < nChunks; c++) { PcoChunkInfo chunkInfo = meta.chunks().get(c); - MemorySegment chunkMetaBuf = ctx.buffer(bufIdx++); + MemorySegment chunkMetaBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); PcoChunkMeta chunkMeta = readChunkMeta(chunkMetaBuf, dtypeSize); int mode = chunkMeta.mode(); @@ -160,7 +160,7 @@ static Array decode(DecodeContext ctx) { chunkMeta.ansSizeLog(), chunkMeta.bins()); for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); rawByteOffset = decodeConv1Page( primaryTans, chunkMeta.ansSizeLog(), chunkMeta.conv1Weights().length, @@ -186,7 +186,7 @@ static Array decode(DecodeContext ctx) { long mask = typeMask(dtypeSize); for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco 
encoding"); rawByteOffset = decodeLookbackPage( deltaTans, chunkMeta.deltaAnsSizeLog(), primaryTans, chunkMeta.ansSizeLog(), @@ -202,7 +202,7 @@ static Array decode(DecodeContext ctx) { PcoTansDecoder tans = PcoTansDecoder.build(chunkMeta.ansSizeLog(), chunkMeta.bins()); for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); rawByteOffset = decodeClassicPage(tans, chunkMeta.ansSizeLog(), chunkMeta.deltaOrder(), primaryDtypeSize, pageBuf, pageN, rawLatents, rawByteOffset, @@ -225,7 +225,7 @@ static Array decode(DecodeContext ctx) { long adjByteOffset = 0L; for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); decodeIntMultPage(primaryTans, primaryAnsSizeLog, deltaOrder, secondaryTans, secondaryAnsSizeLog, secondaryDeltaOrder, dtypeSize, pageBuf, pageN, diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java index 0794b639..1ecf5331 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java @@ -312,7 +312,7 @@ private static byte[] scalarF64(double v) { private static final class Decoder { private static Array decode(DecodeContext ctx) { - MemorySegment buf = ctx.buffer(0); + MemorySegment buf = ctx.buffer(0).unwrapForSubParser("primitive encoding"); long n = ctx.rowCount(); DType dt = ctx.dtype(); PType ptype = ((DType.Primitive) dt).ptype(); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java b/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java index 2af7cdfb..9c0fb3ad 
100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java @@ -77,7 +77,7 @@ private static UnknownArray decodeUnknown(DecodeContext ctx, ArrayNode node) { }; MemorySegment[] bufs = new MemorySegment[node.bufferIndices().length]; for (int i = 0; i < bufs.length; i++) { - bufs[i] = ctx.buffer(i); + bufs[i] = ctx.buffer(i).unwrapForSubParser("registry"); } Array[] children = new Array[node.children().length]; for (int i = 0; i < children.length; i++) { diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java index 22e76b6d..d05c923e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java @@ -210,7 +210,7 @@ private static Array decode(DecodeContext ctx) { } PType valuePtype = ((DType.Primitive) ctx.dtype()).ptype(); - MemorySegment fillBuf = ctx.buffer(0); + MemorySegment fillBuf = ctx.buffer(0).unwrapForSubParser("sparse encoding"); ScalarValue fillScalar; try { fillScalar = ScalarValue.decode(fillBuf, 0, fillBuf.byteSize()); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java index 0029af35..7d6a9e84 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java @@ -140,7 +140,7 @@ private static Array decode(DecodeContext ctx) { offsets = materialized; } - MemorySegment bytes = ctx.buffer(0); + MemorySegment bytes = ctx.buffer(0).unwrapForSubParser("varbin encoding"); return new VarBinArray(ctx.dtype(), n, bytes, offsets, offsetsPtype); } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java 
b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java index 8f42a783..93e40c37 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java @@ -120,10 +120,10 @@ private static Array decode(DecodeContext ctx) { } // Views buffer is the last; data buffers are 0..numBufs-2 - MemorySegment viewsBuf = ctx.buffer(numBufs - 1); + MemorySegment viewsBuf = ctx.buffer(numBufs - 1).unwrapForSubParser("varbinview encoding"); MemorySegment[] dataBufs = new MemorySegment[numBufs - 1]; for (int i = 0; i < dataBufs.length; i++) { - dataBufs[i] = ctx.buffer(i); + dataBufs[i] = ctx.buffer(i).unwrapForSubParser("varbinview encoding"); } long n = ctx.rowCount(); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java index 19baa539..ff6b7fe7 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java @@ -314,12 +314,12 @@ private static MemorySegment decompressFramesWithDict( long totalUncompressed ) { MemorySegment out = ctx.arena().allocate(totalUncompressed); - byte[] dictBytes = ctx.buffer(0).toArray(ValueLayout.JAVA_BYTE); + byte[] dictBytes = ctx.buffer(0).unwrapForSubParser("zstd encoding").toArray(ValueLayout.JAVA_BYTE); try (ZstdDecompressCtx zctx = new ZstdDecompressCtx()) { zctx.loadDict(dictBytes); long outOffset = 0; for (int i = 0; i < frameCount; i++) { - byte[] compressed = ctx.buffer(i + 1).toArray(ValueLayout.JAVA_BYTE); + byte[] compressed = ctx.buffer(i + 1).unwrapForSubParser("zstd encoding").toArray(ValueLayout.JAVA_BYTE); int uncompSize = (int) meta.frames().get(i).uncompressed_size(); byte[] temp = new byte[uncompSize]; int written = zctx.decompressByteArray(temp, 0, uncompSize, compressed, 0, compressed.length); @@ -348,7 +348,7 @@ private static 
MemorySegment decompressFrames( ZstdDecompressor decompressor = new ZstdJavaDecompressor(); long outOffset = 0; for (int i = 0; i < frameCount; i++) { - MemorySegment frameSeg = ctx.buffer(i); + MemorySegment frameSeg = ctx.buffer(i).unwrapForSubParser("zstd encoding"); byte[] compressed = frameSeg.toArray(ValueLayout.JAVA_BYTE); int uncompSize = (int) meta.frames().get(i).uncompressed_size(); byte[] temp = new byte[uncompSize]; diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java index 5878e433..f80414ba 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java @@ -84,7 +84,7 @@ private static DecodeContext buildAlpCtxF64( Registry registry = TestRegistry.of(new AlpEncoding(), new PrimitiveEncoding()); - return new DecodeContext(alpNode, DTypes.F64, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(alpNode, DTypes.F64, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); } private static DecodeContext buildAlpCtxF32( @@ -109,7 +109,7 @@ private static DecodeContext buildAlpCtxF32( Registry registry = TestRegistry.of(new AlpEncoding(), new PrimitiveEncoding()); - return new DecodeContext(alpNode, DTypes.F32, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(alpNode, DTypes.F32, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); } @Test diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java index 0ab812af..ca40537e 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java +++ 
b/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java @@ -59,7 +59,7 @@ void decode_appliesPatches_overridingBitPackedValues() { Registry registry = TestRegistry.of(new BitpackedEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( bpNode, DTypes.I32, base.length, segments, registry, Arena.global()); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java index 7e496ef9..738fe465 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java @@ -66,7 +66,7 @@ private static DecodeContext buildCtx(byte[] byteValues) { MemorySegment buf = MemorySegment.ofArray(byteValues); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_BYTEBOOL, null, new ArrayNode[0], new int[]{0}, null); Registry registry = Registry.builder().register(new ByteBoolEncoding()).build(); - return new DecodeContext(node, DTypes.BOOL, byteValues.length, new MemorySegment[]{buf}, registry, + return DecodeContext.ofRawBuffers(node, DTypes.BOOL, byteValues.length, new MemorySegment[]{buf}, registry, Arena.ofAuto()); } diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java index 59030e0f..ee1baeed 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java @@ -137,7 +137,7 @@ void roundTrip_twoChunks_concatenatesValues() { new ArrayNode[]{offsetsNode, chunk0Node, chunk1Node}, new int[]{}, null); - DecodeContext ctx = new DecodeContext(root, i64, 5L, allBufs, registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, i64, 5L, allBufs, 
registry, Arena.ofAuto()); // When Array result = sut.decode(ctx); @@ -176,7 +176,7 @@ void singleChunk_returnsSameValues() { new ArrayNode[]{toArrayNode(offsetsResult.rootNode()), toArrayNode(remapped(chunkResult.rootNode(), 1))}, new int[]{}, null); - DecodeContext ctx = new DecodeContext(root, i64, 3L, allBufs, registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, i64, 3L, allBufs, registry, Arena.ofAuto()); // When Array result = new ChunkedEncoding().decode(ctx); @@ -196,7 +196,7 @@ void noChildren_throws() { .register(new ChunkedEncoding()) .build(); ArrayNode root = ArrayNode.of(EncodingId.VORTEX_CHUNKED, null, new ArrayNode[]{}, new int[]{}, null); - DecodeContext ctx = new DecodeContext(root, i64, 0L, new MemorySegment[]{}, registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, i64, 0L, new MemorySegment[]{}, registry, Arena.ofAuto()); // When / Then assertThatThrownBy(() -> new ChunkedEncoding().decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java index 68e8851b..a4cbc6e2 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java @@ -116,7 +116,7 @@ void roundTrip_milliseconds_preservesDaysSecondsSubseconds() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_MS, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_MS, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); @@ -143,7 +143,7 @@ void roundTrip_nanoseconds_preservesSubsecondPrecision() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_NS, data, 
EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_NS, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); @@ -166,7 +166,7 @@ void roundTrip_epoch_allZero() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_MS, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_MS, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); @@ -190,7 +190,7 @@ void roundTrip_multipleTimestamps_allRowsPreserved() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_MS, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_MS, 4, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); @@ -218,7 +218,7 @@ void roundTrip_allUnits_epochIsZero(TimeUnit unit) { // When EncodeResult result = sut.encode(dtype, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java b/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java index f7fec03b..fdd6fdf3 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java +++ 
b/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java @@ -25,7 +25,7 @@ static DecodeContext toDecodeContext( List buffers = result.buffers(); MemorySegment[] segments = buffers.toArray(new MemorySegment[0]); ArrayNode root = toArrayNode(result.rootNode()); - return new DecodeContext(root, dtype, rowCount, segments, registry, Arena.ofAuto()); + return DecodeContext.ofRawBuffers(root, dtype, rowCount, segments, registry, Arena.ofAuto()); } private static ArrayNode toArrayNode(EncodeNode enc) { diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java index 5db5bed7..c6ce34df 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java @@ -55,7 +55,7 @@ void encode_extensionWrappingI64_roundTrips() { // Decode back Registry registry = TestRegistry.of(new PrimitiveEncoding(), new ExtEncoding()); ArrayNode rootNode = encodeNodeToArrayNode(result.rootNode()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( rootNode, extDType, data.length, result.buffers().toArray(MemorySegment[]::new), registry, Arena.ofAuto()); @@ -136,7 +136,7 @@ void decode_extensionWrappingI64_returnsStorageArray() { Registry registry = TestRegistry.of(new PrimitiveEncoding(), new ExtEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( extNode, extDType, values.length, new MemorySegment[]{buf}, registry, Arena.ofAuto()); var sut = new ExtEncoding(); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java index 4745c9a2..45127b7c 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java +++ 
b/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java @@ -85,7 +85,7 @@ void roundTrip_i32Elements_preservesValues() { // When EncodeResult result = sut.encode(dtype, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, 2, bufs, registry(), Arena.global()); FixedSizeListArray decoded = (FixedSizeListArray) sut.decode(ctx); @@ -110,7 +110,7 @@ void roundTrip_fixedSizeOne_preservesValues() { // When EncodeResult result = sut.encode(dtype, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, 3, bufs, registry(), Arena.global()); FixedSizeListArray decoded = (FixedSizeListArray) sut.decode(ctx); @@ -129,7 +129,7 @@ void decode_wrongDtype_throws() { FixedSizeListEncoding sut = new FixedSizeListEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_FIXED_SIZE_LIST, null, new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java index 6ea12e5e..211ee224 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java @@ -62,7 +62,7 @@ private static DecodeContext buildForContext( Registry registry = 
TestRegistry.of(new FrameOfReferenceEncoding(), new PrimitiveEncoding()); - return new DecodeContext(forNode, dtype, residuals.length, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(forNode, dtype, residuals.length, segments, registry, java.lang.foreign.Arena.global()); } @Test @@ -170,7 +170,7 @@ void decode_nullableResiduals_returnsMaskedArrayWithCorrectValues() { Registry registry = TestRegistry.of(new FrameOfReferenceEncoding(), new PrimitiveEncoding(), new BoolEncoding()); MemorySegment[] segments = {MemorySegment.ofArray(residualBytes), validitySeg}; - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( forNode, DTypes.I32, residuals.length, segments, registry, java.lang.foreign.Arena.global()); FrameOfReferenceEncoding sut = new FrameOfReferenceEncoding(); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java index cdd8eb20..ca1c103d 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java @@ -96,7 +96,7 @@ void encode_thenDecode_roundtripsAllStrings(String name, String[] values) { .register(new PrimitiveEncoding()) .register(sut) .build(); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, values.length, bufs, registry, arena); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, values.length, bufs, registry, arena); var decoded = (VarBinArray) sut.decode(ctx); // Then @@ -158,7 +158,7 @@ private static DecodeContext buildCtx( EncodingId.VORTEX_FSST, ByteBuffer.wrap(metaBytes), new ArrayNode[]{uncompLensNode, codesOffNode}, new int[]{0, 1, 2}, null); - return new DecodeContext(root, DTypes.UTF8, n, segs, buildRegistry(), arena); + return DecodeContext.ofRawBuffers(root, DTypes.UTF8, n, segs, buildRegistry(), arena); } private static Registry 
buildRegistry() { @@ -265,7 +265,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new FsstEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_FSST, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, 0, new MemorySegment[0], + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, 0, new MemorySegment[0], buildRegistry(), Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java index 975b556b..df0937b0 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java @@ -85,7 +85,7 @@ void roundTrip_i32Elements_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 3, bufs, registry(), Arena.global()); ListArray decoded = (ListArray) sut.decode(ctx); @@ -114,7 +114,7 @@ void roundTrip_emptyLists_preservesOffsets() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 2, bufs, registry(), Arena.global()); ListArray decoded = (ListArray) sut.decode(ctx); @@ -135,7 +135,7 @@ void roundTrip_singleList_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + 
DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 1, bufs, registry(), Arena.global()); ListArray decoded = (ListArray) sut.decode(ctx); @@ -154,7 +154,7 @@ void decode_wrongDtype_throws() { ListEncoding sut = new ListEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_LIST, null, new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) @@ -170,7 +170,7 @@ void decode_wrongChildCount_throws() { ArrayNode node = ArrayNode.of(EncodingId.VORTEX_LIST, java.nio.ByteBuffer.wrap(new byte[0]), new ArrayNode[]{child}, new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.LIST_I32, 0, new MemorySegment[0], registry(), Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.LIST_I32, 0, new MemorySegment[0], registry(), Arena.global()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java index 539ccfa5..da410fd9 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java @@ -83,7 +83,7 @@ void roundTrip_i32Elements_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 3, bufs, registry(), Arena.global()); ListViewArray decoded = (ListViewArray) 
sut.decode(ctx); @@ -112,7 +112,7 @@ void roundTrip_emptyLists_preservesZeroSizes() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 2, bufs, registry(), Arena.global()); ListViewArray decoded = (ListViewArray) sut.decode(ctx); @@ -135,7 +135,7 @@ void roundTrip_singleList_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 1, bufs, registry(), Arena.global()); ListViewArray decoded = (ListViewArray) sut.decode(ctx); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java index 8646cb9a..4173fd59 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java @@ -37,7 +37,7 @@ void encode_thenDecode_roundTrips() { // When EncodeResult encoded = sut.encode(DTypes.NULL, null, EncodeTestHelper.testCtx()); ArrayNode node = ArrayNode.of(encoded.rootNode().encodingId(), null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.NULL, rowCount, new MemorySegment[0], + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.NULL, rowCount, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // Then @@ -53,7 +53,7 @@ class Decode { private static DecodeContext buildNullCtx(long rowCount) { ArrayNode node = ArrayNode.of(EncodingId.VORTEX_NULL, null, new ArrayNode[0], new int[0], null); Registry registry = 
Registry.builder().register(new NullEncoding()).build(); - return new DecodeContext(node, DTypes.NULL, rowCount, new MemorySegment[0], registry, Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, DTypes.NULL, rowCount, new MemorySegment[0], registry, Arena.ofAuto()); } @Test diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java index 66501d60..5176d421 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java @@ -89,7 +89,7 @@ private static Array decode( new ArrayNode[]{innerNode, laneNode, idxNode, valNode}, new int[]{}, null); Registry registry = TestRegistry.of(new PatchedEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext(patchedNode, dtype, n, segments, registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(patchedNode, dtype, n, segments, registry, Arena.ofAuto()); return new PatchedEncoding().decode(ctx); } @@ -203,7 +203,7 @@ void decode_missingMetadata_throws() { ArrayNode patchedNode = ArrayNode.of(EncodingId.VORTEX_PATCHED, null, new ArrayNode[]{innerNode, innerNode, innerNode, innerNode}, new int[]{}, null); MemorySegment seg = i32Segment(1, 2, 3); - DecodeContext ctx = new DecodeContext(patchedNode, new DType.Primitive(PType.I32, false), 3, + DecodeContext ctx = DecodeContext.ofRawBuffers(patchedNode, new DType.Primitive(PType.I32, false), 3, new MemorySegment[]{seg}, Registry.empty(), Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java index 2882d576..a9d7b256 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java +++ 
b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java @@ -73,7 +73,7 @@ void bitpackedDecode_withConstantPatchesValues_broadcastsValueAcrossPatches() { DType dtype = new DType.Primitive(PType.I64, false); Registry registry = Registry.loadAll(); - DecodeContext ctx = new DecodeContext(root, dtype, n, + DecodeContext ctx = DecodeContext.ofRawBuffers(root, dtype, n, new MemorySegment[]{packedSeg, idxBufSeg, valBufSeg}, registry, Arena.ofAuto()); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java index dc462dcc..319dc2da 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java @@ -35,7 +35,7 @@ private static ByteBuffer validMetaBuffer() { private static DecodeContext ctxWith(ByteBuffer meta, DType dtype, long rowCount, MemorySegment[] buffers) { ArrayNode node = ArrayNode.of(EncodingId.VORTEX_PCO, meta, new ArrayNode[0], bufferIndices(buffers.length), null); - return new DecodeContext(node, dtype, rowCount, buffers, Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, rowCount, buffers, Registry.empty(), Arena.ofAuto()); } /// Build a nullable DecodeContext: validity buffer at index 0, pco buffers at indices 1..N. 
@@ -57,7 +57,7 @@ private static DecodeContext ctxWithValidity(ByteBuffer meta, DType dtype, long pcoBufferIndices, null); Registry registry = TestRegistry.of(new BoolEncoding()); - return new DecodeContext(pcoNode, dtype, rowCount, allBuffers, registry, Arena.ofAuto()); + return DecodeContext.ofRawBuffers(pcoNode, dtype, rowCount, allBuffers, registry, Arena.ofAuto()); } private static int[] bufferIndices(int n) { diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java index 8cb6ea37..7ae0b4b5 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java @@ -151,7 +151,7 @@ void decode_withValidityChild_returnsMaskedArray() { Registry registry = TestRegistry.of(new PrimitiveEncoding(), new BoolEncoding()); DType dtype = new DType.Primitive(PType.I32, false); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( primNode, dtype, raw.length, new MemorySegment[]{valuesSeg, validitySeg}, registry, Arena.global()); @@ -186,7 +186,7 @@ void decode_noValidityChild_returnsPlainArray() { Registry registry = TestRegistry.of(new PrimitiveEncoding()); DType dtype = new DType.Primitive(PType.I32, false); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( primNode, dtype, raw.length, new MemorySegment[]{valuesSeg}, registry, Arena.global()); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java index 3a1f14ae..1b15599b 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java @@ -102,7 +102,7 @@ void decodeUnknownEncodingThrowsByDefault() { Registry sut = Registry.empty(); ArrayNode node = 
new UnknownArrayNode("some.unknown", ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When / Then @@ -117,7 +117,7 @@ void decodeKnownEncodingWithoutDecoderThrowsByDefault() { Registry sut = Registry.empty(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When / Then @@ -132,7 +132,7 @@ void decodeKnownEncodingWithoutDecoderReturnsUnknownArrayWhenAllowed() { Registry sut = Registry.builder().allowUnknown().build(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When @@ -152,7 +152,7 @@ void decodeUnknownEncodingReturnsUnknownArrayWhenAllowed() { buf.set(java.lang.foreign.ValueLayout.JAVA_INT, 0, 42); ArrayNode node = new UnknownArrayNode("some.unknown", metadata, new ArrayNode[0], new int[]{0}, ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 5L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 5L, new MemorySegment[]{buf}, sut, Arena.ofAuto()); // When @@ -180,7 +180,7 @@ void decodeUnknownEncodingWrapsChildrenAsUnknown() { ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); ArrayNode parent = new UnknownArrayNode("some.unknown", ByteBuffer.allocate(0), new ArrayNode[]{child}, new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(parent, DTypes.I32, 
0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(parent, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java index f7adef03..cad0ea11 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java @@ -305,7 +305,7 @@ void decode_nullableIndices_returnsMaskedArrayWithCorrectValidity() { .register(new PrimitiveEncoding()) .register(new BoolEncoding()) .build(); - DecodeContext ctx = new DecodeContext(root, dtype, data.length, segments, reg, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, dtype, data.length, segments, reg, Arena.ofAuto()); // When Array result = sut.decode(ctx); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java index 1fb239ee..6383b46f 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java @@ -49,7 +49,7 @@ private static DecodeContext buildCtx( Registry registry = TestRegistry.of(new RunEndEncoding(), new PrimitiveEncoding()); - return new DecodeContext(reNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(reNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); } private static byte[] toLEBytes(long[] values, PType ptype) { diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java index 7e5981b4..24b3b537 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java +++ 
b/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java @@ -32,7 +32,7 @@ class Encode { private static DecodeContext encodeResultToCtx(EncodeResult result, DType dtype, long n) { ByteBuffer meta = result.rootNode().metadata(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_SEQUENCE, meta, new ArrayNode[0], new int[0], null); - return new DecodeContext(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); } @Test @@ -140,7 +140,7 @@ private static DecodeContext makeCtx(byte[] meta, DType dtype, long n) { EncodingId.VORTEX_SEQUENCE, ByteBuffer.wrap(meta), new ArrayNode[0], new int[0], null); - return new DecodeContext(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); } private static byte[] intMeta(long base, long mul) { @@ -250,7 +250,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new SequenceEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_SEQUENCE, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.I64, 3, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I64, 3, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java index dee032b9..fb571dc3 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java @@ -47,7 +47,7 @@ private static Array decodeResult(EncodeResult encoded, DType dtype, int n) { Registry 
registry = TestRegistry.of(new SparseEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext(sparseNode, dtype, n, segments, registry, Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(sparseNode, dtype, n, segments, registry, Arena.global()); return new SparseEncoding().decode(ctx); } @@ -200,7 +200,7 @@ private static DecodeContext buildCtx( Registry registry = TestRegistry.of(new SparseEncoding(), new PrimitiveEncoding()); - return new DecodeContext(sparseNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(sparseNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); } private static byte[] buildSparseMetaBytes(long numPatches, long offset, PType idxPtype) { @@ -392,7 +392,7 @@ void decode_utf8_withPatches_writesStringsAtIndices() { MemorySegment.ofArray(strBytes), MemorySegment.ofArray(offsets), }; - DecodeContext ctx = new DecodeContext(sparseNode, utf8, 5, segments, registry, Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(sparseNode, utf8, 5, segments, registry, Arena.global()); SparseEncoding sut = new SparseEncoding(); // When @@ -433,7 +433,7 @@ void decode_bool_withPatches_setsBitsAtIndices() { MemorySegment.ofArray(idxBuf), MemorySegment.ofArray(boolBits), }; - DecodeContext ctx = new DecodeContext(sparseNode, bool, 6, segments, registry, Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(sparseNode, bool, 6, segments, registry, Arena.global()); SparseEncoding sut = new SparseEncoding(); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java index 777445f5..612ce351 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java @@ -57,7 +57,7 @@ void 
roundTrip_twoI64Fields_preservesValues() { // Then — decode round-trip MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); Registry registry = TestRegistry.of(new StructEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, ids.length, bufs, registry, Arena.global()); StructArray decoded = (StructArray) sut.decode(ctx); @@ -117,7 +117,7 @@ private static ArrayNode boolNode(int bufferIdx) { private static DecodeContext buildStructCtx(ArrayNode structNode, MemorySegment[] segs, long rowCount) { Registry registry = TestRegistry.of(new StructEncoding(), new PrimitiveEncoding()); - return new DecodeContext(structNode, DTypes.I64, rowCount, segs, registry, Arena.global()); + return DecodeContext.ofRawBuffers(structNode, DTypes.I64, rowCount, segs, registry, Arena.global()); } @Test @@ -155,7 +155,7 @@ void decode_nullableWrapper_twoChildren_returnsMaskedArray() { new ArrayNode[]{validityNode, valuesNode}, new int[0], ArrayStats.empty()); Registry registry = TestRegistry.of(new StructEncoding(), new PrimitiveEncoding(), new BoolEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( structNode, DTypes.I64, data.length, new MemorySegment[]{validitySeg, valuesSeg}, registry, Arena.global()); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java b/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java index 1ba7c815..35fd894b 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java @@ -44,6 +44,6 @@ TestDecodeContexts arena(Arena a) { } DecodeContext build() { - return new DecodeContext(node, dtype, rowCount, segments, registry, arena); + return DecodeContext.ofRawBuffers(node, dtype, rowCount, segments, registry, arena); } } diff --git 
a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java index ab6952b7..9bbd77d8 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java @@ -145,7 +145,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new VarBinEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_VARBIN, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, 3, new MemorySegment[0], + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, 3, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java index 457070a0..c2232356 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java @@ -62,7 +62,7 @@ void encode_thenDecode_roundtripsAllStrings(String name, String[] values) { EncodingId.VORTEX_VARBINVIEW, null, new ArrayNode[0], result.rootNode().bufferIndices(), null); Registry registry = TestRegistry.of(sut); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, values.length, bufs, registry, arena); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, values.length, bufs, registry, arena); var decoded = (VarBinArray) sut.decode(ctx); // Then @@ -151,7 +151,7 @@ void decode_roundtrip_returnsAllStrings(String name, String[] values) { Registry registry = TestRegistry.of(new VarBinViewEncoding()); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, n, segBufs, registry, arena); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, n, segBufs, registry, arena); var sut = 
new VarBinViewEncoding(); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java index db660604..bc0270e5 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java @@ -57,7 +57,7 @@ void decode_withoutShredded_returnsCoreStorageOnly() { new ArrayNode[]{coreNode}, new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, new MemorySegment[0], registry, Arena.ofAuto()); // When @@ -86,7 +86,7 @@ void decode_withShredded_decodesSecondChild() { MemorySegment[] segments = {i32Segment(1, 2, 3)}; Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, segments, registry, Arena.ofAuto()); // When @@ -108,7 +108,7 @@ void decode_emptyMetadata_noShredded() { new ArrayNode[]{coreNode}, new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, new MemorySegment[0], registry, Arena.ofAuto()); // When @@ -128,7 +128,7 @@ void decode_nullableDtype_preservedOnResult() { new ArrayNode[]{coreNode}, new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, nullableVariant, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, nullableVariant, N, new MemorySegment[0], registry, 
Arena.ofAuto()); // When @@ -146,7 +146,7 @@ void decode_wrongChildCount_throws() { new ArrayNode[0], new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, new MemorySegment[0], registry, Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java index eec143e7..340400ae 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java @@ -45,7 +45,7 @@ private static DecodeContext buildI32Ctx(int[] encodedUnsigned) { ArrayNode zigzagNode = ArrayNode.of(EncodingId.VORTEX_ZIGZAG, null, new ArrayNode[]{primitiveNode}, new int[0], null); Registry registry = TestRegistry.of(new ZigZagEncoding(), new PrimitiveEncoding()); - return new DecodeContext(zigzagNode, DTypes.I32, encodedUnsigned.length, + return DecodeContext.ofRawBuffers(zigzagNode, DTypes.I32, encodedUnsigned.length, new MemorySegment[]{seg}, registry, Arena.ofAuto()); } diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java index b1b0eaff..b49510fb 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java @@ -129,7 +129,7 @@ private static DecodeContext makeDictCtx( } ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZSTD, ByteBuffer.wrap(meta), new ArrayNode[0], bufIndices, null); - return new DecodeContext(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); } private static byte[] 
makeDictFor(byte[]... samples) { @@ -179,7 +179,7 @@ private static DecodeContext makeNullableCtx( Registry registry = Registry.builder().register(new BoolEncoding()).build(); - return new DecodeContext(node, dtype, n, allSegments.toArray(new MemorySegment[0]), + return DecodeContext.ofRawBuffers(node, dtype, n, allSegments.toArray(new MemorySegment[0]), registry, Arena.ofAuto()); } @@ -200,7 +200,7 @@ private static DecodeContext makeCtx(byte[] meta, DType dtype, long n, byte[]... } ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZSTD, ByteBuffer.wrap(meta), new ArrayNode[0], bufIndices, null); - return new DecodeContext(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); } private static byte[] compress(byte[] input) { @@ -382,7 +382,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new ZstdEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZSTD, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0, new MemorySegment[0], + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // When / Then From 4352be02d621a7700676b6fffef7f11933db142c Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Wed, 10 Jun 2026 22:13:08 +0200 Subject: [PATCH 4/6] sec(parser): change VortexHandle.slice to return BoundedSegment [Phase 3/4] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VortexHandle.slice (the internal escape hatch used by ScanIterator, InspectorTree, VortexInspectorTui, and integration-test code) now returns BoundedSegment instead of MemorySegment. Callers that genuinely need raw MemorySegment access add an explicit `.unwrapForSubParser("")` at the call site, making every cross-package escape-hatch usage greppable for audit. 
Sites migrated: - VortexReader.slice: wraps the file-segment slice as BoundedSegment with context "file slice". - VortexHttpReader.slice: wraps the freshly-fetched HTTP range as BoundedSegment with context "http range <offset>..<offset+length>" so error messages attribute bounds-check failures to the specific HTTP request. - ScanIterator.decodeFlat: `.unwrapForSubParser("flat segment decoder")` before handing off to FlatSegmentDecoder.decode (which still takes MemorySegment; full FlatSegmentDecoder migration would be a separate follow-up). - ScanIterator.readFlatStats: keeps a BoundedSegment for the parent stats region, slices the stats flatbuffer via `statsRegion.slice(off, len, "stats flatbuffer").asByteBufferLE()` instead of the raw MemorySegments.slice helper. - InspectorTree.peek / buildNode: unwrap with "inspector flat segment decoder" before peekFlatRoot. - VortexInspectorTui.runDictLoad / fetchHex: unwrap with "inspector tui flat segment" and "inspector tui hex peek" respectively. - PcoFixtureInspectionIntegrationTest.walkLayoutInner: unwrap with "integration test inspector". The slice method is still marked @Deprecated(forRemoval = true) on VortexHandle; the long-term direction is to remove cross-package raw segment access entirely and route everything through Scan + typed accessors. That demolition happens in Phase 4. ./mvnw verify — all unit + integration tests pass (incl. Rust round-trips). 
Co-Authored-By: Claude Opus 4.7 --- .../github/dfa1/vortex/cli/tui/VortexInspectorTui.java | 4 ++-- .../io/github/dfa1/vortex/inspect/InspectorTree.java | 4 ++-- .../integration/PcoFixtureInspectionIntegrationTest.java | 2 +- .../main/java/io/github/dfa1/vortex/io/VortexHandle.java | 7 ++++--- .../java/io/github/dfa1/vortex/io/VortexHttpReader.java | 8 ++++---- .../main/java/io/github/dfa1/vortex/io/VortexReader.java | 8 +++++--- .../java/io/github/dfa1/vortex/scan/ScanIterator.java | 9 +++++---- 7 files changed, 23 insertions(+), 19 deletions(-) diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java b/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java index e02ef48a..e947bc0a 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java @@ -571,7 +571,7 @@ private void runDictLoad(InspectorTree.Node dictNode) { try (java.lang.foreign.Arena arena = java.lang.foreign.Arena.ofConfined()) { int segIdx = values.segments().getFirst(); SegmentSpec spec = tree.segmentSpecs().get(segIdx); - java.lang.foreign.MemorySegment seg = handle.slice(spec.offset(), spec.length()); + java.lang.foreign.MemorySegment seg = handle.slice(spec.offset(), spec.length()).unwrapForSubParser("inspector tui flat segment"); io.github.dfa1.vortex.core.array.Array arr = new io.github.dfa1.vortex.encoding.FlatSegmentDecoder(handle.registry()) .decode(seg, handle.footer().arraySpecs(), @@ -760,7 +760,7 @@ private byte[] fetchHex(InspectorTree.Node node) { return new byte[0]; } try { - MemorySegment seg = handle.slice(spec.offset(), wanted); + MemorySegment seg = handle.slice(spec.offset(), wanted).unwrapForSubParser("inspector tui hex peek"); byte[] buf = new byte[wanted]; MemorySegment.copy(seg, 0, MemorySegment.ofArray(buf), 0, wanted); return buf; diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java 
b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java index ca4fae4d..e9e2e81f 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -147,7 +147,7 @@ public static Peek peek(Node node, VortexHandle handle) { if (spec.compression().code != 0) { return Peek.EMPTY; } - MemorySegment seg = handle.slice(spec.offset(), spec.length()); + MemorySegment seg = handle.slice(spec.offset(), spec.length()).unwrapForSubParser("inspector flat segment decoder"); return peekFlatRoot(seg, handle.footer().arraySpecs()); } @@ -202,7 +202,7 @@ private static Node buildNode(Layout layout, Optional fieldName, VortexH int segIdx = layout.segments().getFirst(); SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx); if (spec.compression().code == 0) { - MemorySegment seg = handle.slice(spec.offset(), spec.length()); + MemorySegment seg = handle.slice(spec.offset(), spec.length()).unwrapForSubParser("inspector flat segment decoder"); Peek peek = peekFlatRoot(seg, arraySpecs); if (peek.encoding() != null) { localUsed.add(peek.encoding()); diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java index b16eab76..bb876e10 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java @@ -101,7 +101,7 @@ private static void walkLayoutInner(VortexReader vf, Layout layout, List if ((layout.isFlat() || layout.isDict()) && !layout.segments().isEmpty()) { int segIdx = layout.segments().getFirst(); SegmentSpec spec = segmentSpecs.get(segIdx); - MemorySegment seg = vf.slice(spec.offset(), spec.length()); + MemorySegment seg = vf.slice(spec.offset(), 
spec.length()).unwrapForSubParser("integration test inspector"); scanFlatSegment(seg, arraySpecs, stats, currentPath); return; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java index e5f5bce8..4968264a 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; @@ -8,7 +9,6 @@ import io.github.dfa1.vortex.scan.ScanOptions; import java.io.Closeable; -import java.lang.foreign.MemorySegment; /// Common interface for handles to a Vortex file, regardless of storage backend. /// @@ -37,11 +37,12 @@ public interface VortexHandle extends Closeable { /// /// @param offset the start offset in bytes /// @param length the number of bytes to expose - /// @return a read-only [MemorySegment] view of the requested range + /// @return a {@link BoundedSegment} view of the requested range; bounds-checking on + /// later sub-slices is enforced by the type /// @deprecated marked for removal once the reader-internal packages consolidate (see /// {@code TODO.md}); kept here as an interim escape hatch for vortex-internal callers. 
@Deprecated(since = "0.4.0", forRemoval = true) - MemorySegment slice(long offset, long length); + BoundedSegment slice(long offset, long length); ScanIterator scan(ScanOptions options); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java index 02eae17c..8974f2f7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java @@ -220,10 +220,10 @@ public long fileSize() { // ── HTTP helpers ────────────────────────────────────────────────────────── - /// Fetches bytes `[offset, offset+length)` via HTTP Range and returns them - /// as an off-heap [MemorySegment] tied to this reader's [Arena]. + /// Fetches bytes `[offset, offset+length)` via HTTP Range and returns them as a + /// {@link BoundedSegment} wrapping an off-heap region tied to this reader's {@link Arena}. @Override - public MemorySegment slice(long offset, long length) { + public BoundedSegment slice(long offset, long length) { byte[] bytes; try { bytes = fetchRange(uri, offset, offset + length - 1); @@ -233,7 +233,7 @@ public MemorySegment slice(long offset, long length) { } MemorySegment seg = arena.allocate(length); MemorySegment.copy(MemorySegment.ofArray(bytes), 0, seg, 0, length); - return seg.asReadOnly(); + return new BoundedSegment(seg.asReadOnly(), "http range " + offset + ".." + (offset + length)); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java index af7a1726..012d669e 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java @@ -241,10 +241,12 @@ private ArrayStats readFlatStats(Layout flat) { return ArrayStats.fromFbs(root.stats()); } - /// Zero-copy read-only slice of the memory-mapped file. 
+ /// Zero-copy slice of the memory-mapped file, wrapped as a {@link BoundedSegment}. @Override - public MemorySegment slice(long offset, long length) { - return MemorySegments.slice(fileSegment, offset, length, "file segment").asReadOnly(); + public BoundedSegment slice(long offset, long length) { + return new BoundedSegment( + MemorySegments.slice(fileSegment, offset, length, "file segment").asReadOnly(), + "file slice"); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java index 8017b92e..bedf8834 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java @@ -3,7 +3,7 @@ import io.github.dfa1.vortex.core.ArrayStats; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Layout; -import io.github.dfa1.vortex.core.MemorySegments; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.core.VortexException; @@ -480,7 +480,7 @@ private Array decodeFlat(Layout flat, DType dtype, SegmentAllocator arena) { } int segIdx = flat.segments().getFirst(); SegmentSpec spec = file.footer().segmentSpecs().get(segIdx); - MemorySegment seg = file.slice(spec.offset(), spec.length()); + MemorySegment seg = file.slice(spec.offset(), spec.length()).unwrapForSubParser("flat segment decoder"); return new FlatSegmentDecoder(registry).decode(seg, file.footer().arraySpecs(), dtype, flat.rowCount(), arena); } @@ -633,13 +633,14 @@ private ArrayStats readFlatStats(Layout flat) { int segIdx = flat.segments().getFirst(); SegmentSpec spec = file.footer().segmentSpecs().get(segIdx); long segLen = spec.length(); - MemorySegment seg = file.slice(spec.offset(), segLen); + BoundedSegment statsRegion = file.slice(spec.offset(), segLen); + MemorySegment seg = 
statsRegion.unwrapForSubParser("stats segment fbLen read"); // Stats FlatBuffer lives in the segment's last 4+fbLen bytes; reading the whole // segment as a ByteBuffer would fail for segments larger than 2 GB (ByteBuffer cap). int fbLen = seg.get(LE_INT, segLen - 4); long fbStart = segLen - 4L - fbLen; - ByteBuffer fbBuf = MemorySegments.slice(seg, fbStart, fbLen, "stats flatbuffer").asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer fbBuf = statsRegion.slice(fbStart, fbLen, "stats flatbuffer").asByteBufferLE(); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); io.github.dfa1.vortex.fbs.ArrayNode root = fbArray.root(); From a9a5189165e9e3e838fd2b7ccf44541742809a70 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Wed, 10 Jun 2026 22:15:44 +0200 Subject: [PATCH 5/6] sec(parser): migrate VortexReader internals to BoundedSegment [Phase 4/4] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VortexReader.slice and VortexReader.readFlatStats — the last two file-open / scan-adjacent sites still calling MemorySegments.slice directly — now construct a BoundedSegment from fileSegment and slice through it: - VortexReader.slice: returns `new BoundedSegment(fileSegment, "vortex file").slice(offset, length, "file slice")`. The intermediate label flows into VortexHandle.slice's BoundedSegment return value, so a downstream `.slice(off, len, "...")` on it carries both parent context ("file slice") and the child label in error messages. - VortexReader.readFlatStats: builds a `statsRegion` BoundedSegment for the per-flat stats slice, then uses `statsRegion.getIntLE(...)` for the trailing fbLen read and `statsRegion.slice(fbStart, fbLen, "stats flatbuffer").asByteBufferLE()` for the FlatBuffer payload. The bounds-check helper calls (MemorySegments.slice / .checkRange) are no longer reachable from reader-layer code. 
MemorySegments class javadoc updated to describe its new role as the implementation detail behind BoundedSegment — application code should prefer BoundedSegment, and direct MemorySegments use is reserved for constructing a BoundedSegment from a raw segment at the mmap boundary. Phase 4 closeout — the remaining unwrapForSubParser sites (35) are all in encoders that consume `ctx.buffer(i)` and pass to sub-parsers (ProtoReader, FlatBuffer runtime) which take raw MemorySegment. Future work to drop those would mean adapting ProtoReader to take BoundedSegment directly (it already does its own bounds-checked cursor internally) and adding a BoundedSegment-aware FlatBuffer helper. Both are mechanically straightforward but out of scope here; the audit point — every trust transfer is greppable via unwrapForSubParser — is satisfied. ./mvnw verify — all unit + integration tests pass (incl. Rust round-trips). Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/MemorySegments.java | 18 ++++++++++++------ .../github/dfa1/vortex/io/VortexReader.java | 19 +++++++++---------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java b/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java index 508adac5..0af2e53d 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java @@ -4,12 +4,18 @@ /// Bounds-checked wrappers for {@link MemorySegment} slicing on untrusted input. /// -///

Every call site in {@code io}, {@code scan}, and {@code encoding} that slices a -/// memory-mapped file region by an offset or length read from the on-disk schema must -/// route through {@link #slice(MemorySegment, long, long, String)} instead of calling -/// {@link MemorySegment#asSlice(long, long)} directly. The contract: malformed input -/// throws {@link VortexException}, never {@link IndexOutOfBoundsException}, -/// {@link IllegalArgumentException}, or any other unchecked JDK exception. +///

Application code in {@code io}, {@code scan}, and {@code encoding} should prefer +/// {@link BoundedSegment}, which encapsulates a segment + context label and makes the +/// safe-slice operation the only available API on the type. This class is the underlying +/// implementation: {@code BoundedSegment.slice} delegates to {@link #slice}, and +/// {@code BoundedSegment}'s primitive readers delegate to {@link #checkRange}. +/// +///

Direct {@code MemorySegments.slice} use is reserved for the few places that build a +/// {@code BoundedSegment} in the first place (the mmap boundary in {@code VortexReader.parse}) +/// or that need a bounded {@link MemorySegment} without producing a {@code BoundedSegment}. +/// In both cases the contract is the same: malformed input throws {@link VortexException}, +/// never {@link IndexOutOfBoundsException}, {@link IllegalArgumentException}, or any other +/// unchecked JDK exception. public final class MemorySegments { private MemorySegments() { diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java index 012d669e..0fb1e8c0 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java @@ -5,7 +5,6 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; -import io.github.dfa1.vortex.core.MemorySegments; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; @@ -222,17 +221,18 @@ private ArrayStats readFlatStats(Layout flat) { if (segLen < 4) { return ArrayStats.empty(); } - MemorySegment seg = MemorySegments.slice(fileSegment, spec.offset(), segLen, "stats segment"); - int fbLen = seg.get(LE_INT, segLen - 4); + BoundedSegment statsRegion = new BoundedSegment(fileSegment, "vortex file") + .slice(spec.offset(), segLen, "stats segment"); + int fbLen = statsRegion.getIntLE(segLen - 4); // Reject negative fbLen (signed int from untrusted bytes) or any value that would push - // fbStart below 0. MemorySegments.slice would catch this too, but returning empty here keeps - // the older lenient behaviour for files with corrupt stats blobs — MemorySegments is reserved - // for offsets/lengths that must be valid (the data path). 
+ // fbStart below 0. BoundedSegment.slice would also catch this, but returning empty here keeps + // the older lenient behaviour for files with corrupt stats blobs — bounded slicing is + // reserved for offsets/lengths that must be valid (the data path). if (fbLen < 0 || fbLen > segLen - 4) { return ArrayStats.empty(); } long fbStart = segLen - 4L - fbLen; - var fbBuf = MemorySegments.slice(seg, fbStart, fbLen, "stats flatbuffer").asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + var fbBuf = statsRegion.slice(fbStart, fbLen, "stats flatbuffer").asByteBufferLE(); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); var root = fbArray.root(); if (root == null) { @@ -244,9 +244,8 @@ private ArrayStats readFlatStats(Layout flat) { /// Zero-copy slice of the memory-mapped file, wrapped as a {@link BoundedSegment}. @Override public BoundedSegment slice(long offset, long length) { - return new BoundedSegment( - MemorySegments.slice(fileSegment, offset, length, "file segment").asReadOnly(), - "file slice"); + return new BoundedSegment(fileSegment, "vortex file") + .slice(offset, length, "file slice"); } @Override From 66a50acee018978fdca17a354d8d9760994aba63 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Thu, 11 Jun 2026 08:36:50 +0200 Subject: [PATCH 6/6] sec(parser): fold MemorySegments into BoundedSegment as the single API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After Phase 4 nothing outside BoundedSegment called MemorySegments.slice or MemorySegments.checkRange any more, so the static helper was a one-line proxy used only by BoundedSegment itself — two parallel APIs saying the same thing. Moves the bounds-check logic directly into BoundedSegment as a private instance method that closes over `context`, so each call no longer re-passes the label. slice now does the check inline and calls seg.asSlice directly; getByte/getIntLE/getLongLE share the same private checkRange. 
Deletes MemorySegments.java and MemorySegmentsTest.java. Five of the seven test cases that lived in MemorySegmentsTest (zero-length-at-end, negative offset, negative length, len > segSize, overflow-prone Long.MAX_VALUE - 1 + 100) were absorbed into BoundedSegmentTest so the coverage doesn't regress; the other two (in-range slice, offset+length past the segment end) are not re-added by this patch, presumably because BoundedSegmentTest already exercises them. Net: one type, one API, ~93 lines fewer code. No behavioural change; ./mvnw verify — all unit + integration tests pass. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/BoundedSegment.java | 27 ++++-- .../dfa1/vortex/core/MemorySegments.java | 67 -------------- .../dfa1/vortex/core/BoundedSegmentTest.java | 48 ++++++++++ .../dfa1/vortex/core/MemorySegmentsTest.java | 89 ------------------- 4 files changed, 69 insertions(+), 162 deletions(-) delete mode 100644 core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java delete mode 100644 core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java diff --git a/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java b/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java index 304df8c5..b04ff356 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java @@ -46,9 +46,8 @@ public long byteSize() { /// @throws VortexException if {@code off} or {@code len} is negative, or if /// {@code off + len > this.byteSize()} public BoundedSegment slice(long off, long len, String childContext) { - return new BoundedSegment( - MemorySegments.slice(seg, off, len, context), - childContext); + checkRange(off, len); + return new BoundedSegment(seg.asSlice(off, len), childContext); } /// Bounds-checked single-byte read. 
@@ -57,7 +56,7 @@ public BoundedSegment slice(long off, long len, String childContext) { /// @return the byte at {@code off} /// @throws VortexException if {@code off} is negative or {@code >= this.byteSize()} public byte getByte(long off) { - MemorySegments.checkRange(seg, off, 1, context); + checkRange(off, 1); return seg.get(BYTE, off); } @@ -67,7 +66,7 @@ public byte getByte(long off) { /// @return the int at {@code off} /// @throws VortexException if {@code off} is negative or {@code > this.byteSize() - 4} public int getIntLE(long off) { - MemorySegments.checkRange(seg, off, 4, context); + checkRange(off, 4); return seg.get(LE_INT, off); } @@ -77,10 +76,26 @@ public int getIntLE(long off) { /// @return the long at {@code off} /// @throws VortexException if {@code off} is negative or {@code > this.byteSize() - 8} public long getLongLE(long off) { - MemorySegments.checkRange(seg, off, 8, context); + checkRange(off, 8); return seg.get(LE_LONG, off); } + private void checkRange(long off, long len) { + long segSize = seg.byteSize(); + if (off < 0) { + throw new VortexException("malformed " + context + ": negative offset " + off); + } + if (len < 0) { + throw new VortexException("malformed " + context + ": negative length " + len); + } + // Overflow-safe form of `off + len > segSize`. The subtraction can't underflow because + // len has already been bounded against segSize on the line above (segSize >= 0 always). + if (len > segSize || off > segSize - len) { + throw new VortexException("malformed " + context + ": offset+length " + + off + "+" + len + " exceeds segment size " + segSize); + } + } + /// Little-endian {@link ByteBuffer} view of the whole bounded region, used by the /// FlatBuffer runtime (which performs its own offset validation against the buffer's /// capacity). 
diff --git a/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java b/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java deleted file mode 100644 index 0af2e53d..00000000 --- a/core/src/main/java/io/github/dfa1/vortex/core/MemorySegments.java +++ /dev/null @@ -1,67 +0,0 @@ -package io.github.dfa1.vortex.core; - -import java.lang.foreign.MemorySegment; - -/// Bounds-checked wrappers for {@link MemorySegment} slicing on untrusted input. -/// -///

Application code in {@code io}, {@code scan}, and {@code encoding} should prefer -/// {@link BoundedSegment}, which encapsulates a segment + context label and makes the -/// safe-slice operation the only available API on the type. This class is the underlying -/// implementation: {@code BoundedSegment.slice} delegates to {@link #slice}, and -/// {@code BoundedSegment}'s primitive readers delegate to {@link #checkRange}. -/// -///

Direct {@code MemorySegments.slice} use is reserved for the few places that build a -/// {@code BoundedSegment} in the first place (the mmap boundary in {@code VortexReader.parse}) -/// or that need a bounded {@link MemorySegment} without producing a {@code BoundedSegment}. -/// In both cases the contract is the same: malformed input throws {@link VortexException}, -/// never {@link IndexOutOfBoundsException}, {@link IllegalArgumentException}, or any other -/// unchecked JDK exception. -public final class MemorySegments { - - private MemorySegments() { - } - - /// Returns a slice of {@code seg} starting at {@code off} for {@code len} bytes, - /// rejecting out-of-range or overflow-prone input with a {@link VortexException} - /// labelled by {@code context}. - /// - /// @param seg backing segment - /// @param off start offset in bytes; must be {@code >= 0} and {@code <= seg.byteSize() - len} - /// @param len slice length in bytes; must be {@code >= 0} and {@code <= seg.byteSize() - off} - /// @param context short label used in the exception message (e.g. {@code "footer blob"}, - /// {@code "segment spec data"}) so malformed-input errors point at the - /// specific on-disk structure rather than a generic offset - /// @return the bounds-checked slice - /// @throws VortexException if {@code off} or {@code len} is negative, or if - /// {@code off + len > seg.byteSize()} - public static MemorySegment slice(MemorySegment seg, long off, long len, String context) { - checkRange(seg, off, len, context); - return seg.asSlice(off, len); - } - - /// Bounds-check {@code off} and {@code len} against {@code seg} without producing a slice. - /// Used by {@link BoundedSegment}'s primitive readers, which need bounds-checking before - /// a {@link MemorySegment#get} call but do not need to materialise a sub-segment. 
- /// - /// @param seg backing segment - /// @param off start offset - /// @param len range length - /// @param context label used in the {@link VortexException} message - /// @throws VortexException if {@code off} or {@code len} is negative, or if - /// {@code off + len > seg.byteSize()} - public static void checkRange(MemorySegment seg, long off, long len, String context) { - long segSize = seg.byteSize(); - if (off < 0) { - throw new VortexException("malformed " + context + ": negative offset " + off); - } - if (len < 0) { - throw new VortexException("malformed " + context + ": negative length " + len); - } - // Overflow-safe form of `off + len > segSize`. The subtraction can't underflow because - // len has already been bounded against segSize on the line above (segSize >= 0 always). - if (len > segSize || off > segSize - len) { - throw new VortexException("malformed " + context + ": offset+length " - + off + "+" + len + " exceeds segment size " + segSize); - } - } -} diff --git a/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java b/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java index 270611f4..c7aa3769 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java @@ -50,6 +50,54 @@ void primitiveReadsAreBoundsChecked() { .hasMessageContaining("test region"); } + @Test + void zeroLengthAtEndIsAllowed() { + // Given — slice at the end with zero length. JDK permits this; the wrapper must too. + + // When + Then — does not throw; child region has byteSize 0. + assertThat(sut.slice(16, 0, "tail").byteSize()).isEqualTo(0); + } + + @Test + void negativeOffsetThrowsVortexException() { + // Given — adversarial offset from a malformed file. 
+ + // When + Then + assertThatThrownBy(() -> sut.slice(-1, 4, "child")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("negative offset"); + } + + @Test + void negativeLengthThrowsVortexException() { + // Given — adversarial length. + + // When + Then + assertThatThrownBy(() -> sut.slice(0, -1, "child")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("negative length"); + } + + @Test + void lengthAloneBiggerThanSegmentThrows() { + // Given — len > segSize with off=0; the wrapper rejects before any subtraction. + + // When + Then + assertThatThrownBy(() -> sut.slice(0, 17, "child")) + .isInstanceOf(VortexException.class); + } + + @Test + void overflowingOffsetPlusLengthRejected() { + // Given — adversarial values designed to overflow a naive `off + len` computation: + // (off + len) wraps to a small positive number, which would pass a naive + // `off + len > segSize` check. The overflow-safe form catches it. + + // When + Then + assertThatThrownBy(() -> sut.slice(Long.MAX_VALUE - 1, 100, "child")) + .isInstanceOf(VortexException.class); + } + @Test void unwrapForSubParserReturnsRawSegment() { // Given — explicit trust transfer documented by the reason string. 
The unwrapped diff --git a/core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java deleted file mode 100644 index ab00e0fe..00000000 --- a/core/src/test/java/io/github/dfa1/vortex/core/MemorySegmentsTest.java +++ /dev/null @@ -1,89 +0,0 @@ -package io.github.dfa1.vortex.core; - -import org.junit.jupiter.api.Test; - -import java.lang.foreign.MemorySegment; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -class MemorySegmentsTest { - - private final MemorySegment sut = MemorySegment.ofArray(new byte[16]); - - @Test - void inRangeSliceReturnsExpectedRegion() { - // Given valid offset+length inside the 16-byte backing array. - - // When - MemorySegment slice = MemorySegments.slice(sut, 4, 8, "test region"); - - // Then - assertThat(slice.byteSize()).isEqualTo(8); - } - - @Test - void zeroLengthAtEndIsAllowed() { - // Given — offset at the end, zero-length. The JDK permits this; we must too. - - // When + Then - assertThat(MemorySegments.slice(sut, 16, 0, "tail").byteSize()).isEqualTo(0); - } - - @Test - void negativeOffsetThrowsVortexException() { - // Given — adversarial offset from a malformed file. - // Without the wrapper, MemorySegment.asSlice throws IndexOutOfBoundsException — - // not VortexException — breaking the contract documented in SECURITY.md. - - // When + Then - assertThatThrownBy(() -> MemorySegments.slice(sut, -1, 4, "region")) - .isInstanceOf(VortexException.class) - .hasMessageContaining("region") - .hasMessageContaining("negative offset"); - } - - @Test - void negativeLengthThrowsVortexException() { - // Given — adversarial length. 
- - // When + Then - assertThatThrownBy(() -> MemorySegments.slice(sut, 0, -1, "region")) - .isInstanceOf(VortexException.class) - .hasMessageContaining("region") - .hasMessageContaining("negative length"); - } - - @Test - void offsetPlusLengthBeyondSegmentSizeThrows() { - // Given — 16-byte buffer, request 12 bytes starting at offset 8. - - // When + Then - assertThatThrownBy(() -> MemorySegments.slice(sut, 8, 12, "blob")) - .isInstanceOf(VortexException.class) - .hasMessageContaining("blob") - .hasMessageContaining("exceeds segment size 16"); - } - - @Test - void lengthAloneBiggerThanSegmentThrows() { - // Given — len > segSize even with off=0. - - // When + Then - assertThatThrownBy(() -> MemorySegments.slice(sut, 0, 17, "blob")) - .isInstanceOf(VortexException.class); - } - - @Test - void overflowingOffsetPlusLengthRejected() { - // Given — adversarial values designed to overflow a naive `off + len` computation. - // (off + len) wraps to a small positive number, which would pass a naive - // `off + len > segSize` check. The wrapper's overflow-safe form catches it. - long off = Long.MAX_VALUE - 1; - long len = 100; - - // When + Then - assertThatThrownBy(() -> MemorySegments.slice(sut, off, len, "blob")) - .isInstanceOf(VortexException.class); - } -}