diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java b/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java index e02ef48a..e947bc0a 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/tui/VortexInspectorTui.java @@ -571,7 +571,7 @@ private void runDictLoad(InspectorTree.Node dictNode) { try (java.lang.foreign.Arena arena = java.lang.foreign.Arena.ofConfined()) { int segIdx = values.segments().getFirst(); SegmentSpec spec = tree.segmentSpecs().get(segIdx); - java.lang.foreign.MemorySegment seg = handle.slice(spec.offset(), spec.length()); + java.lang.foreign.MemorySegment seg = handle.slice(spec.offset(), spec.length()).unwrapForSubParser("inspector tui flat segment"); io.github.dfa1.vortex.core.array.Array arr = new io.github.dfa1.vortex.encoding.FlatSegmentDecoder(handle.registry()) .decode(seg, handle.footer().arraySpecs(), @@ -760,7 +760,7 @@ private byte[] fetchHex(InspectorTree.Node node) { return new byte[0]; } try { - MemorySegment seg = handle.slice(spec.offset(), wanted); + MemorySegment seg = handle.slice(spec.offset(), wanted).unwrapForSubParser("inspector tui hex peek"); byte[] buf = new byte[wanted]; MemorySegment.copy(seg, 0, MemorySegment.ofArray(buf), 0, wanted); return buf; diff --git a/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java b/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java new file mode 100644 index 00000000..b04ff356 --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/core/BoundedSegment.java @@ -0,0 +1,123 @@ +package io.github.dfa1.vortex.core; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/// A memory-mapped region with built-in bounds-checking for slicing on untrusted input. +/// +///

By construction, callers cannot reach {@link MemorySegment#asSlice(long, long)} without +/// going through {@link #slice(long, long, String)}, which validates the offset/length against +/// this region's size and throws {@link VortexException} on malformed input — +/// never {@link IndexOutOfBoundsException}. +/// +///

The {@code context} label travels with the type; nested slices receive an explicit +/// child label at the {@link #slice} site. Error messages thus name the on-disk structure +/// ({@code "trailer"}, {@code "postscript blob"}, {@code "encoded buffer 3"}) alongside the +/// offending offset and length values. +/// +///

The raw segment is exposed only via {@link #unwrapForSubParser(String)}, which both +/// documents the trust transfer and forces a {@code reason} string so every escape-hatch +/// site is greppable for audit. NOTE(review): because this is a record, the implicit public {@code seg()} accessor also returns the raw segment, so this is a convention rather than a compiler-enforced guarantee. +/// +/// @param seg the backing memory-mapped region; lifetime tied to the {@link +/// java.lang.foreign.Arena Arena} that produced it +/// @param context human-readable label naming the on-disk structure this region represents +public record BoundedSegment(MemorySegment seg, String context) { + + private static final ValueLayout.OfByte BYTE = ValueLayout.JAVA_BYTE; + private static final ValueLayout.OfInt LE_INT = + ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + private static final ValueLayout.OfLong LE_LONG = + ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + /// @return total size of the bounded region in bytes + public long byteSize() { + return seg.byteSize(); + } + + /// Returns a sub-region with a fresh context label. A failed bounds check is reported under this (parent) region's label, not {@code childContext}. + /// + /// @param off start offset in bytes, relative to this region + /// @param len slice length in bytes + /// @param childContext label for the resulting sub-region + /// @return the bounded sub-region + /// @throws VortexException if {@code off} or {@code len} is negative, or if + /// {@code off + len > this.byteSize()} + public BoundedSegment slice(long off, long len, String childContext) { + checkRange(off, len); + return new BoundedSegment(seg.asSlice(off, len), childContext); + } + + /// Bounds-checked single-byte read. + /// + /// @param off byte offset + /// @return the byte at {@code off} + /// @throws VortexException if {@code off} is negative or {@code >= this.byteSize()} + public byte getByte(long off) { + checkRange(off, 1); + return seg.get(BYTE, off); + } + + /// Bounds-checked little-endian 32-bit read. 
+ /// + /// @param off byte offset of the 4-byte word; need not be 4-byte aligned + /// @return the int at {@code off} + /// @throws VortexException if {@code off} is negative or {@code > this.byteSize() - 4} + public int getIntLE(long off) { + checkRange(off, 4); + return seg.get(LE_INT, off); + } + + /// Bounds-checked little-endian 64-bit read. + /// + /// @param off byte offset of the 8-byte word; need not be 8-byte aligned + /// @return the long at {@code off} + /// @throws VortexException if {@code off} is negative or {@code > this.byteSize() - 8} + public long getLongLE(long off) { + checkRange(off, 8); + return seg.get(LE_LONG, off); + } + + private void checkRange(long off, long len) { + long segSize = seg.byteSize(); + if (off < 0) { + throw new VortexException("malformed " + context + ": negative offset " + off); + } + if (len < 0) { + throw new VortexException("malformed " + context + ": negative length " + len); + } + // Overflow-safe form of `off + len > segSize`. The subtraction can't underflow because + // the short-circuited `len > segSize` test below guarantees len <= segSize before `segSize - len` is evaluated (segSize >= 0 always). + if (len > segSize || off > segSize - len) { + throw new VortexException("malformed " + context + ": offset+length " + + off + "+" + len + " exceeds segment size " + segSize); + } + } + + /// Little-endian {@link ByteBuffer} view of the whole bounded region, used by the + /// FlatBuffer runtime (which performs its own offset validation against the buffer's + /// capacity). + /// + /// @return a {@link ByteBuffer} view in little-endian order + public ByteBuffer asByteBufferLE() { + return seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + } + + /// Escape hatch returning the raw {@link MemorySegment} for a downstream parser that + /// takes its own bounds-checked cursor (for example {@link + /// io.github.dfa1.vortex.proto.ProtoReader}). The {@code reason} string names the + /// sub-parser for diagnostic attribution at the call site. + /// + ///

Audit point. Every call to this method is a trust transfer + /// across the bounds-checking boundary. New call sites must justify in review why + /// the receiver re-validates the bounds itself. + /// + /// @param reason short label naming the sub-parser ({@code "proto reader"}, + /// {@code "flatbuffer root"}); not read by this method, it exists only so call sites are greppable + /// @return the raw memory segment backing this region (the same instance as {@code seg()}) + public MemorySegment unwrapForSubParser(String reason) { + return seg; + } +} diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java index 23af45b3..4f933e96 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/BitpackedEncoding.java @@ -270,7 +270,7 @@ static Array decode(DecodeContext ctx) { int typeBits = ptype.byteSize() * 8; long rowCount = ctx.rowCount(); - MemorySegment packed = ctx.buffer(0); + MemorySegment packed = ctx.buffer(0).unwrapForSubParser("bitpacked encoding"); MemorySegment output = ctx.arena().allocate(rowCount * ptype.byteSize()); fastlanesUnpackToSeg(packed, bitWidth, offset, typeBits, rowCount, output); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java index 19292a07..caf8dd33 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/BoolEncoding.java @@ -68,6 +68,6 @@ public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { @Override public Array decode(DecodeContext ctx) { - return new BoolArray(ctx.dtype(), ctx.rowCount(), ctx.buffer(0)); + return new BoolArray(ctx.dtype(), ctx.rowCount(), ctx.buffer(0).unwrapForSubParser("bool encoding")); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java index 
6fa711a4..e98cdcfd 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/ByteBoolEncoding.java @@ -44,7 +44,7 @@ public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { @Override public Array decode(DecodeContext ctx) { long n = ctx.rowCount(); - MemorySegment bytes = ctx.buffer(0); + MemorySegment bytes = ctx.buffer(0).unwrapForSubParser("bytebool encoding"); long packedBytes = (n + 7) >>> 3; MemorySegment packed = ctx.arena().allocate(packedBytes > 0 ? packedBytes : 1); for (long i = 0; i < n; i++) { diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java index 77065b73..f6d3e34e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/ConstantEncoding.java @@ -133,7 +133,7 @@ private static ScalarValue buildScalar(PType ptype, long rawBits) { private static final class Decoder { private static Array decode(DecodeContext ctx) { - MemorySegment scalarBuf = ctx.buffer(0); + MemorySegment scalarBuf = ctx.buffer(0).unwrapForSubParser("constant encoding"); ScalarValue scalar; try { scalar = ScalarValue.decode(scalarBuf, 0, scalarBuf.byteSize()); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java index 078b7721..4e08f99a 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/DecimalEncoding.java @@ -109,7 +109,7 @@ private static Array decode(DecodeContext ctx) { } int valuesType = decoded.values_type(); int byteWidth = decimalTypeByteWidth(valuesType); - MemorySegment buffer = ctx.buffer(0); + MemorySegment buffer = ctx.buffer(0).unwrapForSubParser("decimal encoding"); long expected = 
ctx.rowCount() * byteWidth; if (buffer.byteSize() < expected) { throw new VortexException(EncodingId.VORTEX_DECIMAL, diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java b/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java index 78540e5b..e90cc950 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/DecodeContext.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.encoding; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.array.Array; @@ -24,10 +25,32 @@ public record DecodeContext( ArrayNode node, DType dtype, long rowCount, - MemorySegment[] segmentBuffers, + BoundedSegment[] segmentBuffers, Registry registry, SegmentAllocator arena ) { + /// Convenience factory that wraps raw {@link MemorySegment} buffers as {@link BoundedSegment}s + /// for tests and other callers that produce synthetic, trusted buffer arrays. Production + /// decoders receive their buffers from {@link FlatSegmentDecoder}, which already wraps them + /// against the parent flat segment. Each wrapper created here spans its whole raw buffer, so no bounds are narrowed. 
+ /// + /// @param node array node describing this encoding's tree structure + /// @param dtype logical type expected for the decoded array + /// @param rowCount number of logical rows to decode + /// @param rawBufs raw segment buffers; element {@code i} is wrapped whole and labelled {@code "test buffer i"}; the array itself is not retained + /// @param registry encoding registry used for recursive child decoding + /// @param arena allocator for decode output + /// @return a {@link DecodeContext} backed by bounded views of {@code rawBufs} + public static DecodeContext ofRawBuffers( + ArrayNode node, DType dtype, long rowCount, + MemorySegment[] rawBufs, Registry registry, SegmentAllocator arena) { + BoundedSegment[] wrapped = new BoundedSegment[rawBufs.length]; + for (int i = 0; i < rawBufs.length; i++) { + wrapped[i] = new BoundedSegment(rawBufs[i], "test buffer " + i); + } + return new DecodeContext(node, dtype, rowCount, wrapped, registry, arena); + } + /// Recursively decode child {@code i} using this context's dtype and row count. /// /// @param i zero-based child index within this node's children array @@ -78,8 +101,8 @@ public MemorySegment decodeChildSegment(int i, DType dtype, long rowCount) { /// Return the buffer at position `i` in this node's bufferIndices. 
/// /// @param i zero-based index into this node's {@code bufferIndices} array - /// @return the {@link MemorySegment} for the referenced segment buffer - public MemorySegment buffer(int i) { + /// @return the {@link BoundedSegment} for the referenced segment buffer + public BoundedSegment buffer(int i) { return segmentBuffers[node.bufferIndices()[i]]; } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java index 8244cc3a..17b4f370 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/DictEncoding.java @@ -378,7 +378,7 @@ private static Array decodeLegacyJava(DecodeContext ctx, byte codeTypeByte) { long rowCount = ctx.rowCount(); // Values: always VORTEX_PRIMITIVE leaf, read direct - MemorySegment valuesBuf = ctx.segmentBuffers()[ctx.node().children()[0].bufferIndices()[0]]; + MemorySegment valuesBuf = ctx.segmentBuffers()[ctx.node().children()[0].bufferIndices()[0]].unwrapForSubParser("dict encoding values"); // Codes: decode through registry — supports both raw (VORTEX_PRIMITIVE) and cascade (FASTLANES_BITPACKED) children DType codesDtype = new DType.Primitive(codePType, false); @@ -435,9 +435,9 @@ private static Array decodeUtf8DictLegacy(DecodeContext ctx, ByteBuffer meta) { PType codePType = PType.fromOrdinal(Byte.toUnsignedInt(meta.get(0))); long n = ctx.rowCount(); - MemorySegment dictBytes = ctx.buffer(0); - MemorySegment dictOffsets = ctx.buffer(1); - MemorySegment codes = ctx.buffer(2); + MemorySegment dictBytes = ctx.buffer(0).unwrapForSubParser("dict encoding"); + MemorySegment dictOffsets = ctx.buffer(1).unwrapForSubParser("dict encoding"); + MemorySegment codes = ctx.buffer(2).unwrapForSubParser("dict encoding"); return VarBinArray.ofDict(ctx.dtype(), n, dictBytes, dictOffsets, PType.I64, diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java 
b/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java index a5ffff87..4579ad0e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/FlatSegmentDecoder.java @@ -1,7 +1,8 @@ package io.github.dfa1.vortex.encoding; -import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.ArrayStats; +import io.github.dfa1.vortex.core.BoundedSegment; +import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.array.Array; import io.github.dfa1.vortex.fbs.Buffer; @@ -50,12 +51,13 @@ public Array decode(MemorySegment seg, List encodingSpecs, var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); int numBuffers = fbArray.buffersLength(); - MemorySegment[] bufs = new MemorySegment[numBuffers]; + BoundedSegment[] bufs = new BoundedSegment[numBuffers]; + BoundedSegment region = new BoundedSegment(seg, "flat segment"); long dataOffset = 0; for (int i = 0; i < numBuffers; i++) { Buffer bufDesc = fbArray.buffers(i); dataOffset += bufDesc.padding(); - bufs[i] = seg.asSlice(dataOffset, bufDesc.length()); + bufs[i] = region.slice(dataOffset, bufDesc.length(), "encoded buffer " + i); dataOffset += bufDesc.length(); } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java index 5f4a22bb..015906ac 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/FsstEncoding.java @@ -206,9 +206,9 @@ private static Array decode(DecodeContext ctx) { long n = ctx.rowCount(); - MemorySegment symbolsBuf = ctx.buffer(0); // 8 bytes per symbol (LE u64) - MemorySegment symbolLensBuf = ctx.buffer(1); // 1 byte per symbol - MemorySegment compressedBytes = ctx.buffer(2); // FSST-compressed heap + MemorySegment symbolsBuf = ctx.buffer(0).unwrapForSubParser("fsst encoding"); // 8 bytes per 
symbol (LE u64) + MemorySegment symbolLensBuf = ctx.buffer(1).unwrapForSubParser("fsst encoding"); // 1 byte per symbol + MemorySegment compressedBytes = ctx.buffer(2).unwrapForSubParser("fsst encoding"); // FSST-compressed heap MemorySegment uncompLensSeg = ctx.decodeChildSegment(0, new DType.Primitive(uncompLenPType, false), n); MemorySegment codesOffsetsSeg = ctx.decodeChildSegment(1, new DType.Primitive(codesOffPType, false), n + 1); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java index e9275e77..615d5eb0 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/PcoEncoding.java @@ -141,7 +141,7 @@ static Array decode(DecodeContext ctx) { for (int c = 0; c < nChunks; c++) { PcoChunkInfo chunkInfo = meta.chunks().get(c); - MemorySegment chunkMetaBuf = ctx.buffer(bufIdx++); + MemorySegment chunkMetaBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); PcoChunkMeta chunkMeta = readChunkMeta(chunkMetaBuf, dtypeSize); int mode = chunkMeta.mode(); @@ -160,7 +160,7 @@ static Array decode(DecodeContext ctx) { chunkMeta.ansSizeLog(), chunkMeta.bins()); for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); rawByteOffset = decodeConv1Page( primaryTans, chunkMeta.ansSizeLog(), chunkMeta.conv1Weights().length, @@ -186,7 +186,7 @@ static Array decode(DecodeContext ctx) { long mask = typeMask(dtypeSize); for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); rawByteOffset = decodeLookbackPage( deltaTans, chunkMeta.deltaAnsSizeLog(), 
primaryTans, chunkMeta.ansSizeLog(), @@ -202,7 +202,7 @@ static Array decode(DecodeContext ctx) { PcoTansDecoder tans = PcoTansDecoder.build(chunkMeta.ansSizeLog(), chunkMeta.bins()); for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); rawByteOffset = decodeClassicPage(tans, chunkMeta.ansSizeLog(), chunkMeta.deltaOrder(), primaryDtypeSize, pageBuf, pageN, rawLatents, rawByteOffset, @@ -225,7 +225,7 @@ static Array decode(DecodeContext ctx) { long adjByteOffset = 0L; for (int p = 0; p < chunkInfo.pages().size(); p++) { int pageN = chunkInfo.pages().get(p).n_values(); - MemorySegment pageBuf = ctx.buffer(bufIdx++); + MemorySegment pageBuf = ctx.buffer(bufIdx++).unwrapForSubParser("pco encoding"); decodeIntMultPage(primaryTans, primaryAnsSizeLog, deltaOrder, secondaryTans, secondaryAnsSizeLog, secondaryDeltaOrder, dtypeSize, pageBuf, pageN, diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java index 0794b639..1ecf5331 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveEncoding.java @@ -312,7 +312,7 @@ private static byte[] scalarF64(double v) { private static final class Decoder { private static Array decode(DecodeContext ctx) { - MemorySegment buf = ctx.buffer(0); + MemorySegment buf = ctx.buffer(0).unwrapForSubParser("primitive encoding"); long n = ctx.rowCount(); DType dt = ctx.dtype(); PType ptype = ((DType.Primitive) dt).ptype(); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java b/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java index 2af7cdfb..9c0fb3ad 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java +++ 
b/core/src/main/java/io/github/dfa1/vortex/encoding/Registry.java @@ -77,7 +77,7 @@ private static UnknownArray decodeUnknown(DecodeContext ctx, ArrayNode node) { }; MemorySegment[] bufs = new MemorySegment[node.bufferIndices().length]; for (int i = 0; i < bufs.length; i++) { - bufs[i] = ctx.buffer(i); + bufs[i] = ctx.buffer(i).unwrapForSubParser("registry"); } Array[] children = new Array[node.children().length]; for (int i = 0; i < children.length; i++) { diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java index 22e76b6d..d05c923e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/SparseEncoding.java @@ -210,7 +210,7 @@ private static Array decode(DecodeContext ctx) { } PType valuePtype = ((DType.Primitive) ctx.dtype()).ptype(); - MemorySegment fillBuf = ctx.buffer(0); + MemorySegment fillBuf = ctx.buffer(0).unwrapForSubParser("sparse encoding"); ScalarValue fillScalar; try { fillScalar = ScalarValue.decode(fillBuf, 0, fillBuf.byteSize()); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java index 0029af35..7d6a9e84 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinEncoding.java @@ -140,7 +140,7 @@ private static Array decode(DecodeContext ctx) { offsets = materialized; } - MemorySegment bytes = ctx.buffer(0); + MemorySegment bytes = ctx.buffer(0).unwrapForSubParser("varbin encoding"); return new VarBinArray(ctx.dtype(), n, bytes, offsets, offsetsPtype); } diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java index 8f42a783..93e40c37 100644 --- 
a/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/VarBinViewEncoding.java @@ -120,10 +120,10 @@ private static Array decode(DecodeContext ctx) { } // Views buffer is the last; data buffers are 0..numBufs-2 - MemorySegment viewsBuf = ctx.buffer(numBufs - 1); + MemorySegment viewsBuf = ctx.buffer(numBufs - 1).unwrapForSubParser("varbinview encoding"); MemorySegment[] dataBufs = new MemorySegment[numBufs - 1]; for (int i = 0; i < dataBufs.length; i++) { - dataBufs[i] = ctx.buffer(i); + dataBufs[i] = ctx.buffer(i).unwrapForSubParser("varbinview encoding"); } long n = ctx.rowCount(); diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java b/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java index 19baa539..ff6b7fe7 100644 --- a/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/ZstdEncoding.java @@ -314,12 +314,12 @@ private static MemorySegment decompressFramesWithDict( long totalUncompressed ) { MemorySegment out = ctx.arena().allocate(totalUncompressed); - byte[] dictBytes = ctx.buffer(0).toArray(ValueLayout.JAVA_BYTE); + byte[] dictBytes = ctx.buffer(0).unwrapForSubParser("zstd encoding").toArray(ValueLayout.JAVA_BYTE); try (ZstdDecompressCtx zctx = new ZstdDecompressCtx()) { zctx.loadDict(dictBytes); long outOffset = 0; for (int i = 0; i < frameCount; i++) { - byte[] compressed = ctx.buffer(i + 1).toArray(ValueLayout.JAVA_BYTE); + byte[] compressed = ctx.buffer(i + 1).unwrapForSubParser("zstd encoding").toArray(ValueLayout.JAVA_BYTE); int uncompSize = (int) meta.frames().get(i).uncompressed_size(); byte[] temp = new byte[uncompSize]; int written = zctx.decompressByteArray(temp, 0, uncompSize, compressed, 0, compressed.length); @@ -348,7 +348,7 @@ private static MemorySegment decompressFrames( ZstdDecompressor decompressor = new ZstdJavaDecompressor(); long outOffset = 0; 
for (int i = 0; i < frameCount; i++) { - MemorySegment frameSeg = ctx.buffer(i); + MemorySegment frameSeg = ctx.buffer(i).unwrapForSubParser("zstd encoding"); byte[] compressed = frameSeg.toArray(ValueLayout.JAVA_BYTE); int uncompSize = (int) meta.frames().get(i).uncompressed_size(); byte[] temp = new byte[uncompSize]; diff --git a/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java b/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java new file mode 100644 index 00000000..c7aa3769 --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/BoundedSegmentTest.java @@ -0,0 +1,114 @@ +package io.github.dfa1.vortex.core; + +import org.junit.jupiter.api.Test; + +import java.lang.foreign.MemorySegment; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class BoundedSegmentTest { + + private final BoundedSegment sut = new BoundedSegment( + MemorySegment.ofArray(new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + "test region"); + + @Test + void inRangeSliceReturnsExpectedRegion() { + // Given the 16-byte test region. + + // When + BoundedSegment child = sut.slice(4, 8, "child"); + + // Then — the slice carries its own context label, used in nested error messages. + assertThat(child.byteSize()).isEqualTo(8); + assertThat(child.context()).isEqualTo("child"); + } + + @Test + void badSliceThrowsVortexExceptionLabelledByParent() { + // Given — adversarial slice on the bounded region. The parent's context label + // ("test region") surfaces in the error so the caller knows which structure + // was being parsed when the bad offset arrived. + + // When + Then + assertThatThrownBy(() -> sut.slice(20, 4, "child")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("test region"); + } + + @Test + void primitiveReadsAreBoundsChecked() { + // Given — getIntLE at offset 12 needs 4 bytes (offsets 12..15, i.e. the half-open range [12, 16)), valid. 
+ + // When + Then + assertThat(sut.getIntLE(12)).isNotZero(); + + // Out-of-range read throws VortexException, not IOOBE. + assertThatThrownBy(() -> sut.getIntLE(13)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("test region"); + } + + @Test + void zeroLengthAtEndIsAllowed() { + // Given — slice at the end with zero length. JDK permits this; the wrapper must too. + + // When + Then — does not throw; child region has byteSize 0. + assertThat(sut.slice(16, 0, "tail").byteSize()).isEqualTo(0); + } + + @Test + void negativeOffsetThrowsVortexException() { + // Given — adversarial offset from a malformed file. + + // When + Then + assertThatThrownBy(() -> sut.slice(-1, 4, "child")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("negative offset"); + } + + @Test + void negativeLengthThrowsVortexException() { + // Given — adversarial length. + + // When + Then + assertThatThrownBy(() -> sut.slice(0, -1, "child")) + .isInstanceOf(VortexException.class) + .hasMessageContaining("negative length"); + } + + @Test + void lengthAloneBiggerThanSegmentThrows() { + // Given — len > segSize with off=0; the wrapper rejects before any subtraction. + + // When + Then + assertThatThrownBy(() -> sut.slice(0, 17, "child")) + .isInstanceOf(VortexException.class); + } + + @Test + void overflowingOffsetPlusLengthRejected() { + // Given — adversarial values designed to overflow a naive `off + len` computation: + // (off + len) wraps to a negative number (Long.MIN_VALUE + 98), which would pass a naive + // `off + len > segSize` check. The overflow-safe form catches it. + + // When + Then + assertThatThrownBy(() -> sut.slice(Long.MAX_VALUE - 1, 100, "child")) + .isInstanceOf(VortexException.class); + } + + @Test + void unwrapForSubParserReturnsRawSegment() { + // Given — explicit trust transfer documented by the reason string. The unwrapped + // segment is the same instance as the backing seg(); callers re-validate bounds + // in their own cursor (e.g. ProtoReader). 
+ + // When + MemorySegment raw = sut.unwrapForSubParser("test sub-parser"); + + // Then + assertThat(raw).isSameAs(sut.seg()); + assertThat(raw.byteSize()).isEqualTo(16); + } +} diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java index 5878e433..f80414ba 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/AlpEncodingTest.java @@ -84,7 +84,7 @@ private static DecodeContext buildAlpCtxF64( Registry registry = TestRegistry.of(new AlpEncoding(), new PrimitiveEncoding()); - return new DecodeContext(alpNode, DTypes.F64, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(alpNode, DTypes.F64, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); } private static DecodeContext buildAlpCtxF32( @@ -109,7 +109,7 @@ private static DecodeContext buildAlpCtxF32( Registry registry = TestRegistry.of(new AlpEncoding(), new PrimitiveEncoding()); - return new DecodeContext(alpNode, DTypes.F32, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(alpNode, DTypes.F32, encodedVals.length, segments, registry, java.lang.foreign.Arena.global()); } @Test diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java index 0ab812af..ca40537e 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/BitpackedEncodingPatchesTest.java @@ -59,7 +59,7 @@ void decode_appliesPatches_overridingBitPackedValues() { Registry registry = TestRegistry.of(new BitpackedEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = 
DecodeContext.ofRawBuffers( bpNode, DTypes.I32, base.length, segments, registry, Arena.global()); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java index 7e496ef9..738fe465 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ByteBoolEncodingTest.java @@ -66,7 +66,7 @@ private static DecodeContext buildCtx(byte[] byteValues) { MemorySegment buf = MemorySegment.ofArray(byteValues); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_BYTEBOOL, null, new ArrayNode[0], new int[]{0}, null); Registry registry = Registry.builder().register(new ByteBoolEncoding()).build(); - return new DecodeContext(node, DTypes.BOOL, byteValues.length, new MemorySegment[]{buf}, registry, + return DecodeContext.ofRawBuffers(node, DTypes.BOOL, byteValues.length, new MemorySegment[]{buf}, registry, Arena.ofAuto()); } diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java index 59030e0f..ee1baeed 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ChunkedEncodingTest.java @@ -137,7 +137,7 @@ void roundTrip_twoChunks_concatenatesValues() { new ArrayNode[]{offsetsNode, chunk0Node, chunk1Node}, new int[]{}, null); - DecodeContext ctx = new DecodeContext(root, i64, 5L, allBufs, registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, i64, 5L, allBufs, registry, Arena.ofAuto()); // When Array result = sut.decode(ctx); @@ -176,7 +176,7 @@ void singleChunk_returnsSameValues() { new ArrayNode[]{toArrayNode(offsetsResult.rootNode()), toArrayNode(remapped(chunkResult.rootNode(), 1))}, new int[]{}, null); - DecodeContext ctx = new DecodeContext(root, i64, 3L, allBufs, 
registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, i64, 3L, allBufs, registry, Arena.ofAuto()); // When Array result = new ChunkedEncoding().decode(ctx); @@ -196,7 +196,7 @@ void noChildren_throws() { .register(new ChunkedEncoding()) .build(); ArrayNode root = ArrayNode.of(EncodingId.VORTEX_CHUNKED, null, new ArrayNode[]{}, new int[]{}, null); - DecodeContext ctx = new DecodeContext(root, i64, 0L, new MemorySegment[]{}, registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, i64, 0L, new MemorySegment[]{}, registry, Arena.ofAuto()); // When / Then assertThatThrownBy(() -> new ChunkedEncoding().decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java index 68e8851b..a4cbc6e2 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/DateTimePartsEncodingTest.java @@ -116,7 +116,7 @@ void roundTrip_milliseconds_preservesDaysSecondsSubseconds() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_MS, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_MS, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); @@ -143,7 +143,7 @@ void roundTrip_nanoseconds_preservesSubsecondPrecision() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_NS, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_NS, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) 
sut.decode(ctx); @@ -166,7 +166,7 @@ void roundTrip_epoch_allZero() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_MS, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_MS, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); @@ -190,7 +190,7 @@ void roundTrip_multipleTimestamps_allRowsPreserved() { // When EncodeResult result = sut.encode(EXT_TIMESTAMP_MS, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), EXT_TIMESTAMP_MS, 4, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); @@ -218,7 +218,7 @@ void roundTrip_allUnits_epochIsZero(TimeUnit unit) { // When EncodeResult result = sut.encode(dtype, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, 1, bufs, registry(), Arena.global()); GenericArray decoded = (GenericArray) sut.decode(ctx); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java b/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java index f7fec03b..fdd6fdf3 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/EncodeTestHelper.java @@ -25,7 +25,7 @@ static DecodeContext toDecodeContext( List buffers = result.buffers(); MemorySegment[] segments = buffers.toArray(new MemorySegment[0]); ArrayNode root = toArrayNode(result.rootNode()); - return new DecodeContext(root, dtype, rowCount, 
segments, registry, Arena.ofAuto()); + return DecodeContext.ofRawBuffers(root, dtype, rowCount, segments, registry, Arena.ofAuto()); } private static ArrayNode toArrayNode(EncodeNode enc) { diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java index 5db5bed7..c6ce34df 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ExtEncodingTest.java @@ -55,7 +55,7 @@ void encode_extensionWrappingI64_roundTrips() { // Decode back Registry registry = TestRegistry.of(new PrimitiveEncoding(), new ExtEncoding()); ArrayNode rootNode = encodeNodeToArrayNode(result.rootNode()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( rootNode, extDType, data.length, result.buffers().toArray(MemorySegment[]::new), registry, Arena.ofAuto()); @@ -136,7 +136,7 @@ void decode_extensionWrappingI64_returnsStorageArray() { Registry registry = TestRegistry.of(new PrimitiveEncoding(), new ExtEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( extNode, extDType, values.length, new MemorySegment[]{buf}, registry, Arena.ofAuto()); var sut = new ExtEncoding(); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java index 4745c9a2..45127b7c 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/FixedSizeListEncodingTest.java @@ -85,7 +85,7 @@ void roundTrip_i32Elements_preservesValues() { // When EncodeResult result = sut.encode(dtype, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = 
DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, 2, bufs, registry(), Arena.global()); FixedSizeListArray decoded = (FixedSizeListArray) sut.decode(ctx); @@ -110,7 +110,7 @@ void roundTrip_fixedSizeOne_preservesValues() { // When EncodeResult result = sut.encode(dtype, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, 3, bufs, registry(), Arena.global()); FixedSizeListArray decoded = (FixedSizeListArray) sut.decode(ctx); @@ -129,7 +129,7 @@ void decode_wrongDtype_throws() { FixedSizeListEncoding sut = new FixedSizeListEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_FIXED_SIZE_LIST, null, new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java index 6ea12e5e..211ee224 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/FrameOfReferenceEncodingTest.java @@ -62,7 +62,7 @@ private static DecodeContext buildForContext( Registry registry = TestRegistry.of(new FrameOfReferenceEncoding(), new PrimitiveEncoding()); - return new DecodeContext(forNode, dtype, residuals.length, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(forNode, dtype, residuals.length, segments, registry, java.lang.foreign.Arena.global()); } @Test @@ -170,7 +170,7 @@ void 
decode_nullableResiduals_returnsMaskedArrayWithCorrectValues() { Registry registry = TestRegistry.of(new FrameOfReferenceEncoding(), new PrimitiveEncoding(), new BoolEncoding()); MemorySegment[] segments = {MemorySegment.ofArray(residualBytes), validitySeg}; - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( forNode, DTypes.I32, residuals.length, segments, registry, java.lang.foreign.Arena.global()); FrameOfReferenceEncoding sut = new FrameOfReferenceEncoding(); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java index cdd8eb20..ca1c103d 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/FsstEncodingTest.java @@ -96,7 +96,7 @@ void encode_thenDecode_roundtripsAllStrings(String name, String[] values) { .register(new PrimitiveEncoding()) .register(sut) .build(); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, values.length, bufs, registry, arena); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, values.length, bufs, registry, arena); var decoded = (VarBinArray) sut.decode(ctx); // Then @@ -158,7 +158,7 @@ private static DecodeContext buildCtx( EncodingId.VORTEX_FSST, ByteBuffer.wrap(metaBytes), new ArrayNode[]{uncompLensNode, codesOffNode}, new int[]{0, 1, 2}, null); - return new DecodeContext(root, DTypes.UTF8, n, segs, buildRegistry(), arena); + return DecodeContext.ofRawBuffers(root, DTypes.UTF8, n, segs, buildRegistry(), arena); } private static Registry buildRegistry() { @@ -265,7 +265,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new FsstEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_FSST, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, 0, new MemorySegment[0], + DecodeContext ctx = 
DecodeContext.ofRawBuffers(node, DTypes.UTF8, 0, new MemorySegment[0], buildRegistry(), Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java index 975b556b..df0937b0 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ListEncodingTest.java @@ -85,7 +85,7 @@ void roundTrip_i32Elements_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 3, bufs, registry(), Arena.global()); ListArray decoded = (ListArray) sut.decode(ctx); @@ -114,7 +114,7 @@ void roundTrip_emptyLists_preservesOffsets() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 2, bufs, registry(), Arena.global()); ListArray decoded = (ListArray) sut.decode(ctx); @@ -135,7 +135,7 @@ void roundTrip_singleList_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 1, bufs, registry(), Arena.global()); ListArray decoded = (ListArray) sut.decode(ctx); @@ -154,7 +154,7 @@ void decode_wrongDtype_throws() { ListEncoding sut = new ListEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_LIST, null, new 
ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0, new MemorySegment[0], registry(), Arena.global()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) @@ -170,7 +170,7 @@ void decode_wrongChildCount_throws() { ArrayNode node = ArrayNode.of(EncodingId.VORTEX_LIST, java.nio.ByteBuffer.wrap(new byte[0]), new ArrayNode[]{child}, new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.LIST_I32, 0, new MemorySegment[0], registry(), Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.LIST_I32, 0, new MemorySegment[0], registry(), Arena.global()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java index 539ccfa5..da410fd9 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ListViewEncodingTest.java @@ -83,7 +83,7 @@ void roundTrip_i32Elements_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 3, bufs, registry(), Arena.global()); ListViewArray decoded = (ListViewArray) sut.decode(ctx); @@ -112,7 +112,7 @@ void roundTrip_emptyLists_preservesZeroSizes() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( 
toArrayNode(result.rootNode()), DTypes.LIST_I32, 2, bufs, registry(), Arena.global()); ListViewArray decoded = (ListViewArray) sut.decode(ctx); @@ -135,7 +135,7 @@ void roundTrip_singleList_preservesValues() { // When EncodeResult result = sut.encode(DTypes.LIST_I32, data, EncodeTestHelper.testCtx()); MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), DTypes.LIST_I32, 1, bufs, registry(), Arena.global()); ListViewArray decoded = (ListViewArray) sut.decode(ctx); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java index 8646cb9a..4173fd59 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/NullEncodingTest.java @@ -37,7 +37,7 @@ void encode_thenDecode_roundTrips() { // When EncodeResult encoded = sut.encode(DTypes.NULL, null, EncodeTestHelper.testCtx()); ArrayNode node = ArrayNode.of(encoded.rootNode().encodingId(), null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.NULL, rowCount, new MemorySegment[0], + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.NULL, rowCount, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // Then @@ -53,7 +53,7 @@ class Decode { private static DecodeContext buildNullCtx(long rowCount) { ArrayNode node = ArrayNode.of(EncodingId.VORTEX_NULL, null, new ArrayNode[0], new int[0], null); Registry registry = Registry.builder().register(new NullEncoding()).build(); - return new DecodeContext(node, DTypes.NULL, rowCount, new MemorySegment[0], registry, Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, DTypes.NULL, rowCount, new MemorySegment[0], registry, Arena.ofAuto()); } @Test diff --git 
a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java index 66501d60..5176d421 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchedEncodingTest.java @@ -89,7 +89,7 @@ private static Array decode( new ArrayNode[]{innerNode, laneNode, idxNode, valNode}, new int[]{}, null); Registry registry = TestRegistry.of(new PatchedEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext(patchedNode, dtype, n, segments, registry, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(patchedNode, dtype, n, segments, registry, Arena.ofAuto()); return new PatchedEncoding().decode(ctx); } @@ -203,7 +203,7 @@ void decode_missingMetadata_throws() { ArrayNode patchedNode = ArrayNode.of(EncodingId.VORTEX_PATCHED, null, new ArrayNode[]{innerNode, innerNode, innerNode, innerNode}, new int[]{}, null); MemorySegment seg = i32Segment(1, 2, 3); - DecodeContext ctx = new DecodeContext(patchedNode, new DType.Primitive(PType.I32, false), 3, + DecodeContext ctx = DecodeContext.ofRawBuffers(patchedNode, new DType.Primitive(PType.I32, false), 3, new MemorySegment[]{seg}, Registry.empty(), Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java index 2882d576..a9d7b256 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PatchesBroadcastRegressionTest.java @@ -73,7 +73,7 @@ void bitpackedDecode_withConstantPatchesValues_broadcastsValueAcrossPatches() { DType dtype = new DType.Primitive(PType.I64, false); Registry registry = Registry.loadAll(); - DecodeContext ctx = new DecodeContext(root, dtype, n, + 
DecodeContext ctx = DecodeContext.ofRawBuffers(root, dtype, n, new MemorySegment[]{packedSeg, idxBufSeg, valBufSeg}, registry, Arena.ofAuto()); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java index dc462dcc..319dc2da 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PcoEncodingTest.java @@ -35,7 +35,7 @@ private static ByteBuffer validMetaBuffer() { private static DecodeContext ctxWith(ByteBuffer meta, DType dtype, long rowCount, MemorySegment[] buffers) { ArrayNode node = ArrayNode.of(EncodingId.VORTEX_PCO, meta, new ArrayNode[0], bufferIndices(buffers.length), null); - return new DecodeContext(node, dtype, rowCount, buffers, Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, rowCount, buffers, Registry.empty(), Arena.ofAuto()); } /// Build a nullable DecodeContext: validity buffer at index 0, pco buffers at indices 1..N. 
@@ -57,7 +57,7 @@ private static DecodeContext ctxWithValidity(ByteBuffer meta, DType dtype, long pcoBufferIndices, null); Registry registry = TestRegistry.of(new BoolEncoding()); - return new DecodeContext(pcoNode, dtype, rowCount, allBuffers, registry, Arena.ofAuto()); + return DecodeContext.ofRawBuffers(pcoNode, dtype, rowCount, allBuffers, registry, Arena.ofAuto()); } private static int[] bufferIndices(int n) { diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java index 8cb6ea37..7ae0b4b5 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveEncodingTest.java @@ -151,7 +151,7 @@ void decode_withValidityChild_returnsMaskedArray() { Registry registry = TestRegistry.of(new PrimitiveEncoding(), new BoolEncoding()); DType dtype = new DType.Primitive(PType.I32, false); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( primNode, dtype, raw.length, new MemorySegment[]{valuesSeg, validitySeg}, registry, Arena.global()); @@ -186,7 +186,7 @@ void decode_noValidityChild_returnsPlainArray() { Registry registry = TestRegistry.of(new PrimitiveEncoding()); DType dtype = new DType.Primitive(PType.I32, false); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( primNode, dtype, raw.length, new MemorySegment[]{valuesSeg}, registry, Arena.global()); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java index 3a1f14ae..1b15599b 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/RegistryTest.java @@ -102,7 +102,7 @@ void decodeUnknownEncodingThrowsByDefault() { Registry sut = Registry.empty(); ArrayNode node = 
new UnknownArrayNode("some.unknown", ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When / Then @@ -117,7 +117,7 @@ void decodeKnownEncodingWithoutDecoderThrowsByDefault() { Registry sut = Registry.empty(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When / Then @@ -132,7 +132,7 @@ void decodeKnownEncodingWithoutDecoderReturnsUnknownArrayWhenAllowed() { Registry sut = Registry.builder().allowUnknown().build(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When @@ -152,7 +152,7 @@ void decodeUnknownEncodingReturnsUnknownArrayWhenAllowed() { buf.set(java.lang.foreign.ValueLayout.JAVA_INT, 0, 42); ArrayNode node = new UnknownArrayNode("some.unknown", metadata, new ArrayNode[0], new int[]{0}, ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 5L, + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 5L, new MemorySegment[]{buf}, sut, Arena.ofAuto()); // When @@ -180,7 +180,7 @@ void decodeUnknownEncodingWrapsChildrenAsUnknown() { ByteBuffer.allocate(0), new ArrayNode[0], new int[0], ArrayStats.empty()); ArrayNode parent = new UnknownArrayNode("some.unknown", ByteBuffer.allocate(0), new ArrayNode[]{child}, new int[0], ArrayStats.empty()); - DecodeContext ctx = new DecodeContext(parent, DTypes.I32, 
0L, + DecodeContext ctx = DecodeContext.ofRawBuffers(parent, DTypes.I32, 0L, new MemorySegment[0], sut, Arena.ofAuto()); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java index f7adef03..cad0ea11 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/RleEncodingTest.java @@ -305,7 +305,7 @@ void decode_nullableIndices_returnsMaskedArrayWithCorrectValidity() { .register(new PrimitiveEncoding()) .register(new BoolEncoding()) .build(); - DecodeContext ctx = new DecodeContext(root, dtype, data.length, segments, reg, Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(root, dtype, data.length, segments, reg, Arena.ofAuto()); // When Array result = sut.decode(ctx); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java index 1fb239ee..6383b46f 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/RunEndEncodingTest.java @@ -49,7 +49,7 @@ private static DecodeContext buildCtx( Registry registry = TestRegistry.of(new RunEndEncoding(), new PrimitiveEncoding()); - return new DecodeContext(reNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(reNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); } private static byte[] toLEBytes(long[] values, PType ptype) { diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java index 7e5981b4..24b3b537 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java +++ 
b/core/src/test/java/io/github/dfa1/vortex/encoding/SequenceEncodingTest.java @@ -32,7 +32,7 @@ class Encode { private static DecodeContext encodeResultToCtx(EncodeResult result, DType dtype, long n) { ByteBuffer meta = result.rootNode().metadata(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_SEQUENCE, meta, new ArrayNode[0], new int[0], null); - return new DecodeContext(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); } @Test @@ -140,7 +140,7 @@ private static DecodeContext makeCtx(byte[] meta, DType dtype, long n) { EncodingId.VORTEX_SEQUENCE, ByteBuffer.wrap(meta), new ArrayNode[0], new int[0], null); - return new DecodeContext(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); } private static byte[] intMeta(long base, long mul) { @@ -250,7 +250,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new SequenceEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_SEQUENCE, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.I64, 3, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I64, 3, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // When / Then assertThatThrownBy(() -> sut.decode(ctx)) diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java index dee032b9..fb571dc3 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/SparseEncodingTest.java @@ -47,7 +47,7 @@ private static Array decodeResult(EncodeResult encoded, DType dtype, int n) { Registry 
registry = TestRegistry.of(new SparseEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext(sparseNode, dtype, n, segments, registry, Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(sparseNode, dtype, n, segments, registry, Arena.global()); return new SparseEncoding().decode(ctx); } @@ -200,7 +200,7 @@ private static DecodeContext buildCtx( Registry registry = TestRegistry.of(new SparseEncoding(), new PrimitiveEncoding()); - return new DecodeContext(sparseNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); + return DecodeContext.ofRawBuffers(sparseNode, dtype, rowCount, segments, registry, java.lang.foreign.Arena.global()); } private static byte[] buildSparseMetaBytes(long numPatches, long offset, PType idxPtype) { @@ -392,7 +392,7 @@ void decode_utf8_withPatches_writesStringsAtIndices() { MemorySegment.ofArray(strBytes), MemorySegment.ofArray(offsets), }; - DecodeContext ctx = new DecodeContext(sparseNode, utf8, 5, segments, registry, Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(sparseNode, utf8, 5, segments, registry, Arena.global()); SparseEncoding sut = new SparseEncoding(); // When @@ -433,7 +433,7 @@ void decode_bool_withPatches_setsBitsAtIndices() { MemorySegment.ofArray(idxBuf), MemorySegment.ofArray(boolBits), }; - DecodeContext ctx = new DecodeContext(sparseNode, bool, 6, segments, registry, Arena.global()); + DecodeContext ctx = DecodeContext.ofRawBuffers(sparseNode, bool, 6, segments, registry, Arena.global()); SparseEncoding sut = new SparseEncoding(); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java index 777445f5..612ce351 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/StructEncodingTest.java @@ -57,7 +57,7 @@ void 
roundTrip_twoI64Fields_preservesValues() { // Then — decode round-trip MemorySegment[] bufs = result.buffers().toArray(MemorySegment[]::new); Registry registry = TestRegistry.of(new StructEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( toArrayNode(result.rootNode()), dtype, ids.length, bufs, registry, Arena.global()); StructArray decoded = (StructArray) sut.decode(ctx); @@ -117,7 +117,7 @@ private static ArrayNode boolNode(int bufferIdx) { private static DecodeContext buildStructCtx(ArrayNode structNode, MemorySegment[] segs, long rowCount) { Registry registry = TestRegistry.of(new StructEncoding(), new PrimitiveEncoding()); - return new DecodeContext(structNode, DTypes.I64, rowCount, segs, registry, Arena.global()); + return DecodeContext.ofRawBuffers(structNode, DTypes.I64, rowCount, segs, registry, Arena.global()); } @Test @@ -155,7 +155,7 @@ void decode_nullableWrapper_twoChildren_returnsMaskedArray() { new ArrayNode[]{validityNode, valuesNode}, new int[0], ArrayStats.empty()); Registry registry = TestRegistry.of(new StructEncoding(), new PrimitiveEncoding(), new BoolEncoding()); - DecodeContext ctx = new DecodeContext( + DecodeContext ctx = DecodeContext.ofRawBuffers( structNode, DTypes.I64, data.length, new MemorySegment[]{validitySeg, valuesSeg}, registry, Arena.global()); diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java b/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java index 1ba7c815..35fd894b 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/TestDecodeContexts.java @@ -44,6 +44,6 @@ TestDecodeContexts arena(Arena a) { } DecodeContext build() { - return new DecodeContext(node, dtype, rowCount, segments, registry, arena); + return DecodeContext.ofRawBuffers(node, dtype, rowCount, segments, registry, arena); } } diff --git 
a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java index ab6952b7..9bbd77d8 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinEncodingTest.java @@ -145,7 +145,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new VarBinEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_VARBIN, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, 3, new MemorySegment[0], + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, 3, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java index 457070a0..c2232356 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/VarBinViewEncodingTest.java @@ -62,7 +62,7 @@ void encode_thenDecode_roundtripsAllStrings(String name, String[] values) { EncodingId.VORTEX_VARBINVIEW, null, new ArrayNode[0], result.rootNode().bufferIndices(), null); Registry registry = TestRegistry.of(sut); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, values.length, bufs, registry, arena); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, values.length, bufs, registry, arena); var decoded = (VarBinArray) sut.decode(ctx); // Then @@ -151,7 +151,7 @@ void decode_roundtrip_returnsAllStrings(String name, String[] values) { Registry registry = TestRegistry.of(new VarBinViewEncoding()); - DecodeContext ctx = new DecodeContext(node, DTypes.UTF8, n, segBufs, registry, arena); + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.UTF8, n, segBufs, registry, arena); var sut = 
new VarBinViewEncoding(); // When diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java index db660604..bc0270e5 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/VariantEncodingTest.java @@ -57,7 +57,7 @@ void decode_withoutShredded_returnsCoreStorageOnly() { new ArrayNode[]{coreNode}, new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, new MemorySegment[0], registry, Arena.ofAuto()); // When @@ -86,7 +86,7 @@ void decode_withShredded_decodesSecondChild() { MemorySegment[] segments = {i32Segment(1, 2, 3)}; Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding(), new PrimitiveEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, segments, registry, Arena.ofAuto()); // When @@ -108,7 +108,7 @@ void decode_emptyMetadata_noShredded() { new ArrayNode[]{coreNode}, new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, new MemorySegment[0], registry, Arena.ofAuto()); // When @@ -128,7 +128,7 @@ void decode_nullableDtype_preservedOnResult() { new ArrayNode[]{coreNode}, new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding(), new NullEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, nullableVariant, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, nullableVariant, N, new MemorySegment[0], registry, 
Arena.ofAuto()); // When @@ -146,7 +146,7 @@ void decode_wrongChildCount_throws() { new ArrayNode[0], new int[]{}, null); Registry registry = TestRegistry.of(new VariantEncoding()); - DecodeContext ctx = new DecodeContext(variantNode, VARIANT_DTYPE, N, + DecodeContext ctx = DecodeContext.ofRawBuffers(variantNode, VARIANT_DTYPE, N, new MemorySegment[0], registry, Arena.ofAuto()); // When / Then diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java index eec143e7..340400ae 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ZigZagEncodingTest.java @@ -45,7 +45,7 @@ private static DecodeContext buildI32Ctx(int[] encodedUnsigned) { ArrayNode zigzagNode = ArrayNode.of(EncodingId.VORTEX_ZIGZAG, null, new ArrayNode[]{primitiveNode}, new int[0], null); Registry registry = TestRegistry.of(new ZigZagEncoding(), new PrimitiveEncoding()); - return new DecodeContext(zigzagNode, DTypes.I32, encodedUnsigned.length, + return DecodeContext.ofRawBuffers(zigzagNode, DTypes.I32, encodedUnsigned.length, new MemorySegment[]{seg}, registry, Arena.ofAuto()); } diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java index b1b0eaff..b49510fb 100644 --- a/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/ZstdEncodingTest.java @@ -129,7 +129,7 @@ private static DecodeContext makeDictCtx( } ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZSTD, ByteBuffer.wrap(meta), new ArrayNode[0], bufIndices, null); - return new DecodeContext(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); } private static byte[] 
makeDictFor(byte[]... samples) { @@ -179,7 +179,7 @@ private static DecodeContext makeNullableCtx( Registry registry = Registry.builder().register(new BoolEncoding()).build(); - return new DecodeContext(node, dtype, n, allSegments.toArray(new MemorySegment[0]), + return DecodeContext.ofRawBuffers(node, dtype, n, allSegments.toArray(new MemorySegment[0]), registry, Arena.ofAuto()); } @@ -200,7 +200,7 @@ private static DecodeContext makeCtx(byte[] meta, DType dtype, long n, byte[]... } ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZSTD, ByteBuffer.wrap(meta), new ArrayNode[0], bufIndices, null); - return new DecodeContext(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); + return DecodeContext.ofRawBuffers(node, dtype, n, segments, Registry.empty(), Arena.ofAuto()); } private static byte[] compress(byte[] input) { @@ -382,7 +382,7 @@ void decode_missingMetadata_throwsVortexException() { // Given var sut = new ZstdEncoding(); ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZSTD, null, new ArrayNode[0], new int[0], null); - DecodeContext ctx = new DecodeContext(node, DTypes.I32, 0, new MemorySegment[0], + DecodeContext ctx = DecodeContext.ofRawBuffers(node, DTypes.I32, 0, new MemorySegment[0], Registry.empty(), Arena.ofAuto()); // When / Then diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java index ca4fae4d..e9e2e81f 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -147,7 +147,7 @@ public static Peek peek(Node node, VortexHandle handle) { if (spec.compression().code != 0) { return Peek.EMPTY; } - MemorySegment seg = handle.slice(spec.offset(), spec.length()); + MemorySegment seg = handle.slice(spec.offset(), spec.length()).unwrapForSubParser("inspector flat segment decoder"); return peekFlatRoot(seg, handle.footer().arraySpecs()); 
} @@ -202,7 +202,7 @@ private static Node buildNode(Layout layout, Optional fieldName, VortexH int segIdx = layout.segments().getFirst(); SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx); if (spec.compression().code == 0) { - MemorySegment seg = handle.slice(spec.offset(), spec.length()); + MemorySegment seg = handle.slice(spec.offset(), spec.length()).unwrapForSubParser("inspector flat segment decoder"); Peek peek = peekFlatRoot(seg, arraySpecs); if (peek.encoding() != null) { localUsed.add(peek.encoding()); diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java index b16eab76..bb876e10 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/PcoFixtureInspectionIntegrationTest.java @@ -101,7 +101,7 @@ private static void walkLayoutInner(VortexReader vf, Layout layout, List if ((layout.isFlat() || layout.isDict()) && !layout.segments().isEmpty()) { int segIdx = layout.segments().getFirst(); SegmentSpec spec = segmentSpecs.get(segIdx); - MemorySegment seg = vf.slice(spec.offset(), spec.length()); + MemorySegment seg = vf.slice(spec.offset(), spec.length()).unwrapForSubParser("integration test inspector"); scanFlatSegment(seg, arraySpecs, stats, currentPath); return; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java b/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java index 6a6cfd6b..b50c7db7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/PostscriptParser.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.CompressionScheme; import 
io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; @@ -19,9 +20,7 @@ import io.github.dfa1.vortex.fbs.Utf8; import io.github.dfa1.vortex.fbs.Variant; -import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.util.ArrayList; import java.util.List; @@ -42,7 +41,7 @@ final class PostscriptParser { private PostscriptParser() { } - static ParsedFile parse(ByteBuffer postscriptBuf, MemorySegment fileSegment, long fileSize) { + static ParsedFile parse(ByteBuffer postscriptBuf, BoundedSegment file) { var ps = Postscript.getRootAsPostscript(postscriptBuf); var footerSeg = ps.footer(); @@ -55,16 +54,20 @@ static ParsedFile parse(ByteBuffer postscriptBuf, MemorySegment fileSegment, lon } var dtypeSeg = ps.dtype(); + // BoundedSegment.slice does the bounds check; the explicit checkBlobBounds calls + // below are kept because they produce more specific error messages naming the blob + // ("postscript footer blob out of bounds" vs the generic "vortex file" context label). + long fileSize = file.byteSize(); checkBlobBounds("footer", footerSeg.offset(), footerSeg.length(), fileSize); checkBlobBounds("layout", layoutSeg.offset(), layoutSeg.length(), fileSize); if (dtypeSeg != null && dtypeSeg.length() > 0) { checkBlobBounds("dtype", dtypeSeg.offset(), dtypeSeg.length(), fileSize); } - ByteBuffer footerBuf = slice(fileSegment, footerSeg.offset(), footerSeg.length()); - ByteBuffer layoutBuf = slice(fileSegment, layoutSeg.offset(), layoutSeg.length()); + ByteBuffer footerBuf = file.slice(footerSeg.offset(), footerSeg.length(), "footer blob").asByteBufferLE(); + ByteBuffer layoutBuf = file.slice(layoutSeg.offset(), layoutSeg.length(), "layout blob").asByteBufferLE(); ByteBuffer dtypeBuf = (dtypeSeg != null && dtypeSeg.length() > 0) - ? slice(fileSegment, dtypeSeg.offset(), dtypeSeg.length()) + ? 
file.slice(dtypeSeg.offset(), dtypeSeg.length(), "dtype blob").asByteBufferLE() : null; ParsedFile parsed = parseBlobs(footerBuf, layoutBuf, dtypeBuf); @@ -118,9 +121,6 @@ static ParsedFile parseBlobs(ByteBuffer footerBuf, ByteBuffer layoutBuf, ByteBuf } } - private static ByteBuffer slice(MemorySegment seg, long offset, long length) { - return seg.asSlice(offset, length).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); - } static Footer convertFooter(io.github.dfa1.vortex.fbs.Footer f) { var arraySpecs = new ArrayList(f.arraySpecsLength()); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java b/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java index ca714a7b..ee8f2172 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/Trailer.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; @@ -20,15 +21,16 @@ record Trailer(int version, int postscriptLen) { private static final ValueLayout.OfShort LE_SHORT = ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - /// Parse the 8-byte trailer from a [MemorySegment] view and validate magic, version, and + /// Parse the 8-byte trailer from a [BoundedSegment] view and validate magic, version, and /// postscript length against the body size. /// - /// @param trailerSeg the trailer slice, must be exactly [VortexFormat#TRAILER_SIZE] bytes + /// @param trailer the trailer region, must be exactly [VortexFormat#TRAILER_SIZE] bytes /// @param bodyBytes number of bytes in the file body (i.e. 
`fileSize - TRAILER_SIZE`) /// @return validated [Trailer] /// @throws VortexException if the magic mismatches, the version is unsupported, or /// postscriptLen is zero or exceeds {@code bodyBytes} - static Trailer parse(MemorySegment trailerSeg, long bodyBytes) { + static Trailer parse(BoundedSegment trailer, long bodyBytes) { + MemorySegment trailerSeg = trailer.unwrapForSubParser("trailer parser"); int version = Short.toUnsignedInt(trailerSeg.get(LE_SHORT, 0)); int postscriptLen = Short.toUnsignedInt(trailerSeg.get(LE_SHORT, 2)); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java index e5f5bce8..4968264a 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; @@ -8,7 +9,6 @@ import io.github.dfa1.vortex.scan.ScanOptions; import java.io.Closeable; -import java.lang.foreign.MemorySegment; /// Common interface for handles to a Vortex file, regardless of storage backend. /// @@ -37,11 +37,12 @@ public interface VortexHandle extends Closeable { /// /// @param offset the start offset in bytes /// @param length the number of bytes to expose - /// @return a read-only [MemorySegment] view of the requested range + /// @return a {@link BoundedSegment} view of the requested range; bounds-checking on + /// later sub-slices is enforced by the type /// @deprecated marked for removal once the reader-internal packages consolidate (see /// {@code TODO.md}); kept here as an interim escape hatch for vortex-internal callers. 
@Deprecated(since = "0.4.0", forRemoval = true) - MemorySegment slice(long offset, long length); + BoundedSegment slice(long offset, long length); ScanIterator scan(ScanOptions options); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java index 86c96b39..8974f2f7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.io; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; @@ -76,10 +77,10 @@ public static VortexHttpReader open(URI uri, Registry registry) throws IOExcepti long fileSize = tf.fileSize(); long tailLen = tail.length; - MemorySegment tailSeg = MemorySegment.ofArray(tail); + BoundedSegment tailRegion = new BoundedSegment(MemorySegment.ofArray(tail), "http tail"); long trailerOff = tailLen - VortexFormat.TRAILER_SIZE; long bodyBytes = fileSize - VortexFormat.TRAILER_SIZE; - Trailer trailer = Trailer.parse(tailSeg.asSlice(trailerOff, VortexFormat.TRAILER_SIZE), bodyBytes); + Trailer trailer = Trailer.parse(tailRegion.slice(trailerOff, VortexFormat.TRAILER_SIZE, "http trailer"), bodyBytes); // HTTP-specific: postscript may extend past the prefetched tail and need a larger fetch. 
long psOffInTail = trailerOff - trailer.postscriptLen(); @@ -89,8 +90,8 @@ public static VortexHttpReader open(URI uri, Registry registry) throws IOExcepti .formatted(trailer.postscriptLen(), TAIL_SIZE)); } - ByteBuffer postscriptBuf = tailSeg.asSlice(psOffInTail, trailer.postscriptLen()) - .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer postscriptBuf = tailRegion.slice(psOffInTail, trailer.postscriptLen(), "http postscript") + .asByteBufferLE(); var ps = Postscript.getRootAsPostscript(postscriptBuf); @@ -219,10 +220,10 @@ public long fileSize() { // ── HTTP helpers ────────────────────────────────────────────────────────── - /// Fetches bytes `[offset, offset+length)` via HTTP Range and returns them - /// as an off-heap [MemorySegment] tied to this reader's [Arena]. + /// Fetches bytes `[offset, offset+length)` via HTTP Range and returns them as a + /// {@link BoundedSegment} wrapping an off-heap region tied to this reader's {@link Arena}. @Override - public MemorySegment slice(long offset, long length) { + public BoundedSegment slice(long offset, long length) { byte[] bytes; try { bytes = fetchRange(uri, offset, offset + length - 1); @@ -232,7 +233,7 @@ public MemorySegment slice(long offset, long length) { } MemorySegment seg = arena.allocate(length); MemorySegment.copy(MemorySegment.ofArray(bytes), 0, seg, 0, length); - return seg.asReadOnly(); + return new BoundedSegment(seg.asReadOnly(), "http range " + offset + ".." 
+ (offset + length)); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java index 39c4d0f8..0fb1e8c0 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.io; import io.github.dfa1.vortex.core.ArrayStats; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; @@ -15,6 +16,7 @@ import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.Path; @@ -84,17 +86,18 @@ public static VortexReader open(Path path, Registry registry) throws IOException private static VortexReader parse( MemorySegment seg, long size, Arena arena, Registry registry ) { + BoundedSegment file = new BoundedSegment(seg, "vortex file"); long bodyBytes = size - VortexFormat.TRAILER_SIZE; - var trailerSeg = seg.asSlice(bodyBytes, VortexFormat.TRAILER_SIZE); - Trailer trailer = Trailer.parse(trailerSeg, bodyBytes); + BoundedSegment trailerRegion = file.slice(bodyBytes, VortexFormat.TRAILER_SIZE, "trailer"); + Trailer trailer = Trailer.parse(trailerRegion, bodyBytes); long postscriptOffset = bodyBytes - trailer.postscriptLen(); - var postscriptBuf = seg.asSlice(postscriptOffset, trailer.postscriptLen()) - .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer postscriptBuf = file.slice(postscriptOffset, trailer.postscriptLen(), "postscript blob") + .asByteBufferLE(); PostscriptParser.ParsedFile parsed; try { - parsed = PostscriptParser.parse(postscriptBuf, seg, size); + parsed = PostscriptParser.parse(postscriptBuf, file); } catch (VortexException e) { throw e; } catch 
(RuntimeException e) { @@ -218,15 +221,18 @@ private ArrayStats readFlatStats(Layout flat) { if (segLen < 4) { return ArrayStats.empty(); } - MemorySegment seg = fileSegment.asSlice(spec.offset(), segLen); - int fbLen = seg.get(LE_INT, segLen - 4); + BoundedSegment statsRegion = new BoundedSegment(fileSegment, "vortex file") + .slice(spec.offset(), segLen, "stats segment"); + int fbLen = statsRegion.getIntLE(segLen - 4); // Reject negative fbLen (signed int from untrusted bytes) or any value that would push - // fbStart below 0 → asSlice(negative, ...) throws IndexOutOfBoundsException without this guard. + // fbStart below 0. BoundedSegment.slice would also catch this, but returning empty here keeps + // the older lenient behaviour for files with corrupt stats blobs — bounded slicing is + // reserved for offsets/lengths that must be valid (the data path). if (fbLen < 0 || fbLen > segLen - 4) { return ArrayStats.empty(); } long fbStart = segLen - 4L - fbLen; - var fbBuf = seg.asSlice(fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + var fbBuf = statsRegion.slice(fbStart, fbLen, "stats flatbuffer").asByteBufferLE(); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); var root = fbArray.root(); if (root == null) { @@ -235,10 +241,11 @@ private ArrayStats readFlatStats(Layout flat) { return ArrayStats.fromFbs(root.stats()); } - /// Zero-copy read-only slice of the memory-mapped file. + /// Zero-copy slice of the memory-mapped file, wrapped as a {@link BoundedSegment}. 
@Override - public MemorySegment slice(long offset, long length) { - return fileSegment.asSlice(offset, length).asReadOnly(); + public BoundedSegment slice(long offset, long length) { + return new BoundedSegment(fileSegment, "vortex file") + .slice(offset, length, "file slice"); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java index d62bd284..bedf8834 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java @@ -3,6 +3,7 @@ import io.github.dfa1.vortex.core.ArrayStats; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.BoundedSegment; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.core.VortexException; @@ -479,7 +480,7 @@ private Array decodeFlat(Layout flat, DType dtype, SegmentAllocator arena) { } int segIdx = flat.segments().getFirst(); SegmentSpec spec = file.footer().segmentSpecs().get(segIdx); - MemorySegment seg = file.slice(spec.offset(), spec.length()); + MemorySegment seg = file.slice(spec.offset(), spec.length()).unwrapForSubParser("flat segment decoder"); return new FlatSegmentDecoder(registry).decode(seg, file.footer().arraySpecs(), dtype, flat.rowCount(), arena); } @@ -632,13 +633,14 @@ private ArrayStats readFlatStats(Layout flat) { int segIdx = flat.segments().getFirst(); SegmentSpec spec = file.footer().segmentSpecs().get(segIdx); long segLen = spec.length(); - MemorySegment seg = file.slice(spec.offset(), segLen); + BoundedSegment statsRegion = file.slice(spec.offset(), segLen); + MemorySegment seg = statsRegion.unwrapForSubParser("stats segment fbLen read"); // Stats FlatBuffer lives in the segment's last 4+fbLen bytes; reading the whole // segment as a ByteBuffer would fail for segments larger than 2 GB 
(ByteBuffer cap). int fbLen = seg.get(LE_INT, segLen - 4); long fbStart = segLen - 4L - fbLen; - ByteBuffer fbBuf = seg.asSlice(fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer fbBuf = statsRegion.slice(fbStart, fbLen, "stats flatbuffer").asByteBufferLE(); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); io.github.dfa1.vortex.fbs.ArrayNode root = fbArray.root();