Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -571,11 +571,8 @@ private void runDictLoad(InspectorTree.Node dictNode) {
try (java.lang.foreign.Arena arena = java.lang.foreign.Arena.ofConfined()) {
int segIdx = values.segments().getFirst();
SegmentSpec spec = tree.segmentSpecs().get(segIdx);
java.lang.foreign.MemorySegment seg = handle.slice(spec.offset(), spec.length());
io.github.dfa1.vortex.core.array.Array arr =
new io.github.dfa1.vortex.encoding.FlatSegmentDecoder(handle.registry())
.decode(seg, handle.footer().arraySpecs(),
dtype, values.rowCount(), arena);
handle.decodeFlatSegment(spec, dtype, values.rowCount(), arena);
int n = (int) Math.min(arr.length(), DATA_PREVIEW_ROWS);
List<String> out = new ArrayList<>(n);
for (int i = 0; i < n; i++) {
Expand Down
19 changes: 19 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,25 @@
</dependency>
</dependencies>

<build>
<plugins>
<!-- Publish a test-jar so reader/ and writer/ tests can reuse core test
helpers (DTypes, EncodeTestHelper) without duplication.
The execution below runs maven-jar-plugin's test-jar goal, which packages
this module's test classes as an additional artifact alongside the main jar. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<id>publish-test-jar</id>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

<!--
Generated sources (src/main/java/…/fbs and …/proto) are committed to the repo.
Normal builds need no external tools.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,25 @@
public final class AlpEncoding implements Encoding {

// Powers of 10 for F64 — shared by encode (exponent search) and decode (reconstruction).
static final double[] F10_F64 = {
// NOTE(review): `final` only fixes the reference; the array elements stay writable, and
// with `public` visibility any caller can overwrite an entry of this shared table.
// The sibling tables IF10_F64, F10_F32 and IF10_F32 have the same exposure.
// Consider an accessor or a defensive copy if callers outside this package are not trusted.
public static final double[] F10_F64 = {
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1e20, 1e21, 1e22, 1e23
};
static final double[] IF10_F64 = {
public static final double[] IF10_F64 = {
1e-0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9,
1e-10, 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
1e-20, 1e-21, 1e-22, 1e-23
};
// Powers of 10 for F32 — shared by encode (exponent search) and decode (reconstruction).
static final float[] F10_F32 = {
public static final float[] F10_F32 = {
1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f
};
static final float[] IF10_F32 = {
public static final float[] IF10_F32 = {
1e-0f, 1e-1f, 1e-2f, 1e-3f, 1e-4f, 1e-5f, 1e-6f, 1e-7f, 1e-8f, 1e-9f, 1e-10f
};
static final DType I64_DTYPE = new DType.Primitive(PType.I64, false);
static final DType I32_DTYPE = new DType.Primitive(PType.I32, false);
public static final DType I64_DTYPE = new DType.Primitive(PType.I64, false);
public static final DType I32_DTYPE = new DType.Primitive(PType.I32, false);

/// Creates a new {@code AlpEncoding} instance; use via {@link Registry}.
public AlpEncoding() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
/// </ul>
public final class AlpRdEncoding implements Encoding {

private static final DType U16_DTYPE = new DType.Primitive(PType.U16, false);
private static final DType U32_DTYPE = new DType.Primitive(PType.U32, false);
private static final DType U64_DTYPE = new DType.Primitive(PType.U64, false);
public static final DType U16_DTYPE = new DType.Primitive(PType.U16, false);
public static final DType U32_DTYPE = new DType.Primitive(PType.U32, false);
public static final DType U64_DTYPE = new DType.Primitive(PType.U64, false);

/// Creates a new {@code AlpRdEncoding} instance; use via {@link Registry}.
public AlpRdEncoding() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
/// Sealed: a node is either [KnownArrayNode] (id resolves to an [EncodingId]) or
/// [UnknownArrayNode] (id is an arbitrary string only meaningful for
/// [Registry#allowUnknown()] passthrough decode).
sealed interface ArrayNode permits KnownArrayNode, UnknownArrayNode {
public sealed interface ArrayNode permits KnownArrayNode, UnknownArrayNode {
/// Short factory for the common case: a node whose encoding id is well-known.
/// Mostly used by tests and helper code that converts an [EncodeNode] tree back into
/// an [ArrayNode] tree.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
public final class BitpackedEncoding implements Encoding {

// FL_ORDER permutation from the FastLanes paper / spiraldb/fastlanes-rs.
static final int[] FL_ORDER = {0, 4, 2, 6, 1, 5, 3, 7};
// NOTE(review): public array — `final` does not make the elements immutable, so any
// caller can rewrite this permutation in place. DeltaEncoding declares an identical
// FL_ORDER table; consider a single shared, non-writable source for both.
public static final int[] FL_ORDER = {0, 4, 2, 6, 1, 5, 3, 7};

/// Creates a new {@code BitpackedEncoding} instance; use via {@link Registry}.
public BitpackedEncoding() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,42 +32,42 @@
/// {@code LANES = 1024 / typeBits}, where {@code typeBits = byteSize * 8}.
public final class DeltaEncoding implements Encoding {

static final int FL_CHUNK_SIZE = 1024;
static final int[] FL_ORDER = {0, 4, 2, 6, 1, 5, 3, 7};
// Numerator used by lanes(): FL_CHUNK_SIZE / (byteSize * 8).
public static final int FL_CHUNK_SIZE = 1024;
// Permutation indexed by transposeIndex() and iterateIndex().
// NOTE(review): public array — `final` does not make the elements immutable, so any
// caller can rewrite this table in place. It also duplicates the values of
// BitpackedEncoding.FL_ORDER; consider sharing one definition.
public static final int[] FL_ORDER = {0, 4, 2, 6, 1, 5, 3, 7};

/// Creates a new {@code DeltaEncoding} instance.
public DeltaEncoding() {
}

static int transposeIndex(int idx) {
/// Maps a linear index to its transposed position.
///
/// The index is split into three parts: `lane = idx % 16`, `order = (idx / 16) % 8`
/// and `row = idx / 128`. The result is `lane * 64 + FL_ORDER[order] * 8 + row`.
///
/// @param idx linear index to transpose
/// @return the transposed index
public static int transposeIndex(int idx) {
    final int row = idx / 128;
    final int orderSlot = (idx / 16) % 8;
    final int lane = idx % 16;
    final int permuted = FL_ORDER[orderSlot];
    return lane * 64 + permuted * 8 + row;
}

static int iterateIndex(int row, int lane) {
/// Computes the linear index for a `(row, lane)` pair.
///
/// The row is split into `row / 8` (used to look up `FL_ORDER`) and `row % 8`;
/// the result is `FL_ORDER[row / 8] * 16 + (row % 8) * 128 + lane`.
///
/// @param row  row number; `row / 8` must be a valid index into `FL_ORDER`
/// @param lane lane offset added to the result
/// @return the computed index
public static int iterateIndex(int row, int lane) {
    final int orderSlot = row / 8;
    final int rowInGroup = row % 8;
    final int permutedBase = FL_ORDER[orderSlot] * 16;
    return permutedBase + rowInGroup * 128 + lane;
}

static int lanes(PType ptype) {
/// Returns `FL_CHUNK_SIZE` divided by the bit width of `ptype` (`byteSize * 8`).
///
/// @param ptype primitive type whose byte size determines the divisor
/// @return `FL_CHUNK_SIZE / (ptype.byteSize() * 8)`
public static int lanes(PType ptype) {
    final int bitsPerValue = ptype.byteSize() * 8;
    return FL_CHUNK_SIZE / bitsPerValue;
}

static int typeBits(PType ptype) {
/// Returns the width of `ptype` in bits, i.e. its byte size times eight.
///
/// @param ptype primitive type to measure
/// @return `ptype.byteSize() * 8`
public static int typeBits(PType ptype) {
    final int byteWidth = ptype.byteSize();
    return byteWidth * 8;
}

// ── Shared helpers (FastLanes index + type math + buffer I/O) ────────────

static long typeMask(PType ptype) {
/// Returns a mask with the low `byteSize * 8` bits set.
///
/// @param ptype primitive type whose bit width selects the mask
/// @return `-1L` for a 64-bit type, otherwise `(1L << bits) - 1`
public static long typeMask(PType ptype) {
    final int width = ptype.byteSize() * 8;
    if (width == 64) {
        // A long shift uses only the low six bits of the distance, so `1L << 64`
        // would be `1L` and the subtraction would give 0 instead of all ones.
        return -1L;
    }
    return (1L << width) - 1;
}

static MemorySegment fromLongs(long[] longs, PType ptype, SegmentAllocator arena) {
public static MemorySegment fromLongs(long[] longs, PType ptype, SegmentAllocator arena) {
if (ptype == PType.I64 || ptype == PType.U64) {
MemorySegment dst = arena.allocate((long) longs.length * 8);
MemorySegment.copy(MemorySegment.ofArray(longs), ValueLayout.JAVA_LONG, 0L,
Expand Down
49 changes: 8 additions & 41 deletions core/src/main/java/io/github/dfa1/vortex/encoding/Encoding.java
Original file line number Diff line number Diff line change
@@ -1,44 +1,11 @@
package io.github.dfa1.vortex.encoding;

import io.github.dfa1.vortex.core.DType;
import io.github.dfa1.vortex.core.array.Array;

/// Combines encode and decode for one encoding type.
/// Register via [Registry] — implementations are discoverable via ServiceLoader.
public interface Encoding {
/// Returns the encoding id for this encoding.
///
/// @return encoding id
EncodingId encodingId();

/// Decodes an array node from the file using the provided context.
///
/// @param ctx decoding context containing buffers, dtype, row count, and child registry
/// @return decoded array
Array decode(DecodeContext ctx);

/// Returns `true` if this encoding can encode the given dtype.
///
/// @param dtype the dtype to test
/// @return `true` if this encoding accepts `dtype`
boolean accepts(DType dtype);

/// Encodes {@code data} to bytes using the provided arena for output buffer allocation.
///
/// @param dtype logical type of the data
/// @param data the data to encode (type depends on encoding; typically a primitive array)
/// @param ctx encoding context supplying the arena for output buffer allocation
/// @return encode result containing the root node, buffers, and optional stats
EncodeResult encode(DType dtype, Object data, EncodeContext ctx);

/// Cascade-aware encode: returns a partial step with open child slots.
/// Default wraps the terminal {@link #encode} result; override to expose children.
///
/// @param dtype the logical type of the data
/// @param data the data to encode
/// @param ctx encoding context supplying the arena, registry, and cascade parameters
/// @return cascade step with optional open child slots
default CascadeStep encodeCascade(DType dtype, Object data, EncodeContext ctx) {
return CascadeStep.terminal(encode(dtype, data, ctx));
}
/// Bifunctional encoding interface that combines the read-side {@link EncodingDecoder}
/// surface and the write-side {@link EncodingEncoder} surface. Existing implementations
/// satisfy both contracts on a single class; ADR 0001 progressively peels these apart
/// so that a read-only deployment carries only decoders.
///
/// <p>Register via {@link Registry} — implementations are discoverable via
/// {@link java.util.ServiceLoader}.
///
/// <p>Both parent interfaces declare {@code encodingId()} and {@code accepts(DType)} with
/// the same signatures, so an implementation provides one method for each pair.
/// NOTE(review): the parents document {@code accepts} differently ("can decode" vs
/// "can encode"); a bifunctional implementation can only give one answer for both —
/// confirm that this is intended.
public interface Encoding extends EncodingDecoder, EncodingEncoder {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package io.github.dfa1.vortex.encoding;

import io.github.dfa1.vortex.core.DType;
import io.github.dfa1.vortex.core.array.Array;

/// Read-side surface of an encoding. Exposes only the metadata required to dispatch
/// a decode and the {@link #decode(DecodeContext)} entry point itself.
///
/// <p>ADR 0001 Phase 1: {@link Encoding} now extends this interface for backward
/// compatibility while encoding implementations are still bifunctional. Later phases
/// peel encoder implementations into a separate write runtime, at which point
/// {@code EncodingDecoder} becomes the canonical read-side type and lives in the
/// {@code reader} module.
public interface EncodingDecoder {

/// Returns the identifier under which this encoding is dispatched.
///
/// @return the wire identifier of this encoding
EncodingId encodingId();

/// Reports whether this encoding can decode arrays of the given dtype.
///
/// @param dtype the dtype to test
/// @return {@code true} if this encoding can decode arrays of {@code dtype}
boolean accepts(DType dtype);

/// Decodes an array node from the file using the provided context.
///
/// @param ctx decoding context containing buffers, dtype, row count, and child registry
/// @return decoded array
Array decode(DecodeContext ctx);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package io.github.dfa1.vortex.encoding;

import io.github.dfa1.vortex.core.DType;

/// Write-side surface of an encoding. Exposes only the metadata required to pick an
/// encoder for a dtype and the {@link #encode(DType, Object, EncodeContext)} entry
/// point itself.
///
/// <p>ADR 0001 Phase 1: {@link Encoding} now extends this interface for backward
/// compatibility while encoding implementations are still bifunctional. Later phases
/// peel encoder implementations into a separate write runtime, at which point
/// {@code EncodingEncoder} becomes the canonical write-side type and lives in the
/// {@code writer} module.
public interface EncodingEncoder {

    /// Returns the identifier under which this encoding is written.
    ///
    /// @return the wire identifier of this encoding
    EncodingId encodingId();

    /// Reports whether this encoding can encode arrays of the given dtype.
    ///
    /// @param dtype the dtype to test
    /// @return {@code true} if this encoding can encode arrays of {@code dtype}
    boolean accepts(DType dtype);

    /// Encodes {@code data} to bytes; output buffers are allocated from the arena
    /// supplied by {@code ctx}.
    ///
    /// @param dtype logical type of the data
    /// @param data  the data to encode (type depends on encoding; typically a primitive array)
    /// @param ctx   encoding context supplying the arena for output buffer allocation
    /// @return encode result containing the root node, buffers, and optional stats
    EncodeResult encode(DType dtype, Object data, EncodeContext ctx);

    /// Cascade-aware variant of {@link #encode}: yields a partial step with open child slots.
    /// The default implementation runs the terminal {@link #encode} and wraps its result
    /// via {@code CascadeStep.terminal}; override to expose children.
    ///
    /// @param dtype the logical type of the data
    /// @param data  the data to encode
    /// @param ctx   encoding context supplying the arena, registry, and cascade parameters
    /// @return cascade step with optional open child slots
    default CascadeStep encodeCascade(DType dtype, Object data, EncodeContext ctx) {
        final EncodeResult terminalResult = encode(dtype, data, ctx);
        return CascadeStep.terminal(terminalResult);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import java.nio.ByteBuffer;

/// Array node whose encoding id is well-known to this build (an [EncodingId] enum constant).
record KnownArrayNode(
public record KnownArrayNode(
EncodingId encodingId,
ByteBuffer metadata,
ArrayNode[] children,
Expand Down
21 changes: 15 additions & 6 deletions core/src/main/java/io/github/dfa1/vortex/encoding/LeBitReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,26 @@
///
/// Bits are packed LSB-first within each byte (pcodec wire format convention).
/// Bit 0 of the stream is the LSB of byte 0; bit 8 is the LSB of byte 1.
final class LeBitReader {
public final class LeBitReader {

private static final ValueLayout.OfByte BYTE = ValueLayout.JAVA_BYTE;

private final MemorySegment data;
private long bitPos;

LeBitReader(MemorySegment data) {
/// Wrap {@code data} for LSB-first sequential reads from bit position 0.
///
/// @param data backing segment
public LeBitReader(MemorySegment data) {
    // Reading always starts at the first bit of the segment.
    this.bitPos = 0L;
    this.data = data;
}

/// Read {@code n} bits (0 ≤ n ≤ 64) from the stream, LSB-first.
long readBits(int n) {
///
/// @param n bit count, 0..64 inclusive
/// @return value with low {@code n} bits set from the stream
public long readBits(int n) {
if (n == 0) {
return 0L;
}
Expand All @@ -49,19 +55,22 @@ long readBits(int n) {
}

/// Discard bits to align the stream to the next byte boundary.
void alignToByte() {
public void alignToByte() {
    // The low three bits of the position are the bits already consumed in the current byte.
    final int usedInByte = (int) (bitPos & 7);
    if (usedInByte == 0) {
        return; // already on a byte boundary
    }
    // Skip the unread remainder of the current byte.
    bitPos += 8 - usedInByte;
}

/// Current byte offset (only meaningful after {@link #alignToByte()}).
long byteOffset() {
///
/// @return current stream position in bytes
public long byteOffset() {
    // Eight bits per byte: drop the low three bits of the bit position.
    final long wholeBytes = this.bitPos >>> 3;
    return wholeBytes;
}

long bitsConsumed() {
/// @return total bits consumed since construction
public long bitsConsumed() {
    // The bit position starts at 0 in the constructor, so it doubles as the consumed count.
    return this.bitPos;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ private static MethodHandle[] buildSetters() {

/// Write `bits` at `offset` in `seg`, narrowed or bit-reinterpreted to the ptype's carrier.
/// Float bits use `Float.intBitsToFloat` / `Double.longBitsToDouble` semantics.
static void set(MemorySegment seg, long offset, PType ptype, long bits) {
public static void set(MemorySegment seg, long offset, PType ptype, long bits) {
try {
SETTERS[ptype.ordinal()].invokeExact(seg, offset, bits);
} catch (Throwable t) {
Expand All @@ -102,7 +102,7 @@ static void set(MemorySegment seg, long offset, PType ptype, long bits) {

/// Bulk-copy a primitive Java array into a freshly allocated little-endian segment.
/// Element layout conversion (host order → LE) is delegated to `MemorySegment.copy`.
static MemorySegment copyArray(PType ptype, Object typedArray, int count) {
public static MemorySegment copyArray(PType ptype, Object typedArray, int count) {
MemorySegment src;
ValueLayout srcLayout;
switch (typedArray) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
/// {@code weight} is the bin's count in the tANS table (sum of weights == table size).
/// {@code lower} is the raw unsigned lower bound (U64 for 64-bit latents).
/// {@code offsetBits} is the log2 of the range size (0 = single value).
record PcoBin(int weight, long lower, int offsetBits) {
/// @param weight     the bin's count in the tANS table
/// @param lower      raw unsigned lower bound
/// @param offsetBits log2 of the range size (0 = single value)
public record PcoBin(int weight, long lower, int offsetBits) {
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@
/// all integer/float ptypes except F16 (Conv1 additionally excludes 64-bit dtypes).
public final class PcoEncoding implements Encoding {

static final byte PCO_FORMAT_MAJOR = 0x04;
static final byte PCO_FORMAT_MINOR = 0x01;
public static final byte PCO_FORMAT_MAJOR = 0x04;
public static final byte PCO_FORMAT_MINOR = 0x01;
// bits needed to encode offset_bits field per latent type
static final int BITS_TO_ENCODE_OFFSET_BITS_64 = 7; // log2(64) + 1
static final int BITS_TO_ENCODE_OFFSET_BITS_32 = 6; // log2(32) + 1
static final int BITS_TO_ENCODE_OFFSET_BITS_16 = 5; // log2(16) + 1
public static final int BITS_TO_ENCODE_OFFSET_BITS_64 = 7; // log2(64) + 1
public static final int BITS_TO_ENCODE_OFFSET_BITS_32 = 6; // log2(32) + 1
public static final int BITS_TO_ENCODE_OFFSET_BITS_16 = 5; // log2(16) + 1

/// Creates a new {@code PcoEncoding} instance; use via {@link Registry}.
public PcoEncoding() {
Expand Down
Loading
Loading