Skip to content

Commit cac75c2

Browse files
author
Yicong Huang
committed
fix: ensure offset buffer has readable bytes for empty variable-width vectors
When valueCount is 0, the offset buffer must still contain at least one entry [0] per Arrow spec. Previously setReaderAndWriterIndex() set the offset buffer writerIndex to 0 for empty vectors, causing IPC serializers to write 0 bytes for the offset buffer. This breaks IPC readers in other Arrow implementations. Fix setReaderAndWriterIndex() in BaseVariableWidthVector and BaseLargeVariableWidthVector to always set offset buffer writerIndex to (valueCount + 1) * OFFSET_WIDTH, allocating a minimal buffer if the current capacity is insufficient. Also fix memory leaks in tests and NonNullableStructVector.
1 parent 4a7fb4e commit cac75c2

File tree

9 files changed

+188
-86
lines changed

9 files changed

+188
-86
lines changed

adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,19 @@ public void testZeroRowResultSet() throws Exception {
4343
.setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
4444
.build();
4545

46-
ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config);
47-
assertTrue(iter.hasNext(), "Iterator on zero row ResultSet should haveNext() before use");
48-
VectorSchemaRoot root = iter.next();
49-
assertNotNull(root, "VectorSchemaRoot from first next() result should never be null");
50-
assertEquals(
51-
0, root.getRowCount(), "VectorSchemaRoot from empty ResultSet should have zero rows");
52-
assertFalse(
53-
iter.hasNext(),
54-
"hasNext() should return false on empty ResultSets after initial next() call");
46+
try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
47+
assertTrue(iter.hasNext(), "Iterator on zero row ResultSet should haveNext() before use");
48+
VectorSchemaRoot root = iter.next();
49+
assertNotNull(root, "VectorSchemaRoot from first next() result should never be null");
50+
assertEquals(
51+
0, root.getRowCount(), "VectorSchemaRoot from empty ResultSet should have zero rows");
52+
assertFalse(
53+
iter.hasNext(),
54+
"hasNext() should return false on empty ResultSets after initial next() call");
55+
if (!reuseVectorSchemaRoot) {
56+
root.close();
57+
}
58+
}
5559
}
5660
}
5761
}

flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ public void testReuseSchema() {
5555

5656
@Test
5757
public void testCreateSchema() {
58-
try (BufferAllocator allocator = new RootAllocator(1024)) {
58+
try (BufferAllocator allocator = new RootAllocator(1024);
59+
VarCharVector dictVec = new VarCharVector("dict vector", allocator)) {
5960
DictionaryEncoding dictionaryEncoding =
6061
new DictionaryEncoding(0, true, new ArrowType.Int(8, true));
61-
VarCharVector dictVec = new VarCharVector("dict vector", allocator);
6262
Dictionary dictionary = new Dictionary(dictVec, dictionaryEncoding);
6363
DictionaryProvider dictProvider = new DictionaryProvider.MapDictionaryProvider(dictionary);
6464
TreeSet<Long> dictionaryUsed = new TreeSet<>();

vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,14 +373,29 @@ private void setReaderAndWriterIndex() {
373373
valueBuffer.readerIndex(0);
374374
if (valueCount == 0) {
375375
validityBuffer.writerIndex(0);
376-
offsetBuffer.writerIndex(0);
377376
valueBuffer.writerIndex(0);
378377
} else {
379378
final long lastDataOffset = getStartOffset(valueCount);
380379
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
381-
offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
382380
valueBuffer.writerIndex(lastDataOffset);
383381
}
382+
// IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`.
383+
// Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers
384+
// in other libraries. According to Arrow spec, we should still output the offset buffer which
385+
// is [0].
386+
final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
387+
if (offsetBuffer.capacity() < requiredOffsetBufferSize) {
388+
// Allocate a new buffer with sufficient capacity. This can happen when vector
389+
// was loaded via loadFieldBuffers() with an empty offset buffer.
390+
ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize);
391+
// Copy existing data if any
392+
if (offsetBuffer.capacity() > 0) {
393+
newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
394+
}
395+
offsetBuffer.getReferenceManager().release();
396+
offsetBuffer = newOffsetBuffer;
397+
}
398+
offsetBuffer.writerIndex(requiredOffsetBufferSize);
384399
}
385400

386401
/** Same as {@link #allocateNewSafe()}. */

vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,14 +389,29 @@ private void setReaderAndWriterIndex() {
389389
valueBuffer.readerIndex(0);
390390
if (valueCount == 0) {
391391
validityBuffer.writerIndex(0);
392-
offsetBuffer.writerIndex(0);
393392
valueBuffer.writerIndex(0);
394393
} else {
395394
final int lastDataOffset = getStartOffset(valueCount);
396395
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
397-
offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
398396
valueBuffer.writerIndex(lastDataOffset);
399397
}
398+
// IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`.
399+
// Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers
400+
// in other libraries. According to Arrow spec, we should still output the offset buffer which
401+
// is [0].
402+
final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
403+
if (offsetBuffer.capacity() < requiredOffsetBufferSize) {
404+
// Allocate a new buffer with sufficient capacity. This can happen when vector
405+
// was loaded via loadFieldBuffers() with an empty offset buffer.
406+
ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize);
407+
// Copy existing data if any
408+
if (offsetBuffer.capacity() > 0) {
409+
newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
410+
}
411+
offsetBuffer.getReferenceManager().release();
412+
offsetBuffer = newOffsetBuffer;
413+
}
414+
offsetBuffer.writerIndex(requiredOffsetBufferSize);
400415
}
401416

402417
/** Same as {@link #allocateNewSafe()}. */

vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,11 @@ public void close() {
509509
/** Initializes the struct's members from the given Fields. */
510510
public void initializeChildrenFromFields(List<Field> children) {
511511
for (Field field : children) {
512+
FieldVector oldVector = getChild(field.getName());
512513
FieldVector vector = (FieldVector) this.add(field.getName(), field.getFieldType());
514+
if (oldVector != null && oldVector != vector) {
515+
oldVector.close();
516+
}
513517
vector.initializeChildrenFromFields(field.getChildren());
514518
}
515519
}

vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -412,23 +412,25 @@ public void testGetFieldTypeInfo() throws Exception {
412412
final Field field = new Field("union", fieldType, children);
413413

414414
MinorType minorType = MinorType.DENSEUNION;
415-
DenseUnionVector vector = (DenseUnionVector) minorType.getNewVector(field, allocator, null);
416-
vector.initializeChildrenFromFields(children);
415+
try (DenseUnionVector vector =
416+
(DenseUnionVector) minorType.getNewVector(field, allocator, null)) {
417+
vector.initializeChildrenFromFields(children);
417418

418-
assertEquals(vector.getField(), field);
419+
assertEquals(vector.getField(), field);
419420

420-
// Union has 2 child vectors
421-
assertEquals(2, vector.size());
421+
// Union has 2 child vectors
422+
assertEquals(2, vector.size());
422423

423-
// Check child field 0
424-
VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
425-
assertEquals(0, intChild.ordinal);
426-
assertEquals(intChild.vector.getField(), children.get(0));
424+
// Check child field 0
425+
VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
426+
assertEquals(0, intChild.ordinal);
427+
assertEquals(intChild.vector.getField(), children.get(0));
427428

428-
// Check child field 1
429-
VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
430-
assertEquals(1, varcharChild.ordinal);
431-
assertEquals(varcharChild.vector.getField(), children.get(1));
429+
// Check child field 1
430+
VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
431+
assertEquals(1, varcharChild.ordinal);
432+
assertEquals(varcharChild.vector.getField(), children.get(1));
433+
}
432434
}
433435

434436
@Test

vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java

Lines changed: 68 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -111,68 +111,91 @@ private void populateDenseUnionVector(final DenseUnionVector vector, int valueCo
111111
@Test
112112
public void testWithEmptyVector() {
113113
// MapVector use TransferImpl from ListVector
114-
ListVector listVector = ListVector.empty("", allocator);
115-
TransferPair transferPair = listVector.getTransferPair(allocator);
116-
transferPair.splitAndTransfer(0, 0);
117-
assertEquals(0, transferPair.getTo().getValueCount());
114+
try (ListVector listVector = ListVector.empty("", allocator)) {
115+
TransferPair transferPair = listVector.getTransferPair(allocator);
116+
transferPair.splitAndTransfer(0, 0);
117+
assertEquals(0, transferPair.getTo().getValueCount());
118+
transferPair.getTo().close();
119+
}
118120
// BaseFixedWidthVector
119-
IntVector intVector = new IntVector("", allocator);
120-
transferPair = intVector.getTransferPair(allocator);
121-
transferPair.splitAndTransfer(0, 0);
122-
assertEquals(0, transferPair.getTo().getValueCount());
121+
try (IntVector intVector = new IntVector("", allocator)) {
122+
TransferPair transferPair = intVector.getTransferPair(allocator);
123+
transferPair.splitAndTransfer(0, 0);
124+
assertEquals(0, transferPair.getTo().getValueCount());
125+
transferPair.getTo().close();
126+
}
123127
// BaseVariableWidthVector
124-
VarCharVector varCharVector = new VarCharVector("", allocator);
125-
transferPair = varCharVector.getTransferPair(allocator);
126-
transferPair.splitAndTransfer(0, 0);
127-
assertEquals(0, transferPair.getTo().getValueCount());
128+
try (VarCharVector varCharVector = new VarCharVector("", allocator)) {
129+
TransferPair transferPair = varCharVector.getTransferPair(allocator);
130+
transferPair.splitAndTransfer(0, 0);
131+
assertEquals(0, transferPair.getTo().getValueCount());
132+
transferPair.getTo().close();
133+
}
128134
// BaseVariableWidthViewVector: ViewVarCharVector
129-
ViewVarCharVector viewVarCharVector = new ViewVarCharVector("", allocator);
130-
transferPair = viewVarCharVector.getTransferPair(allocator);
131-
transferPair.splitAndTransfer(0, 0);
132-
assertEquals(0, transferPair.getTo().getValueCount());
135+
try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("", allocator)) {
136+
TransferPair transferPair = viewVarCharVector.getTransferPair(allocator);
137+
transferPair.splitAndTransfer(0, 0);
138+
assertEquals(0, transferPair.getTo().getValueCount());
139+
transferPair.getTo().close();
140+
}
133141
// BaseVariableWidthVector: ViewVarBinaryVector
134-
ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("", allocator);
135-
transferPair = viewVarBinaryVector.getTransferPair(allocator);
136-
transferPair.splitAndTransfer(0, 0);
137-
assertEquals(0, transferPair.getTo().getValueCount());
142+
try (ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("", allocator)) {
143+
TransferPair transferPair = viewVarBinaryVector.getTransferPair(allocator);
144+
transferPair.splitAndTransfer(0, 0);
145+
assertEquals(0, transferPair.getTo().getValueCount());
146+
transferPair.getTo().close();
147+
}
138148
// BaseLargeVariableWidthVector
139-
LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator);
140-
transferPair = largeVarCharVector.getTransferPair(allocator);
141-
transferPair.splitAndTransfer(0, 0);
142-
assertEquals(0, transferPair.getTo().getValueCount());
149+
try (LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator)) {
150+
TransferPair transferPair = largeVarCharVector.getTransferPair(allocator);
151+
transferPair.splitAndTransfer(0, 0);
152+
assertEquals(0, transferPair.getTo().getValueCount());
153+
transferPair.getTo().close();
154+
}
143155
// StructVector
144-
StructVector structVector = StructVector.empty("", allocator);
145-
transferPair = structVector.getTransferPair(allocator);
146-
transferPair.splitAndTransfer(0, 0);
147-
assertEquals(0, transferPair.getTo().getValueCount());
156+
try (StructVector structVector = StructVector.empty("", allocator)) {
157+
TransferPair transferPair = structVector.getTransferPair(allocator);
158+
transferPair.splitAndTransfer(0, 0);
159+
assertEquals(0, transferPair.getTo().getValueCount());
160+
transferPair.getTo().close();
161+
}
148162
// FixedSizeListVector
149-
FixedSizeListVector fixedSizeListVector = FixedSizeListVector.empty("", 0, allocator);
150-
transferPair = fixedSizeListVector.getTransferPair(allocator);
151-
transferPair.splitAndTransfer(0, 0);
152-
assertEquals(0, transferPair.getTo().getValueCount());
163+
try (FixedSizeListVector fixedSizeListVector = FixedSizeListVector.empty("", 0, allocator)) {
164+
TransferPair transferPair = fixedSizeListVector.getTransferPair(allocator);
165+
transferPair.splitAndTransfer(0, 0);
166+
assertEquals(0, transferPair.getTo().getValueCount());
167+
transferPair.getTo().close();
168+
}
153169
// FixedSizeBinaryVector
154-
FixedSizeBinaryVector fixedSizeBinaryVector = new FixedSizeBinaryVector("", allocator, 4);
155-
transferPair = fixedSizeBinaryVector.getTransferPair(allocator);
156-
transferPair.splitAndTransfer(0, 0);
157-
assertEquals(0, transferPair.getTo().getValueCount());
170+
try (FixedSizeBinaryVector fixedSizeBinaryVector =
171+
new FixedSizeBinaryVector("", allocator, 4)) {
172+
TransferPair transferPair = fixedSizeBinaryVector.getTransferPair(allocator);
173+
transferPair.splitAndTransfer(0, 0);
174+
assertEquals(0, transferPair.getTo().getValueCount());
175+
transferPair.getTo().close();
176+
}
158177
// UnionVector
159-
UnionVector unionVector = UnionVector.empty("", allocator);
160-
transferPair = unionVector.getTransferPair(allocator);
161-
transferPair.splitAndTransfer(0, 0);
162-
assertEquals(0, transferPair.getTo().getValueCount());
178+
try (UnionVector unionVector = UnionVector.empty("", allocator)) {
179+
TransferPair transferPair = unionVector.getTransferPair(allocator);
180+
transferPair.splitAndTransfer(0, 0);
181+
assertEquals(0, transferPair.getTo().getValueCount());
182+
transferPair.getTo().close();
183+
}
163184
// DenseUnionVector
164-
DenseUnionVector duv = DenseUnionVector.empty("", allocator);
165-
transferPair = duv.getTransferPair(allocator);
166-
transferPair.splitAndTransfer(0, 0);
167-
assertEquals(0, transferPair.getTo().getValueCount());
185+
try (DenseUnionVector duv = DenseUnionVector.empty("", allocator)) {
186+
TransferPair transferPair = duv.getTransferPair(allocator);
187+
transferPair.splitAndTransfer(0, 0);
188+
assertEquals(0, transferPair.getTo().getValueCount());
189+
transferPair.getTo().close();
190+
}
168191

169192
// non empty from vector
170193

171194
// BaseFixedWidthVector
172195
IntVector fromIntVector = new IntVector("", allocator);
173196
fromIntVector.allocateNew(100);
174197
populateIntVector(fromIntVector, 100);
175-
transferPair = fromIntVector.getTransferPair(allocator);
198+
TransferPair transferPair = fromIntVector.getTransferPair(allocator);
176199
IntVector toIntVector = (IntVector) transferPair.getTo();
177200
transferPair.splitAndTransfer(0, 0);
178201
assertEquals(0, toIntVector.getValueCount());

vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -402,23 +402,24 @@ public void testGetFieldTypeInfo() throws Exception {
402402
final Field field = new Field("union", fieldType, children);
403403

404404
MinorType minorType = MinorType.UNION;
405-
UnionVector vector = (UnionVector) minorType.getNewVector(field, allocator, null);
406-
vector.initializeChildrenFromFields(children);
405+
try (UnionVector vector = (UnionVector) minorType.getNewVector(field, allocator, null)) {
406+
vector.initializeChildrenFromFields(children);
407407

408-
assertTrue(vector.getField().equals(field));
408+
assertTrue(vector.getField().equals(field));
409409

410-
// Union has 2 child vectors
411-
assertEquals(2, vector.size());
410+
// Union has 2 child vectors
411+
assertEquals(2, vector.size());
412412

413-
// Check child field 0
414-
VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
415-
assertEquals(0, intChild.ordinal);
416-
assertEquals(intChild.vector.getField(), children.get(0));
413+
// Check child field 0
414+
VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
415+
assertEquals(0, intChild.ordinal);
416+
assertEquals(intChild.vector.getField(), children.get(0));
417417

418-
// Check child field 1
419-
VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
420-
assertEquals(1, varcharChild.ordinal);
421-
assertEquals(varcharChild.vector.getField(), children.get(1));
418+
// Check child field 1
419+
VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
420+
assertEquals(1, varcharChild.ordinal);
421+
assertEquals(varcharChild.vector.getField(), children.get(1));
422+
}
422423
}
423424

424425
@Test

vector/src/test/java/org/apache/arrow/vector/TestValueVector.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3940,4 +3940,42 @@ public void testVectorLoadUnloadOnNonVariadicVectors() {
39403940
}
39413941
}
39423942
}
3943+
3944+
@Test
3945+
public void testEmptyVarCharOffsetBuffer() {
3946+
// Validates that offset buffer has at least OFFSET_WIDTH bytes (for offset[0]=0)
3947+
// even when valueCount is 0, per Arrow specification.
3948+
try (VarCharVector vector = newVarCharVector("varchar", allocator)) {
3949+
vector.allocateNew();
3950+
vector.setValueCount(0);
3951+
3952+
List<ArrowBuf> buffers = vector.getFieldBuffers();
3953+
// buffers: [validity, offset, data]
3954+
assertTrue(
3955+
buffers.get(1).readableBytes() >= BaseVariableWidthVector.OFFSET_WIDTH,
3956+
"Offset buffer should have at least "
3957+
+ BaseVariableWidthVector.OFFSET_WIDTH
3958+
+ " bytes for offset[0]");
3959+
assertEquals(0, vector.getOffsetBuffer().getInt(0));
3960+
}
3961+
}
3962+
3963+
@Test
3964+
public void testEmptyLargeVarCharOffsetBuffer() {
3965+
// Validates that offset buffer has at least OFFSET_WIDTH bytes (for offset[0]=0)
3966+
// even when valueCount is 0, per Arrow specification.
3967+
try (LargeVarCharVector vector = new LargeVarCharVector("largevarchar", allocator)) {
3968+
vector.allocateNew();
3969+
vector.setValueCount(0);
3970+
3971+
List<ArrowBuf> buffers = vector.getFieldBuffers();
3972+
// buffers: [validity, offset, data]
3973+
assertTrue(
3974+
buffers.get(1).readableBytes() >= BaseLargeVariableWidthVector.OFFSET_WIDTH,
3975+
"Offset buffer should have at least "
3976+
+ BaseLargeVariableWidthVector.OFFSET_WIDTH
3977+
+ " bytes for offset[0]");
3978+
assertEquals(0, vector.getOffsetBuffer().getLong(0));
3979+
}
3980+
}
39433981
}

0 commit comments

Comments
 (0)