From 0acf4c8271e65e103c9fbbf753b8759fd9058f45 Mon Sep 17 00:00:00 2001 From: James Duong Date: Thu, 9 Nov 2023 13:56:53 -0800 Subject: [PATCH] GH-38614: [Java] Add VarBinary and VarCharWriter helper methods to more writers (#38631) ### Rationale for this change Add the overrides for new convenience Writer methods added to VarCharWriter and VarBinaryWriter so that classes that use composition such as UnionWriter and PromotableWriter can invoke them properly. ### What changes are included in this PR? - Rename from writeTo$type to write$type for consistency with other methods - Add new helper methods to PromotableWriter - Add new helper methods to complex writers such as list and union ### Are these changes tested? Yes. New unit tests added for several Writer classes. **This PR includes breaking changes to public APIs.** The writeTo() and similar methods in Writers have been renamed to just write() * Closes: #38614 Authored-by: James Duong Signed-off-by: David Li --- .../templates/AbstractFieldWriter.java | 11 +- .../AbstractPromotableFieldWriter.java | 32 +++ .../codegen/templates/ComplexWriters.java | 16 +- .../templates/UnionFixedSizeListWriter.java | 56 +++++ .../codegen/templates/UnionListWriter.java | 37 +++ .../main/codegen/templates/UnionWriter.java | 36 +++ .../vector/complex/impl/PromotableWriter.java | 63 +++++- .../arrow/vector/TestFixedSizeListVector.java | 92 +++++++- .../complex/impl/TestPromotableWriter.java | 197 ++++++++++++++++ .../complex/writer/TestComplexWriter.java | 211 ++++++++++++++++++ .../complex/writer/TestSimpleWriter.java | 16 +- 11 files changed, 741 insertions(+), 26 deletions(-) diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index bb4ee45eaa073..6c2368117f7c2 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -27,6 +27,9 @@ /* * This class is 
generated using freemarker and the ${.template_name} template. + * Note that changes to the AbstractFieldWriter template should also get reflected in the + * AbstractPromotableFieldWriter, ComplexWriters, UnionFixedSizeListWriter, UnionListWriter + * and UnionWriter templates and the PromotableWriter concrete code. */ @SuppressWarnings("unused") abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWriter { @@ -125,19 +128,19 @@ public void write(${name}Holder holder) { <#if minor.class?ends_with("VarBinary")> - public void writeTo${minor.class}(byte[] value) { + public void write${minor.class}(byte[] value) { fail("${name}"); } - public void writeTo${minor.class}(byte[] value, int offset, int length) { + public void write${minor.class}(byte[] value, int offset, int length) { fail("${name}"); } - public void writeTo${minor.class}(ByteBuffer value) { + public void write${minor.class}(ByteBuffer value) { fail("${name}"); } - public void writeTo${minor.class}(ByteBuffer value, int offset, int length) { + public void write${minor.class}(ByteBuffer value, int offset, int length) { fail("${name}"); } diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 2f963a9df0d05..59f9fb5b8098d 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -221,6 +221,38 @@ public void write(${name}Holder holder) { } + <#if minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value, offset, length); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + 
getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value, offset, length); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(Text value) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + @Override + public void write${minor.class}(String value) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + public void writeNull() { } diff --git a/java/vector/src/main/codegen/templates/ComplexWriters.java b/java/vector/src/main/codegen/templates/ComplexWriters.java index 51a52a6e3070d..2e3caae1f0f22 100644 --- a/java/vector/src/main/codegen/templates/ComplexWriters.java +++ b/java/vector/src/main/codegen/templates/ComplexWriters.java @@ -194,22 +194,22 @@ public void writeNull() { <#if minor.class?ends_with("VarBinary")> - public void writeTo${minor.class}(byte[] value) { + public void write${minor.class}(byte[] value) { vector.setSafe(idx(), value); vector.setValueCount(idx() + 1); } - public void writeTo${minor.class}(byte[] value, int offset, int length) { + public void write${minor.class}(byte[] value, int offset, int length) { vector.setSafe(idx(), value, offset, length); vector.setValueCount(idx() + 1); } - public void writeTo${minor.class}(ByteBuffer value) { + public void write${minor.class}(ByteBuffer value) { vector.setSafe(idx(), value, 0, value.remaining()); vector.setValueCount(idx() + 1); } - public void writeTo${minor.class}(ByteBuffer value, int offset, int length) { + public void write${minor.class}(ByteBuffer value, int offset, int length) { vector.setSafe(idx(), value, offset, length); vector.setValueCount(idx() + 1); } @@ -259,13 +259,13 @@ public interface ${eName}Writer extends BaseWriter { <#if minor.class?ends_with("VarBinary")> - public void writeTo${minor.class}(byte[] value); + 
public void write${minor.class}(byte[] value); - public void writeTo${minor.class}(byte[] value, int offset, int length); + public void write${minor.class}(byte[] value, int offset, int length); - public void writeTo${minor.class}(ByteBuffer value); + public void write${minor.class}(ByteBuffer value); - public void writeTo${minor.class}(ByteBuffer value, int offset, int length); + public void write${minor.class}(ByteBuffer value, int offset, int length); <#if minor.class?ends_with("VarChar")> diff --git a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java index 55c661bfc6023..3436e3a967651 100644 --- a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java @@ -295,6 +295,62 @@ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { <#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign uncappedName = name?uncap_first/> + <#if minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", 
idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(Text value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(String value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + <#if !minor.typeParams?? 
> @Override public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index fac75a9ce5637..5c0565ee27175 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -274,6 +274,45 @@ public void write(${name}Holder holder) { writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); writer.setPosition(writer.idx()+1); } + + + <#if minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(Text value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + /** Writes {@code value} through the child writer, then advances the child writer's position by one. */ + @Override + public void write${minor.class}(String value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } diff --git a/java/vector/src/main/codegen/templates/UnionWriter.java b/java/vector/src/main/codegen/templates/UnionWriter.java index 4efd1026cac4a..08dbf24324b17 100644 --- a/java/vector/src/main/codegen/templates/UnionWriter.java +++ b/java/vector/src/main/codegen/templates/UnionWriter.java @@ -302,6 +302,42 @@ public void write(${name}Holder holder) { 
get${name}Writer(arrowType).setPosition(idx()); get${name}Writer(arrowType).writeBigEndianBytesTo${name}(value, arrowType); } + <#elseif minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value, offset, length); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value, offset, length); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(${friendlyType} value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(String value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index d99efceae3eca..f7be277f592a6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.complex.impl; import java.math.BigDecimal; +import java.nio.ByteBuffer; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.vector.FieldVector; @@ -37,6 +38,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; 
import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; /** @@ -378,7 +380,66 @@ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { /*bitWidth=*/256)).writeBigEndianBytesToDecimal256(value, arrowType); } - + @Override + public void writeVarBinary(byte[] value) { + getWriter(MinorType.VARBINARY).writeVarBinary(value); + } + + @Override + public void writeVarBinary(byte[] value, int offset, int length) { + getWriter(MinorType.VARBINARY).writeVarBinary(value, offset, length); + } + + @Override + public void writeVarBinary(ByteBuffer value) { + getWriter(MinorType.VARBINARY).writeVarBinary(value); + } + + @Override + public void writeVarBinary(ByteBuffer value, int offset, int length) { + getWriter(MinorType.VARBINARY).writeVarBinary(value, offset, length); + } + + @Override + public void writeLargeVarBinary(byte[] value) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value); + } + + @Override + public void writeLargeVarBinary(byte[] value, int offset, int length) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value, offset, length); + } + + @Override + public void writeLargeVarBinary(ByteBuffer value) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value); + } + + @Override + public void writeLargeVarBinary(ByteBuffer value, int offset, int length) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value, offset, length); + } + + @Override + public void writeVarChar(Text value) { + getWriter(MinorType.VARCHAR).writeVarChar(value); + } + + @Override + public void writeVarChar(String value) { + getWriter(MinorType.VARCHAR).writeVarChar(value); + } + + @Override + public void writeLargeVarChar(Text value) { + getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); + } + + @Override + public void writeLargeVarChar(String value) { + getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); + } + 
@Override public void allocate() { getWriter().allocate(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java index 9d7e413a739ad..0023b1dddb8e7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java @@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; @@ -37,6 +38,7 @@ import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; import org.junit.After; import org.junit.Assert; @@ -457,18 +459,98 @@ public void testVectorWithNulls() { assertEquals(4, vector1.getValueCount()); - List realValue1 = vector1.getObject(0); + List realValue1 = vector1.getObject(0); assertEquals(values1, realValue1); - List realValue2 = vector1.getObject(1); + List realValue2 = vector1.getObject(1); assertEquals(values2, realValue2); - List realValue3 = vector1.getObject(2); + List realValue3 = vector1.getObject(2); assertEquals(values3, realValue3); - List realValue4 = vector1.getObject(3); + List realValue4 = vector1.getObject(3); assertEquals(values4, realValue4); } } - private int[] convertListToIntArray(List list) { + @Test + public void testWriteVarCharHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeVarChar("row1,1"); + writer.writeVarChar(new Text("row1,2")); + writer.writeNull(); + writer.writeNull(); + writer.endList(); + 
+ assertEquals("row1,1", vector.getObject(0).get(0).toString()); + assertEquals("row1,2", vector.getObject(0).get(1).toString()); + } + } + + @Test + public void testWriteLargeVarCharHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeLargeVarChar("row1,1"); + writer.writeLargeVarChar(new Text("row1,2")); + writer.writeNull(); + writer.writeNull(); + writer.endList(); + + assertEquals("row1,1", vector.getObject(0).get(0).toString()); + assertEquals("row1,2", vector.getObject(0).get(1).toString()); + } + } + + @Test + public void testWriteVarBinaryHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeVarBinary("row1,1".getBytes()); + writer.writeVarBinary("row1,2".getBytes(), 0, "row1,2".getBytes().length); + writer.writeVarBinary(ByteBuffer.wrap("row1,3".getBytes())); + writer.writeVarBinary(ByteBuffer.wrap("row1,4".getBytes()), 0, "row1,4".getBytes().length); + writer.endList(); + + assertEquals("row1,1", new String((byte[]) (vector.getObject(0).get(0)))); + assertEquals("row1,2", new String((byte[]) (vector.getObject(0).get(1)))); + assertEquals("row1,3", new String((byte[]) (vector.getObject(0).get(2)))); + assertEquals("row1,4", new String((byte[]) (vector.getObject(0).get(3)))); + } + } + + @Test + public void testWriteLargeVarBinaryHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeLargeVarBinary("row1,1".getBytes()); + 
writer.writeLargeVarBinary("row1,2".getBytes(), 0, "row1,2".getBytes().length); + writer.writeLargeVarBinary(ByteBuffer.wrap("row1,3".getBytes())); + writer.writeLargeVarBinary(ByteBuffer.wrap("row1,4".getBytes()), 0, "row1,4".getBytes().length); + writer.endList(); + + assertEquals("row1,1", new String((byte[]) (vector.getObject(0).get(0)))); + assertEquals("row1,2", new String((byte[]) (vector.getObject(0).get(1)))); + assertEquals("row1,3", new String((byte[]) (vector.getObject(0).get(2)))); + assertEquals("row1,4", new String((byte[]) (vector.getObject(0).get(3)))); + } + } + + private int[] convertListToIntArray(List list) { int[] values = new int[list.size()]; for (int i = 0; i < list.size(); i++) { values[i] = (int) list.get(i); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 1068f7c030eb5..4c8c96a0d74d3 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -28,6 +28,10 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.DirtyRootAllocator; +import org.apache.arrow.vector.LargeVarBinaryVector; +import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StructVector; @@ -43,6 +47,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.Text; import org.junit.After; import org.junit.Before; import org.junit.Test; 
@@ -392,4 +397,196 @@ public void testNoPromoteFixedSizeBinaryToUnionWithNull() throws Exception { buf.close(); } } + + @Test + public void testPromoteLargeVarCharHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.largeVarChar("c").writeLargeVarChar(new Text("foo")); + writer.setPosition(1); + writer.largeVarChar("c").writeLargeVarChar("foo2"); + writer.end(); + + final LargeVarCharVector uv = v.getChild("c", LargeVarCharVector.class); + assertEquals("foo", uv.getObject(0).toString()); + assertEquals("foo2", uv.getObject(1).toString()); + } + } + + @Test + public void testPromoteVarCharHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.varChar("c").writeVarChar(new Text("foo")); + writer.setPosition(1); + writer.varChar("c").writeVarChar("foo2"); + writer.end(); + + final VarCharVector uv = v.getChild("c", VarCharVector.class); + assertEquals("foo", uv.getObject(0).toString()); + assertEquals("foo2", uv.getObject(1).toString()); + } + } + + @Test + public void testPromoteVarCharHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeVarChar(new Text("foo")); + 
writer.setPosition(1); + writer.writeVarChar("foo2"); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. + UnionVector promotedVector = container.getChild("test", UnionVector.class); + VarCharVector vector = promotedVector.getVarCharVector(); + assertEquals("foo", vector.getObject(0).toString()); + assertEquals("foo2", vector.getObject(1).toString()); + } + } + + @Test + public void testPromoteLargeVarCharHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeLargeVarChar(new Text("foo")); + writer.setPosition(1); + writer.writeLargeVarChar("foo2"); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. 
+ UnionVector promotedVector = container.getChild("test", UnionVector.class); + LargeVarCharVector vector = promotedVector.getLargeVarCharVector(); + assertEquals("foo", vector.getObject(0).toString()); + assertEquals("foo2", vector.getObject(1).toString()); + } + } + + @Test + public void testPromoteVarBinaryHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.varBinary("c").writeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.varBinary("c").writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.varBinary("c").writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + writer.varBinary("c").writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + final VarBinaryVector uv = v.getChild("c", VarBinaryVector.class); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void testPromoteVarBinaryHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + 
writer.writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. + UnionVector promotedVector = container.getChild("test", UnionVector.class); + VarBinaryVector uv = promotedVector.getVarBinaryVector(); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void testPromoteLargeVarBinaryHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.largeVarBinary("c").writeLargeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.largeVarBinary("c").writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + writer.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + final LargeVarBinaryVector uv = v.getChild("c", LargeVarBinaryVector.class); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void testPromoteLargeVarBinaryHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + 
container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeLargeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + writer.writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. + UnionVector promotedVector = container.getChild("test", UnionVector.class); + LargeVarBinaryVector uv = promotedVector.getLargeVarBinaryVector(); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 6f7f5abd30ac9..96d39e85f1f4a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -20,6 +20,7 @@ import static org.junit.Assert.*; import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.time.LocalDateTime; import java.util.ArrayList; import java.util.HashSet; @@ -35,7 +36,11 @@ import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.LargeVarBinaryVector; +import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.SchemaChangeCallBack; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; import 
org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NonNullableStructVector; @@ -1667,4 +1672,210 @@ public void testMapWithStructKey() { assertEquals(1, mapReader.value().readInteger().intValue()); } } + + @Test + public void structWriterVarCharHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + rootWriter.setPosition(0); + rootWriter.varChar("c").writeVarChar(new Text("row1")); + rootWriter.setPosition(1); + rootWriter.varChar("c").writeVarChar("row2"); + rootWriter.end(); + + VarCharVector vector = parent.getChild("root", StructVector.class).getChild("c", VarCharVector.class); + + assertEquals("row1", vector.getObject(0).toString()); + assertEquals("row2", vector.getObject(1).toString()); + } + } + + @Test + public void structWriterLargeVarCharHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + rootWriter.setPosition(0); + rootWriter.largeVarChar("c").writeLargeVarChar(new Text("row1")); + rootWriter.setPosition(1); + rootWriter.largeVarChar("c").writeLargeVarChar("row2"); + rootWriter.end(); + + LargeVarCharVector vector = parent.getChild("root", StructVector.class).getChild("c", + LargeVarCharVector.class); + + assertEquals("row1", vector.getObject(0).toString()); + assertEquals("row2", vector.getObject(1).toString()); + } + } + + @Test + public void structWriterVarBinaryHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + 
rootWriter.setPosition(0); + rootWriter.varBinary("c").writeVarBinary("row1".getBytes()); + rootWriter.setPosition(1); + rootWriter.varBinary("c").writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + rootWriter.setPosition(2); + rootWriter.varBinary("c").writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + rootWriter.setPosition(3); + rootWriter.varBinary("c").writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + rootWriter.end(); + + VarBinaryVector uv = parent.getChild("root", StructVector.class).getChild("c", VarBinaryVector.class); + + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void structWriterLargeVarBinaryHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + rootWriter.setPosition(0); + rootWriter.largeVarBinary("c").writeLargeVarBinary("row1".getBytes()); + rootWriter.setPosition(1); + rootWriter.largeVarBinary("c").writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + rootWriter.setPosition(2); + rootWriter.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + rootWriter.setPosition(3); + rootWriter.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, + "row4".getBytes().length); + rootWriter.end(); + + LargeVarBinaryVector uv = parent.getChild("root", StructVector.class).getChild("c", + LargeVarBinaryVector.class); + + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void listVarCharHelpers() { + try (ListVector 
listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeVarChar("row1"); + listWriter.writeVarChar(new Text("row2")); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", listVector.getObject(0).get(0).toString()); + assertEquals("row2", listVector.getObject(0).get(1).toString()); + } + } + + @Test + public void listLargeVarCharHelpers() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeLargeVarChar("row1"); + listWriter.writeLargeVarChar(new Text("row2")); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", listVector.getObject(0).get(0).toString()); + assertEquals("row2", listVector.getObject(0).get(1).toString()); + } + } + + @Test + public void listVarBinaryHelpers() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeVarBinary("row1".getBytes()); + listWriter.writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + listWriter.writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + listWriter.writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", new String((byte[]) listVector.getObject(0).get(0))); + assertEquals("row2", new String((byte[]) listVector.getObject(0).get(1))); + assertEquals("row3", new String((byte[]) listVector.getObject(0).get(2))); + assertEquals("row4", new String((byte[]) listVector.getObject(0).get(3))); + } + } + + @Test + public void listLargeVarBinaryHelpers() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + 
listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeLargeVarBinary("row1".getBytes()); + listWriter.writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + listWriter.writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + listWriter.writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", new String((byte[]) listVector.getObject(0).get(0))); + assertEquals("row2", new String((byte[]) listVector.getObject(0).get(1))); + assertEquals("row3", new String((byte[]) listVector.getObject(0).get(2))); + assertEquals("row4", new String((byte[]) listVector.getObject(0).get(3))); + } + } + + @Test + public void unionWithVarCharAndBinaryHelpers() throws Exception { + try (UnionVector vector = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null)) { + UnionWriter unionWriter = new UnionWriter(vector); + unionWriter.allocate(); + unionWriter.start(); + unionWriter.setPosition(0); + unionWriter.writeVarChar("row1"); + unionWriter.setPosition(1); + unionWriter.writeVarChar(new Text("row2")); + unionWriter.setPosition(2); + unionWriter.writeLargeVarChar("row3"); + unionWriter.setPosition(3); + unionWriter.writeLargeVarChar(new Text("row4")); + unionWriter.setPosition(4); + unionWriter.writeVarBinary("row5".getBytes()); + unionWriter.setPosition(5); + unionWriter.writeVarBinary("row6".getBytes(), 0, "row6".getBytes().length); + unionWriter.setPosition(6); + unionWriter.writeVarBinary(ByteBuffer.wrap("row7".getBytes())); + unionWriter.setPosition(7); + unionWriter.writeVarBinary(ByteBuffer.wrap("row8".getBytes()), 0, "row8".getBytes().length); + unionWriter.setPosition(8); + unionWriter.writeLargeVarBinary("row9".getBytes()); + unionWriter.setPosition(9); + unionWriter.writeLargeVarBinary("row10".getBytes(), 0, "row10".getBytes().length); + 
unionWriter.setPosition(10); + unionWriter.writeLargeVarBinary(ByteBuffer.wrap("row11".getBytes())); + unionWriter.setPosition(11); + unionWriter.writeLargeVarBinary(ByteBuffer.wrap("row12".getBytes()), 0, "row12".getBytes().length); + unionWriter.end(); + + assertEquals("row1", new String(vector.getVarCharVector().get(0))); + assertEquals("row2", new String(vector.getVarCharVector().get(1))); + assertEquals("row3", new String(vector.getLargeVarCharVector().get(2))); + assertEquals("row4", new String(vector.getLargeVarCharVector().get(3))); + assertEquals("row5", new String(vector.getVarBinaryVector().get(4))); + assertEquals("row6", new String(vector.getVarBinaryVector().get(5))); + assertEquals("row7", new String(vector.getVarBinaryVector().get(6))); + assertEquals("row8", new String(vector.getVarBinaryVector().get(7))); + assertEquals("row9", new String(vector.getLargeVarBinaryVector().get(8))); + assertEquals("row10", new String(vector.getLargeVarBinaryVector().get(9))); + assertEquals("row11", new String(vector.getLargeVarBinaryVector().get(10))); + assertEquals("row12", new String(vector.getLargeVarBinaryVector().get(11))); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index ef918b13fb691..27b8f1796ee31 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -54,7 +54,7 @@ public void testWriteByteArrayToVarBinary() throws Exception { try (VarBinaryVector vector = new VarBinaryVector("test", allocator); VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToVarBinary(input); + writer.writeVarBinary(input); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -65,7 +65,7 @@ public void 
testWriteByteArrayWithOffsetToVarBinary() throws Exception { try (VarBinaryVector vector = new VarBinaryVector("test", allocator); VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToVarBinary(input, 1, 1); + writer.writeVarBinary(input, 1, 1); byte[] result = vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); } @@ -77,7 +77,7 @@ public void testWriteByteBufferToVarBinary() throws Exception { VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToVarBinary(buffer); + writer.writeVarBinary(buffer); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -89,7 +89,7 @@ public void testWriteByteBufferWithOffsetToVarBinary() throws Exception { VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToVarBinary(buffer, 1, 1); + writer.writeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); } @@ -100,7 +100,7 @@ public void testWriteByteArrayToLargeVarBinary() throws Exception { try (LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToLargeVarBinary(input); + writer.writeLargeVarBinary(input); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -111,7 +111,7 @@ public void testWriteByteArrayWithOffsetToLargeVarBinary() throws Exception { try (LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToLargeVarBinary(input, 1, 1); + writer.writeLargeVarBinary(input, 1, 1); byte[] result = 
vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); } @@ -123,7 +123,7 @@ public void testWriteByteBufferToLargeVarBinary() throws Exception { LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToLargeVarBinary(buffer); + writer.writeLargeVarBinary(buffer); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -135,7 +135,7 @@ public void testWriteByteBufferWithOffsetToLargeVarBinary() throws Exception { LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToLargeVarBinary(buffer, 1, 1); + writer.writeLargeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); }