From 6c2a0667ebd6f4b9483481537a3fb3d4012aef3f Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 9 Apr 2025 15:19:25 +0300 Subject: [PATCH] GH-87: [Vector] Add ExtensionWriter (#697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on changes from https://github.com/apache/arrow/pull/41731. ## What's Changed Added writer ExtensionWriter with 3 methods: - write method for writing values from Extension holders; - writeExtension method for writing values (the argument is an Object because the exact type is not known); - addExtensionTypeWriterFactory method - because the exact vector and value type are unknown, the user should create their own extension type vector, a writer for it, and an ExtensionTypeWriterFactory, which maps the vector to the writer. Closes #87. Co-authored-by: Finn Völkel --- .../templates/AbstractFieldWriter.java | 22 ++++ .../AbstractPromotableFieldWriter.java | 10 ++ .../main/codegen/templates/BaseWriter.java | 31 +++++ .../codegen/templates/PromotableWriter.java | 14 +++ .../main/codegen/templates/StructWriters.java | 26 ++++ .../codegen/templates/UnionListWriter.java | 23 ++++ .../codegen/templates/UnionMapWriter.java | 12 ++ .../main/codegen/templates/UnionWriter.java | 20 +++ .../impl/AbstractExtensionTypeWriter.java | 66 ++++++++++ .../impl/ExtensionTypeWriterFactory.java | 38 ++++++ .../complex/impl/UnionExtensionWriter.java | 79 ++++++++++++ .../vector/complex/writer/FieldWriter.java | 4 +- .../arrow/vector/holders/ExtensionHolder.java | 22 ++++ .../apache/arrow/vector/TestStructVector.java | 37 ++++++ .../org/apache/arrow/vector/UuidVector.java | 114 ++++++++++++++++++ .../complex/impl/TestPromotableWriter.java | 29 +++++ .../complex/impl/UuidWriterFactory.java | 31 +++++ .../vector/complex/impl/UuidWriterImpl.java | 47 ++++++++ .../complex/writer/TestSimpleWriter.java | 20 +++ .../arrow/vector/holder/UuidHolder.java | 23 ++++ .../vector/types/pojo/TestExtensionType.java | 70 +---------- 
.../arrow/vector/types/pojo/UuidType.java | 60 +++++++++ 22 files changed, 728 insertions(+), 70 deletions(-) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/UuidVector.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java diff --git a/vector/src/main/codegen/templates/AbstractFieldWriter.java b/vector/src/main/codegen/templates/AbstractFieldWriter.java index cc2cc618d8..ae5b97faef 100644 --- a/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -107,6 +107,16 @@ public void endEntry() { throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); } + public void write(ExtensionHolder var1) { + this.fail("ExtensionType"); + } + public void writeExtension(Object var1) { + this.fail("ExtensionType"); + } + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { + this.fail("ExtensionType"); + } + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> @@ -241,6 +251,18 @@ public 
MapWriter map(String name, boolean keysSorted) { fail("Map"); return null; } + + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + fail("Extension"); + return null; + } + + @Override + public ExtensionWriter extension(ArrowType arrowType) { + fail("Extension"); + return null; + } <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 06cb235f7d..951edd5eee 100644 --- a/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -293,6 +293,11 @@ public MapWriter map(boolean keysSorted) { return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted)); } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + return getWriter(MinorType.EXTENSIONTYPE).extension(arrowType); + } + @Override public StructWriter struct(String name) { return getWriter(MinorType.STRUCT).struct(name); @@ -318,6 +323,11 @@ public MapWriter map(String name, boolean keysSorted) { return getWriter(MinorType.STRUCT).map(name, keysSorted); } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + return getWriter(MinorType.EXTENSIONTYPE).extension(name, arrowType); + } + <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/vector/src/main/codegen/templates/BaseWriter.java b/vector/src/main/codegen/templates/BaseWriter.java index e952d46f1f..78da7fddc3 100644 --- a/vector/src/main/codegen/templates/BaseWriter.java +++ b/vector/src/main/codegen/templates/BaseWriter.java @@ -61,6 +61,7 @@ public interface StructWriter extends BaseWriter { void 
copyReaderToField(String name, FieldReader reader); StructWriter struct(String name); + ExtensionWriter extension(String name, ArrowType arrowType); ListWriter list(String name); ListWriter listView(String name); MapWriter map(String name); @@ -79,6 +80,7 @@ public interface ListWriter extends BaseWriter { ListWriter listView(); MapWriter map(); MapWriter map(boolean keysSorted); + ExtensionWriter extension(ArrowType arrowType); void copyReader(FieldReader reader); <#list vv.types as type><#list type.minor as minor> @@ -101,6 +103,35 @@ public interface MapWriter extends ListWriter { MapWriter value(); } + public interface ExtensionWriter extends BaseWriter { + + /** + * Writes a null value. + */ + void writeNull(); + + /** + * Writes value from the given extension holder. + * + * @param holder the extension holder to write + */ + void write(ExtensionHolder holder); + + /** + * Writes the given extension type value. + * + * @param value the extension type value to write + */ + void writeExtension(Object value); + + /** + * Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector. 
+ * + * @param factory the extension type factory to add + */ + void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory); + } + public interface ScalarWriter extends <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, BaseWriter {} diff --git a/vector/src/main/codegen/templates/PromotableWriter.java b/vector/src/main/codegen/templates/PromotableWriter.java index c0e686f317..8d7d57bb9d 100644 --- a/vector/src/main/codegen/templates/PromotableWriter.java +++ b/vector/src/main/codegen/templates/PromotableWriter.java @@ -285,6 +285,9 @@ protected void setWriter(ValueVector v) { case UNION: writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory); break; + case EXTENSIONTYPE: + writer = new UnionExtensionWriter((ExtensionTypeVector) vector); + break; default: writer = type.getNewFieldWriter(vector); break; @@ -316,6 +319,7 @@ protected boolean requiresArrowType(MinorType type) { || type == MinorType.MAP || type == MinorType.DURATION || type == MinorType.FIXEDSIZEBINARY + || type == MinorType.EXTENSIONTYPE || (type.name().startsWith("TIMESTAMP") && type.name().endsWith("TZ")); } @@ -536,6 +540,16 @@ public void writeLargeVarChar(String value) { getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); } + @Override + public void writeExtension(Object value) { + getWriter(MinorType.EXTENSIONTYPE).writeExtension(value); + } + + @Override + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { + getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory); + } + @Override public void allocate() { getWriter().allocate(); diff --git a/vector/src/main/codegen/templates/StructWriters.java b/vector/src/main/codegen/templates/StructWriters.java index 3e6b9fd773..413f707c70 100644 --- a/vector/src/main/codegen/templates/StructWriters.java +++ b/vector/src/main/codegen/templates/StructWriters.java @@ -83,6 +83,9 @@ public class ${mode}StructWriter extends 
AbstractFieldWriter { fields.put(handleCase(child.getName()), writer); break; } + case EXTENSIONTYPE: + extension(child.getName(), child.getType()); + break; case UNION: FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null); UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory()); @@ -159,6 +162,29 @@ public StructWriter struct(String name) { return writer; } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + String finalName = handleCase(name); + FieldWriter writer = fields.get(finalName); + if(writer == null){ + int vectorCount=container.size(); + FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); + ExtensionTypeVector vector = container.addOrGet(name, fieldType, ExtensionTypeVector.class); + writer = new PromotableWriter(vector, container, getNullableStructWriterFactory()); + if(vectorCount != container.size()) { + writer.allocate(); + } + writer.setPosition(idx()); + fields.put(finalName, writer); + } else { + if (writer instanceof PromotableWriter) { + // ensure writers are initialized + ((PromotableWriter)writer).getWriter(MinorType.EXTENSIONTYPE, arrowType); + } + } + return (ExtensionWriter) writer; + } + @Override public void close() throws Exception { clear(); diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index 3962e1d073..9424533f29 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -201,6 +201,17 @@ public MapWriter map(String name, boolean keysSorted) { return mapWriter; } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + writer.extension(arrowType); + return writer; + } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + ExtensionWriter extensionWriter = 
writer.extension(name, arrowType); + return extensionWriter; + } + <#if listName == "LargeList"> @Override public void startList() { @@ -323,6 +334,18 @@ public void writeNull() { } } + @Override + public void writeExtension(Object value) { + writer.writeExtension(value); + } + @Override + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { + writer.addExtensionTypeWriterFactory(var1); + } + public void write(ExtensionHolder var1) { + writer.write(var1); + } + <#list vv.types as type> <#list type.minor as minor> <#assign name = minor.class?cap_first /> diff --git a/vector/src/main/codegen/templates/UnionMapWriter.java b/vector/src/main/codegen/templates/UnionMapWriter.java index 90b55cb65e..8b2f091215 100644 --- a/vector/src/main/codegen/templates/UnionMapWriter.java +++ b/vector/src/main/codegen/templates/UnionMapWriter.java @@ -231,4 +231,16 @@ public MapWriter map() { return super.map(); } } + + @Override + public ExtensionWriter extension(ArrowType type) { + switch (mode) { + case KEY: + return entryWriter.extension(MapVector.KEY_NAME, type); + case VALUE: + return entryWriter.extension(MapVector.VALUE_NAME, type); + default: + return super.extension(type); + } + } } diff --git a/vector/src/main/codegen/templates/UnionWriter.java b/vector/src/main/codegen/templates/UnionWriter.java index bfe97e2770..272edab17c 100644 --- a/vector/src/main/codegen/templates/UnionWriter.java +++ b/vector/src/main/codegen/templates/UnionWriter.java @@ -213,6 +213,10 @@ public MapWriter asMap(ArrowType arrowType) { return getMapWriter(arrowType); } + private ExtensionWriter getExtensionWriter(ArrowType arrowType) { + throw new UnsupportedOperationException("ExtensionTypes are not supported yet."); + } + BaseWriter getWriter(MinorType minorType) { return getWriter(minorType, null); } @@ -227,6 +231,8 @@ BaseWriter getWriter(MinorType minorType, ArrowType arrowType) { return getListViewWriter(); case MAP: return getMapWriter(arrowType); + case EXTENSIONTYPE: 
+ return getExtensionWriter(arrowType); <#list vv.types as type> <#list type.minor as minor> <#assign name = minor.class?cap_first /> @@ -460,6 +466,20 @@ public MapWriter map(String name, boolean keysSorted) { return getStructWriter().map(name, keysSorted); } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + getListWriter().setPosition(idx()); + return getListWriter().extension(arrowType); + } + + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + getStructWriter().setPosition(idx()); + return getStructWriter().extension(name, arrowType); + } + <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java new file mode 100644 index 0000000000..fccff6c21f --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.types.pojo.Field; + +/** + * Base {@link AbstractFieldWriter} class for an {@link + * org.apache.arrow.vector.ExtensionTypeVector}. + * + * @param a specific {@link ExtensionTypeVector}. + */ +public class AbstractExtensionTypeWriter + extends AbstractFieldWriter { + protected final T vector; + + public AbstractExtensionTypeWriter(T vector) { + this.vector = vector; + } + + @Override + public Field getField() { + return this.vector.getField(); + } + + @Override + public int getValueCapacity() { + return this.vector.getValueCapacity(); + } + + @Override + public void allocate() { + this.vector.allocateNew(); + } + + @Override + public void close() { + this.vector.close(); + } + + @Override + public void clear() { + this.vector.clear(); + } + + @Override + public void writeNull() { + this.vector.setNull(getPosition()); + this.vector.setValueCount(getPosition() + 1); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java new file mode 100644 index 0000000000..09f0314c5f --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; + +/** + * A factory interface for creating instances of {@link ExtensionTypeWriter}. This factory allows + * configuring writer implementations for specific {@link ExtensionTypeVector}. + * + * @param the type of writer implementation for a specific {@link ExtensionTypeVector}. + */ +public interface ExtensionTypeWriterFactory { + + /** + * Returns an instance of the writer implementation for the given {@link ExtensionTypeVector}. + * + * @param vector the {@link ExtensionTypeVector} for which the writer implementation is to be + * returned. + * @return an instance of the writer implementation for the given {@link ExtensionTypeVector}. + */ + T getWriterImpl(ExtensionTypeVector vector); +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java new file mode 100644 index 0000000000..d341384bd9 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.pojo.Field; + +public class UnionExtensionWriter extends AbstractFieldWriter { + protected ExtensionTypeVector vector; + protected FieldWriter writer; + + public UnionExtensionWriter(ExtensionTypeVector vector) { + this.vector = vector; + } + + @Override + public void allocate() { + vector.allocateNew(); + } + + @Override + public void clear() { + vector.clear(); + } + + @Override + public int getValueCapacity() { + return vector.getValueCapacity(); + } + + @Override + public Field getField() { + return vector.getField(); + } + + @Override + public void close() throws Exception { + vector.close(); + } + + @Override + public void writeExtension(Object var1) { + this.writer.writeExtension(var1); + } + + @Override + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { + this.writer = factory.getWriterImpl(vector); + this.writer.setPosition(idx()); + } + + public void write(ExtensionHolder holder) { + this.writer.write(holder); + } + + @Override + public void setPosition(int index) { + super.setPosition(index); + if (this.writer != null) { + this.writer.setPosition(index); + } + } +} diff --git 
a/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java index 949eb35d8e..51bf106685 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector.complex.writer; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ScalarWriter; @@ -25,7 +26,8 @@ * Composite of all writer types. Writers are convenience classes for incrementally adding values to * {@linkplain org.apache.arrow.vector.ValueVector}s. */ -public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter { +public interface FieldWriter + extends StructWriter, ListWriter, MapWriter, ScalarWriter, ExtensionWriter { void allocate(); void clear(); diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java new file mode 100644 index 0000000000..fc7ed85878 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.holders; + +/** Base {@link ValueHolder} class for a {@link org.apache.arrow.vector.ExtensionTypeVector}. */ +public abstract class ExtensionHolder implements ValueHolder { + public int isSet; +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 4ef0fbe2d9..d40af9ae89 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.AbstractStructVector; import org.apache.arrow.vector.complex.ListVector; @@ -37,9 +38,11 @@ import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -336,6 +339,40 @@ public void testGetTransferPairWithFieldAndCallBack() { } } + @Test + public void testStructVectorWithExtensionTypes() { + UuidType uuidType = new 
UuidType(); + Field uuidField = new Field("struct_child", FieldType.nullable(uuidType), null); + Field structField = + new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); + StructVector s1 = new StructVector(structField, allocator, null); + StructVector s2 = (StructVector) structField.createVector(allocator); + s1.close(); + s2.close(); + } + + @Test + public void testStructVectorTransferPairWithExtensionType() { + UuidType uuidType = new UuidType(); + Field uuidField = new Field("uuid_child", FieldType.nullable(uuidType), null); + Field structField = + new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); + + StructVector s1 = (StructVector) structField.createVector(allocator); + UuidVector uuidVector = + s1.addOrGet("uuid_child", FieldType.nullable(uuidType), UuidVector.class); + s1.setValueCount(1); + uuidVector.set(0, new UUID(1, 2)); + s1.setIndexDefined(0); + + TransferPair tp = s1.getTransferPair(structField, allocator); + final StructVector toVector = (StructVector) tp.getTo(); + assertEquals(s1.getField(), toVector.getField()); + + s1.close(); + toVector.close(); + } + private StructVector simpleStructVector(String name, BufferAllocator allocator) { final String INT_COL = "struct_int_child"; final String FLT_COL = "struct_flt_child"; diff --git a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java new file mode 100644 index 0000000000..5c90d45f60 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; +import org.apache.arrow.vector.util.TransferPair; + +public class UuidVector extends ExtensionTypeVector + implements ValueIterableVector { + private final Field field; + + public UuidVector( + String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + public UuidVector(String name, BufferAllocator allocator) { + super(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + @Override + public UUID getObject(int index) { + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + return new UUID(bb.getLong(), bb.getLong()); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + public void set(int index, UUID uuid) { + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + 
getUnderlyingVector().set(index, bb.array()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public Field getField() { + return field; + } + + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((UuidVector) to); + } + + public void setSafe(int index, byte[] value) { + getUnderlyingVector().setIndexDefined(index); + getUnderlyingVector().setSafe(index, value); + } + + public class TransferImpl implements TransferPair { + UuidVector to; + ValueVector targetUnderlyingVector; + TransferPair tp; + + public TransferImpl(UuidVector to) { + this.to = to; + targetUnderlyingVector = this.to.getUnderlyingVector(); + tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); + } + + public UuidVector getTo() { + return this.to; + } + + public void transfer() { + tp.transfer(); + } + + public void splitAndTransfer(int startIndex, int length) { + tp.splitAndTransfer(startIndex, length); + } + + public void copyValueSafe(int fromIndex, int toIndex) { + tp.copyValueSafe(fromIndex, toIndex); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index a791e55135..1556852c5a 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -26,12 +26,14 @@ import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.Objects; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DirtyRootAllocator; import 
org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; @@ -52,6 +54,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -776,4 +779,30 @@ public void testPromoteToUnionFromDecimal() throws Exception { assertEquals(1, intHolder.value); } } + + @Test + public void testExtensionType() throws Exception { + try (final NonNullableStructVector container = + NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final UuidVector v = + container.addOrGet("uuid", FieldType.nullable(new UuidType()), UuidVector.class); + final PromotableWriter writer = new PromotableWriter(v, container)) { + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + container.allocateNew(); + container.setValueCount(1); + writer.addExtensionTypeWriterFactory(new UuidWriterFactory()); + + writer.setPosition(0); + writer.writeExtension(u1); + writer.setPosition(1); + writer.writeExtension(u2); + + container.setValueCount(2); + + UuidVector uuidVector = (UuidVector) container.getChild("uuid"); + assertEquals(u1, uuidVector.getObject(0)); + assertEquals(u2, uuidVector.getObject(1)); + } + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java new file mode 100644 index 0000000000..1b1bf4e6e4 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java @@ -0,0 +1,31 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.UuidVector; + +/** ExtensionTypeWriterFactory used by the tests: yields a UuidWriterImpl for UuidVector inputs. */ public class UuidWriterFactory implements ExtensionTypeWriterFactory { + + /** Returns a UuidWriterImpl wrapping the vector when it is a UuidVector, otherwise {@code null}. */ @Override + public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { + if (extensionTypeVector instanceof UuidVector) { + return new UuidWriterImpl((UuidVector) extensionTypeVector); + } + return null; + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java new file mode 100644 index 0000000000..68029b1df5 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.holders.ExtensionHolder; + +/** Test writer for UuidVector that accepts either a java.util.UUID object or a UuidHolder. */ public class UuidWriterImpl extends AbstractExtensionTypeWriter { + + public UuidWriterImpl(UuidVector vector) { + super(vector); + } + + /** Casts {@code value} to UUID, encodes it as 16 bytes (most significant long first), stores the bytes at the current position and sets the value count to position + 1. */ @Override + public void writeExtension(Object value) { + UUID uuid = (UUID) value; + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + vector.setSafe(getPosition(), bb.array()); + vector.setValueCount(getPosition() + 1); + } + + /** Casts {@code holder} to UuidHolder, stores its {@code value} bytes at the current position and sets the value count to position + 1. */ @Override + public void write(ExtensionHolder holder) { + UuidHolder uuidHolder = (UuidHolder) holder; + vector.setSafe(getPosition(), uuidHolder.value); + vector.setValueCount(getPosition() + 1); + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index 5bb5962704..bf1b9b0dfa 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -20,16 +20,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import
org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -184,4 +188,20 @@ public void testWriteTextToLargeVarChar() throws Exception { assertEquals(input, result); } } + + /** Encodes a random UUID into a 16-byte UuidHolder, writes it through UuidWriterImpl, and checks that index 0 of the vector reads back the same UUID. */ @Test + public void testWriteToExtensionVector() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + UuidHolder holder = new UuidHolder(); + holder.value = bb.array(); + writer.write(holder); + UUID result = vector.getObject(0); + assertEquals(uuid, result); + } + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java b/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java new file mode 100644 index 0000000000..207b0951a7 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.holder; + +import org.apache.arrow.vector.holders.ExtensionHolder; + +/** ExtensionHolder subclass used by the tests to carry a UUID value as raw bytes. */ public class UuidHolder extends ExtensionHolder { + /** Raw bytes that UuidWriterImpl.write hands to UuidVector.setSafe; the tests in this patch fill it with 16 bytes. */ public byte[] value; +} diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 8f54a6e5d7..d24708d66c 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -41,6 +41,7 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.ValueIterableVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.compare.Range; @@ -295,75 +296,6 @@ public void testVectorCompare() { } } - static class UuidType extends ExtensionType { - - @Override - public ArrowType storageType() { - return new ArrowType.FixedSizeBinary(16); - } - - @Override - public String extensionName() { - return "uuid"; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other instanceof UuidType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if
(!storageType.equals(storageType())) { - throw new UnsupportedOperationException( - "Cannot construct UuidType from underlying type " + storageType); - } - return new UuidType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - } - } - - static class UuidVector extends ExtensionTypeVector - implements ValueIterableVector { - - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - } - - @Override - public UUID getObject(int index) { - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - public void set(int index, UUID uuid) { - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - getUnderlyingVector().set(index, bb.array()); - } - } - static class LocationType extends ExtensionType { @Override diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java new file mode 100644 index 0000000000..5e2bd8881b --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.types.pojo; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; + +/** Test extension type named "uuid", stored as FixedSizeBinary(16) and materialized as a UuidVector. */ public class UuidType extends ExtensionType { + + /** Returns a FixedSizeBinary type of width 16. */ @Override + public ArrowType storageType() { + return new ArrowType.FixedSizeBinary(16); + } + + @Override + public String extensionName() { + return "uuid"; + } + + /** Treats every other UuidType instance as equal. */ @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof UuidType; + } + + /** Returns a new UuidType, or throws UnsupportedOperationException when {@code storageType} differs from storageType(); {@code serializedData} is not read. */ @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(storageType())) { + throw new UnsupportedOperationException( + "Cannot construct UuidType from underlying type " + storageType); + } + return new UuidType(); + } + + /** Always returns the empty string. */ @Override + public String serialize() { + return ""; + } + + /** Creates a UuidVector backed by a new 16-byte FixedSizeBinaryVector with the same name and allocator; {@code fieldType} is not used. */ @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + } +}