From 0ced3427bebdb05192bce6a8b18eebf8d3134efa Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 10 Dec 2025 13:55:40 +0200 Subject: [PATCH 1/6] GH-891: Add ExtensionTypeWriterFactory to TransferPair --- .../templates/AbstractFieldReader.java | 5 +- .../templates/AbstractFieldWriter.java | 11 +- .../src/main/codegen/templates/ArrowType.java | 6 + .../main/codegen/templates/BaseReader.java | 3 - .../main/codegen/templates/BaseWriter.java | 7 +- .../main/codegen/templates/ComplexCopier.java | 23 +- .../main/codegen/templates/NullReader.java | 1 - .../codegen/templates/PromotableWriter.java | 14 +- .../codegen/templates/UnionListWriter.java | 12 +- .../main/codegen/templates/UnionReader.java | 23 + .../main/codegen/templates/UnionVector.java | 18 + .../main/codegen/templates/UnionWriter.java | 27 +- .../arrow/vector/BaseFixedWidthVector.java | 1 + .../apache/arrow/vector/BaseValueVector.java | 13 - .../apache/arrow/vector/FixedWidthVector.java | 3 + .../org/apache/arrow/vector/NullVector.java | 13 - .../org/apache/arrow/vector/UuidVector.java | 481 ++++++++++++++++++ .../org/apache/arrow/vector/ValueVector.java | 25 - .../complex/AbstractContainerVector.java | 13 - .../arrow/vector/complex/LargeListVector.java | 33 +- .../vector/complex/LargeListViewVector.java | 15 - .../arrow/vector/complex/ListVector.java | 33 +- .../arrow/vector/complex/ListViewVector.java | 15 +- .../complex/impl/AbstractBaseReader.java | 10 - .../impl/ExtensionTypeWriterFactory.java | 38 -- .../complex/impl/UnionExtensionWriter.java | 8 +- .../complex/impl/UnionLargeListReader.java | 4 - .../vector/complex/impl/UuidReaderImpl.java | 35 +- .../vector/complex/impl/UuidWriterImpl.java | 74 +++ .../arrow/vector/extension/OpaqueType.java | 7 + .../arrow/vector/extension/UuidType.java | 117 +++++ .../arrow/vector/holders/ExtensionHolder.java | 4 + .../vector/holders/NullableUuidHolder.java} | 29 +- .../arrow/vector/holders}/UuidHolder.java | 28 +- .../apache/arrow/vector/util/UuidUtility.java | 77 
+++ .../arrow/vector/TestLargeListVector.java | 79 +++ .../apache/arrow/vector/TestListVector.java | 113 +++- .../apache/arrow/vector/TestMapVector.java | 40 +- .../apache/arrow/vector/TestStructVector.java | 12 +- .../org/apache/arrow/vector/TestUtils.java | 11 + .../org/apache/arrow/vector/TestUuidType.java | 275 ++++++++++ .../apache/arrow/vector/TestUuidVector.java | 464 +++++++++++++++++ .../org/apache/arrow/vector/UuidVector.java | 127 ----- .../complex/impl/TestComplexCopier.java | 31 +- .../complex/impl/TestPromotableWriter.java | 43 +- .../vector/complex/impl/UuidWriterImpl.java | 47 -- .../complex/writer/TestComplexWriter.java | 32 +- .../complex/writer/TestSimpleWriter.java | 40 -- .../vector/types/pojo/TestExtensionType.java | 25 +- .../arrow/vector/types/pojo/UuidType.java | 60 --- 50 files changed, 1975 insertions(+), 650 deletions(-) create mode 100644 vector/src/main/java/org/apache/arrow/vector/UuidVector.java delete mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java rename vector/src/{test => main}/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java (61%) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/extension/UuidType.java rename vector/src/{test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java => main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java} (51%) rename vector/src/{test/java/org/apache/arrow/vector/holder => main/java/org/apache/arrow/vector/holders}/UuidHolder.java (55%) create mode 100644 vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/TestUuidType.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java delete mode 100644 vector/src/test/java/org/apache/arrow/vector/UuidVector.java delete mode 100644 
vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java delete mode 100644 vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java diff --git a/vector/src/main/codegen/templates/AbstractFieldReader.java b/vector/src/main/codegen/templates/AbstractFieldReader.java index c7c5b4d78d..556fb576ce 100644 --- a/vector/src/main/codegen/templates/AbstractFieldReader.java +++ b/vector/src/main/codegen/templates/AbstractFieldReader.java @@ -109,10 +109,6 @@ public void copyAsField(String name, ${name}Writer writer) { - public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory) { - fail("CopyAsValue StructWriter"); - } - public void read(ExtensionHolder holder) { fail("Extension"); } @@ -147,4 +143,5 @@ public int size() { private void fail(String name) { throw new IllegalArgumentException(String.format("You tried to read a [%s] type when you are using a field reader of type [%s].", name, this.getClass().getSimpleName())); } + } diff --git a/vector/src/main/codegen/templates/AbstractFieldWriter.java b/vector/src/main/codegen/templates/AbstractFieldWriter.java index ae5b97faef..4b4a17d932 100644 --- a/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -107,14 +107,17 @@ public void endEntry() { throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); } + @Override public void write(ExtensionHolder var1) { - this.fail("ExtensionType"); + this.fail("Cannot write ExtensionHolder"); } + @Override public void writeExtension(Object var1) { - this.fail("ExtensionType"); + this.fail("Cannot write extension object"); } - public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { - this.fail("ExtensionType"); + @Override + public void writeExtension(Object var1, ArrowType type) { + this.fail("Cannot write extension with type " + 
type); } <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> diff --git a/vector/src/main/codegen/templates/ArrowType.java b/vector/src/main/codegen/templates/ArrowType.java index fd35c1cd2b..b428f09155 100644 --- a/vector/src/main/codegen/templates/ArrowType.java +++ b/vector/src/main/codegen/templates/ArrowType.java @@ -27,8 +27,10 @@ import org.apache.arrow.flatbuf.Type; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.*; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -331,6 +333,10 @@ public boolean equals(Object obj) { public T accept(ArrowTypeVisitor visitor) { return visitor.visit(this); } + + public FieldWriter getNewFieldWriter(ValueVector vector) { + throw new UnsupportedOperationException("WriterImpl not yet implemented."); + } } private static final int defaultDecimalBitWidth = 128; diff --git a/vector/src/main/codegen/templates/BaseReader.java b/vector/src/main/codegen/templates/BaseReader.java index 4c6f49ab9b..c52345af21 100644 --- a/vector/src/main/codegen/templates/BaseReader.java +++ b/vector/src/main/codegen/templates/BaseReader.java @@ -49,7 +49,6 @@ public interface RepeatedStructReader extends StructReader{ boolean next(); int size(); void copyAsValue(StructWriter writer); - void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory); } public interface ListReader extends BaseReader{ @@ -60,7 +59,6 @@ public interface RepeatedListReader extends ListReader{ boolean next(); int size(); void copyAsValue(ListWriter writer); - void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory); } public interface MapReader extends BaseReader{ @@ -71,7 +69,6 @@ public interface RepeatedMapReader extends MapReader{ boolean next(); int 
size(); void copyAsValue(MapWriter writer); - void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory); } public interface ScalarReader extends diff --git a/vector/src/main/codegen/templates/BaseWriter.java b/vector/src/main/codegen/templates/BaseWriter.java index 78da7fddc3..a4c98d7089 100644 --- a/vector/src/main/codegen/templates/BaseWriter.java +++ b/vector/src/main/codegen/templates/BaseWriter.java @@ -125,11 +125,12 @@ public interface ExtensionWriter extends BaseWriter { void writeExtension(Object value); /** - * Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector. + * Writes the given extension type value. * - * @param factory the extension type factory to add + * @param value the extension type value to write + * @param type of the extension */ - void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory); + void writeExtension(Object value, ArrowType type); } public interface ScalarWriter extends diff --git a/vector/src/main/codegen/templates/ComplexCopier.java b/vector/src/main/codegen/templates/ComplexCopier.java index 4df5478f48..6655f6c2a7 100644 --- a/vector/src/main/codegen/templates/ComplexCopier.java +++ b/vector/src/main/codegen/templates/ComplexCopier.java @@ -41,15 +41,8 @@ public class ComplexCopier { * @param input field to read from * @param output field to write to */ - public static void copy(FieldReader input, FieldWriter output) { - writeValue(input, output, null); - } - - public static void copy(FieldReader input, FieldWriter output, ExtensionTypeWriterFactory extensionTypeWriterFactory) { - writeValue(input, output, extensionTypeWriterFactory); - } + public static void copy(FieldReader reader, FieldWriter writer) { - private static void writeValue(FieldReader reader, FieldWriter writer, ExtensionTypeWriterFactory extensionTypeWriterFactory) { final MinorType mt = reader.getMinorType(); switch (mt) { @@ -65,7 +58,7 @@ private static 
void writeValue(FieldReader reader, FieldWriter writer, Extension FieldReader childReader = reader.reader(); FieldWriter childWriter = getListWriterForReader(childReader, writer); if (childReader.isSet()) { - writeValue(childReader, childWriter, extensionTypeWriterFactory); + copy(childReader, childWriter); } else { childWriter.writeNull(); } @@ -83,8 +76,8 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension FieldReader structReader = reader.reader(); if (structReader.isSet()) { writer.startEntry(); - writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key()), extensionTypeWriterFactory); - writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value()), extensionTypeWriterFactory); + copy(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key())); + copy(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value())); writer.endEntry(); } else { writer.writeNull(); @@ -103,7 +96,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension if (childReader.getMinorType() != Types.MinorType.NULL) { FieldWriter childWriter = getStructWriterForReader(childReader, writer, name); if (childReader.isSet()) { - writeValue(childReader, childWriter, extensionTypeWriterFactory); + copy(childReader, childWriter); } else { childWriter.writeNull(); } @@ -115,14 +108,10 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension } break; case EXTENSIONTYPE: - if (extensionTypeWriterFactory == null) { - throw new IllegalArgumentException("Must provide ExtensionTypeWriterFactory"); - } if (reader.isSet()) { Object value = reader.readObject(); if (value != null) { - writer.addExtensionTypeWriterFactory(extensionTypeWriterFactory); - writer.writeExtension(value); + writer.writeExtension(value, reader.getField().getType()); } } else { writer.writeNull(); diff --git a/vector/src/main/codegen/templates/NullReader.java 
b/vector/src/main/codegen/templates/NullReader.java index 0529633478..88e6ea98ea 100644 --- a/vector/src/main/codegen/templates/NullReader.java +++ b/vector/src/main/codegen/templates/NullReader.java @@ -86,7 +86,6 @@ public void read(int arrayIndex, Nullable${name}Holder holder){ } - public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory){} public void read(ExtensionHolder holder) { holder.isSet = 0; } diff --git a/vector/src/main/codegen/templates/PromotableWriter.java b/vector/src/main/codegen/templates/PromotableWriter.java index d22eb00b2c..11d34f72c9 100644 --- a/vector/src/main/codegen/templates/PromotableWriter.java +++ b/vector/src/main/codegen/templates/PromotableWriter.java @@ -286,7 +286,7 @@ protected void setWriter(ValueVector v) { writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory); break; case EXTENSIONTYPE: - writer = new UnionExtensionWriter((ExtensionTypeVector) vector); + writer = ((ExtensionType) vector.getField().getType()).getNewFieldWriter(vector); break; default: writer = type.getNewFieldWriter(vector); @@ -541,17 +541,13 @@ public void writeLargeVarChar(String value) { } @Override - public void writeExtension(Object value) { - getWriter(MinorType.EXTENSIONTYPE).writeExtension(value); + public void writeExtension(Object value, ArrowType arrowType) { + getWriter(MinorType.EXTENSIONTYPE, arrowType).writeExtension(value, arrowType); } @Override - public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { - getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory); - } - - public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory, ArrowType arrowType) { - getWriter(MinorType.EXTENSIONTYPE, arrowType).addExtensionTypeWriterFactory(factory); + public void write(ExtensionHolder holder) { + getWriter(MinorType.EXTENSIONTYPE, holder.type()).write(holder); } @Override diff --git a/vector/src/main/codegen/templates/UnionListWriter.java 
b/vector/src/main/codegen/templates/UnionListWriter.java index 94723e6c9d..80383254f0 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -204,13 +204,13 @@ public MapWriter map(String name, boolean keysSorted) { @Override public ExtensionWriter extension(ArrowType arrowType) { - this.extensionType = arrowType; + extensionType = arrowType; return this; } + @Override public ExtensionWriter extension(String name, ArrowType arrowType) { - ExtensionWriter extensionWriter = writer.extension(name, arrowType); - return extensionWriter; + return writer.extension(name, arrowType); } <#if listName == "LargeList"> @@ -337,13 +337,13 @@ public void writeNull() { @Override public void writeExtension(Object value) { - writer.writeExtension(value); + writer.writeExtension(value, extensionType); writer.setPosition(writer.idx() + 1); } @Override - public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { - writer.addExtensionTypeWriterFactory(var1, extensionType); + public void writeExtension(Object value, ArrowType type) { + writeExtension(value); } public void write(ExtensionHolder var1) { diff --git a/vector/src/main/codegen/templates/UnionReader.java b/vector/src/main/codegen/templates/UnionReader.java index 96ad3e1b9b..0edae7ade0 100644 --- a/vector/src/main/codegen/templates/UnionReader.java +++ b/vector/src/main/codegen/templates/UnionReader.java @@ -79,6 +79,10 @@ public void read(int index, UnionHolder holder) { } private FieldReader getReaderForIndex(int index) { + return getReaderForIndex(index, null); + } + + private FieldReader getReaderForIndex(int index, ArrowType type) { int typeValue = data.getTypeValue(index); FieldReader reader = (FieldReader) readers[typeValue]; if (reader != null) { @@ -105,11 +109,26 @@ private FieldReader getReaderForIndex(int index) { + case EXTENSIONTYPE: + if(type == null) { + throw new RuntimeException("Cannot get Extension reader without an 
ArrowType"); + } + return (FieldReader) getExtension(type); default: throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeValue]); } } + private ExtensionReader extensionReader; + + private ExtensionReader getExtension(ArrowType type) { + if (extensionReader == null) { + extensionReader = data.getExtension(type).getReader(); + extensionReader.setPosition(idx()); + } + return extensionReader; + } + private SingleStructReaderImpl structReader; private StructReader getStruct() { @@ -240,4 +259,8 @@ public FieldReader reader() { public boolean next() { return getReaderForIndex(idx()).next(); } + + public void read(ExtensionHolder holder){ + getReaderForIndex(idx(), holder.type()).read(holder); + } } diff --git a/vector/src/main/codegen/templates/UnionVector.java b/vector/src/main/codegen/templates/UnionVector.java index 67efdf60f7..c706591966 100644 --- a/vector/src/main/codegen/templates/UnionVector.java +++ b/vector/src/main/codegen/templates/UnionVector.java @@ -379,6 +379,22 @@ public MapVector getMap(String name, ArrowType arrowType) { return mapVector; } + private ExtensionTypeVector extensionVector; + + public ExtensionTypeVector getExtension(ArrowType arrowType) { + if (extensionVector == null) { + int vectorCount = internalStruct.size(); + extensionVector = addOrGet(null, MinorType.EXTENSIONTYPE, arrowType, ExtensionTypeVector.class); + if (internalStruct.size() > vectorCount) { + extensionVector.allocateNew(); + if (callBack != null) { + callBack.doWork(); + } + } + } + return extensionVector; + } + public int getTypeValue(int index) { return typeBuffer.getByte(index * TYPE_WIDTH); } @@ -725,6 +741,8 @@ public ValueVector getVectorByType(int typeId, ArrowType arrowType) { return getListView(); case MAP: return getMap(name, arrowType); + case EXTENSIONTYPE: + return getExtension(arrowType); default: throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[typeId]); } diff --git 
a/vector/src/main/codegen/templates/UnionWriter.java b/vector/src/main/codegen/templates/UnionWriter.java index 272edab17c..0db699fd8c 100644 --- a/vector/src/main/codegen/templates/UnionWriter.java +++ b/vector/src/main/codegen/templates/UnionWriter.java @@ -28,6 +28,8 @@ package org.apache.arrow.vector.complex.impl; <#include "/@includes/vv_imports.ftl" /> +import java.util.HashMap; + import org.apache.arrow.vector.complex.writer.BaseWriter; import org.apache.arrow.vector.types.Types.MinorType; @@ -213,8 +215,31 @@ public MapWriter asMap(ArrowType arrowType) { return getMapWriter(arrowType); } + private java.util.Map extensionWriters = new HashMap<>(); + private ExtensionWriter getExtensionWriter(ArrowType arrowType) { - throw new UnsupportedOperationException("ExtensionTypes are not supported yet."); + ExtensionWriter w = extensionWriters.get(arrowType); + if (w == null) { + w = ((ExtensionType) arrowType).getNewFieldWriter(data.getExtension(arrowType)); + w.setPosition(idx()); + extensionWriters.put(arrowType, w); + } + return w; + } + + public void writeExtension(Object value, ArrowType type) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + ExtensionWriter w = getExtensionWriter(type); + w.setPosition(idx()); + w.writeExtension(value); + } + + @Override + public void write(ExtensionHolder holder) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + ExtensionWriter w = getExtensionWriter(holder.type()); + w.setPosition(idx()); + w.write(holder); } BaseWriter getWriter(MinorType minorType) { diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index 4be55396b7..2374f6de13 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -72,6 +72,7 @@ public BaseFixedWidthVector(Field field, final BufferAllocator allocator, final refreshValueCapacity(); } + 
@Override public int getTypeWidth() { return typeWidth; } diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java index 6abf030dd2..9befcb890f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java @@ -22,7 +22,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.ReferenceManager; import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.util.DataSizeRoundingUtil; import org.apache.arrow.vector.util.TransferPair; @@ -249,16 +248,4 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } - - @Override - public void copyFrom( - int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException(); - } - - @Override - public void copyFromSafe( - int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException(); - } } diff --git a/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java b/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java index e22a973f3b..61a5574898 100644 --- a/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java @@ -31,4 +31,7 @@ public interface FixedWidthVector extends ElementAddressableVector { /** Zero out the underlying buffer backing this vector. */ void zeroVector(); + + /** Get the width of the type in bytes. 
*/ + int getTypeWidth(); } diff --git a/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/vector/src/main/java/org/apache/arrow/vector/NullVector.java index 0d6dab2837..6bfe540d23 100644 --- a/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/NullVector.java @@ -27,7 +27,6 @@ import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.NullReader; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -330,18 +329,6 @@ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } - @Override - public void copyFrom( - int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException(); - } - - @Override - public void copyFromSafe( - int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException(); - } - @Override public String getName() { return this.getField().getName(); diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java new file mode 100644 index 0000000000..c662a6e064 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java @@ -0,0 +1,481 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.complex.impl.UuidReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.UuidUtility; + +/** + * Vector implementation for UUID values using {@link UuidType}. + * + *

Supports setting and retrieving UUIDs with efficient storage and nullable value handling. + * + *

Usage: + * + *

{@code
+ * UuidVector vector = new UuidVector("uuid_col", allocator);
+ * vector.set(0, UUID.randomUUID());
+ * UUID value = vector.getObject(0);
+ * }
+ * + * @see UuidType + * @see UuidHolder + * @see NullableUuidHolder + */ +public class UuidVector extends ExtensionTypeVector + implements ValueIterableVector, FixedWidthVector { + private final Field field; + + /** The fixed byte width of UUID values (16 bytes). */ + public static final int TYPE_WIDTH = UUID_BYTE_WIDTH; + + /** + * Constructs a UUID vector with the given name, allocator, and underlying vector. + * + * @param name the name of the vector + * @param allocator the buffer allocator + * @param underlyingVector the underlying FixedSizeBinaryVector for storage + */ + public UuidVector( + String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + /** + * Constructs a UUID vector with the given name, field type, allocator, and underlying vector. + * + * @param name the name of the vector + * @param fieldType the field type (should contain UuidType) + * @param allocator the buffer allocator + * @param underlyingVector the underlying FixedSizeBinaryVector for storage + */ + public UuidVector( + String name, + FieldType fieldType, + BufferAllocator allocator, + FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + this.field = new Field(name, fieldType, null); + } + + /** + * Constructs a UUID vector with the given name and allocator. + * + *

Creates a new underlying FixedSizeBinaryVector with 16-byte width. + * + * @param name the name of the vector + * @param allocator the buffer allocator + */ + public UuidVector(String name, BufferAllocator allocator) { + super(name, allocator, new FixedSizeBinaryVector(name, allocator, UUID_BYTE_WIDTH)); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + /** + * Constructs a UUID vector from a field and allocator. + * + * @param field the field definition (should contain UuidType) + * @param allocator the buffer allocator + */ + public UuidVector(Field field, BufferAllocator allocator) { + super( + field.getName(), + allocator, + new FixedSizeBinaryVector(field.getName(), allocator, UUID_BYTE_WIDTH)); + this.field = field; + } + + @Override + public UUID getObject(int index) { + if (isSet(index) == 0) { + return null; + } + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + return new UUID(bb.getLong(), bb.getLong()); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + /** + * Checks if the value at the given index is set (non-null). + * + * @param index the index to check + * @return 1 if the value is set, 0 if null + */ + public int isSet(int index) { + return getUnderlyingVector().isSet(index); + } + + /** + * Gets the UUID value at the given index as an ArrowBuf. 
+ * + * @param index the index to retrieve + * @return a buffer slice containing the 16-byte UUID + * @throws IllegalStateException if the value at the index is null and null checking is enabled + */ + public ArrowBuf get(int index) throws IllegalStateException { + if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { + throw new IllegalStateException("Value at index is null"); + } else { + return getBufferSlicePostNullCheck(index); + } + } + + /** + * Reads the value at the given index into a NullableUuidHolder; a null value yields isSet = 0. + * + * @param index the index to read from + * @param holder the holder to populate; its buffer is left untouched when the value is null + */ + public void get(int index, NullableUuidHolder holder) { + if (this.isSet(index) == 0) { + holder.isSet = 0; + } else { + holder.isSet = 1; + holder.buffer = getBufferSlicePostNullCheck(index); + } + } + + /** + * Reads the UUID value at the given index into a UuidHolder. + * + * @param index the index to read from + * @param holder the holder to populate with the UUID data + */ + public void get(int index, UuidHolder holder) { + holder.isSet = 1; + holder.buffer = getBufferSlicePostNullCheck(index); + } + + /** + * Sets the UUID value at the given index. + * + * @param index the index to set + * @param value the UUID value to set, or null to set a null value + */ + public void set(int index, UUID value) { + if (value != null) { + set(index, UuidUtility.getBytesFromUUID(value)); + } else { + getUnderlyingVector().setNull(index); + } + } + + /** + * Sets the UUID value at the given index from a UuidHolder. + * + * @param index the index to set + * @param holder the holder containing the UUID data + */ + public void set(int index, UuidHolder holder) { + this.set(index, holder.isSet, holder.buffer); + } + + /** + * Sets the UUID value at the given index from a NullableUuidHolder. 
+ * + * @param index the index to set + * @param holder the holder containing the UUID data + */ + public void set(int index, NullableUuidHolder holder) { + this.set(index, holder.isSet, holder.buffer); + } + + /** + * Sets the UUID value at the given index with explicit null flag. + * + * @param index the index to set + * @param isSet 1 if the value is set, 0 if null + * @param buffer the buffer containing the 16-byte UUID data + */ + public void set(int index, int isSet, ArrowBuf buffer) { + getUnderlyingVector().set(index, isSet, buffer); + } + + /** + * Sets the UUID value at the given index from an ArrowBuf. + * + * @param index the index to set + * @param value the buffer containing the 16-byte UUID data + */ + public void set(int index, ArrowBuf value) { + getUnderlyingVector().set(index, value); + } + + /** + * Sets the UUID value at the given index by copying from a source buffer. + * + * @param index the index to set + * @param source the source buffer to copy from + * @param sourceOffset the offset in the source buffer where the UUID data starts + */ + public void set(int index, ArrowBuf source, int sourceOffset) { + // Copy bytes from source buffer to target vector data buffer + ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); + dataBuffer.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); + getUnderlyingVector().setIndexDefined(index); + } + + /** + * Sets the UUID value at the given index from a byte array. + * + * @param index the index to set + * @param value the 16-byte array containing the UUID data + */ + public void set(int index, byte[] value) { + getUnderlyingVector().set(index, value); + } + + /** + * Sets the UUID value at the given index, expanding capacity if needed. 
+ * + * @param index the index to set + * @param value the UUID value to set, or null to set a null value + */ + public void setSafe(int index, UUID value) { + if (value != null) { + setSafe(index, UuidUtility.getBytesFromUUID(value)); + } else { + getUnderlyingVector().setNull(index); + } + } + + /** + * Sets the UUID value at the given index from a NullableUuidHolder, expanding capacity if needed. + * + * @param index the index to set + * @param holder the holder containing the UUID data, or null to set a null value + */ + public void setSafe(int index, NullableUuidHolder holder) { + if (holder != null) { + getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); + } else { + getUnderlyingVector().setNull(index); + } + } + + /** + * Sets the UUID value at the given index from a UuidHolder, expanding capacity if needed. + * + * @param index the index to set + * @param holder the holder containing the UUID data, or null to set a null value + */ + public void setSafe(int index, UuidHolder holder) { + if (holder != null) { + getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); + } else { + getUnderlyingVector().setNull(index); + } + } + + /** + * Sets the UUID value at the given index from a byte array, expanding capacity if needed. + * + * @param index the index to set + * @param value the 16-byte array containing the UUID data + */ + public void setSafe(int index, byte[] value) { + getUnderlyingVector().setIndexDefined(index); + getUnderlyingVector().setSafe(index, value); + } + + /** + * Sets the UUID value at the given index from an ArrowBuf, expanding capacity if needed. 
+ * + * @param index the index to set + * @param value the buffer containing the 16-byte UUID data + */ + public void setSafe(int index, ArrowBuf value) { + getUnderlyingVector().setSafe(index, value); + } + + @Override + public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public Field getField() { + return field; + } + + @Override + public ArrowBufPointer getDataPointer(int i) { + return getUnderlyingVector().getDataPointer(i); + } + + @Override + public ArrowBufPointer getDataPointer(int i, ArrowBufPointer arrowBufPointer) { + return getUnderlyingVector().getDataPointer(i, arrowBufPointer); + } + + @Override + public void allocateNew(int valueCount) { + getUnderlyingVector().allocateNew(valueCount); + } + + @Override + public void zeroVector() { + getUnderlyingVector().zeroVector(); + } + + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((UuidVector) to); + } + + @Override + protected FieldReader getReaderImpl() { + return new UuidReaderImpl(this); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new TransferImpl(ref, allocator); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(ref, allocator); + } + + @Override + public TransferPair 
getTransferPair(BufferAllocator allocator) { + return getTransferPair(this.getField().getName(), allocator); + } + + private ArrowBuf getBufferSlicePostNullCheck(int index) { + return getUnderlyingVector() + .getDataBuffer() + .slice((long) index * UUID_BYTE_WIDTH, UUID_BYTE_WIDTH); + } + + @Override + public int getTypeWidth() { + return getUnderlyingVector().getTypeWidth(); + } + + /** {@link TransferPair} for {@link UuidVector}. */ + public class TransferImpl implements TransferPair { + UuidVector to; + + /** + * Constructs a transfer pair with the given target vector. + * + * @param to the target UUID vector + */ + public TransferImpl(UuidVector to) { + this.to = to; + } + + /** + * Constructs a transfer pair, creating a new target vector from the field and allocator. + * + * @param field the field definition for the target vector + * @param allocator the buffer allocator for the target vector + */ + public TransferImpl(Field field, BufferAllocator allocator) { + this.to = new UuidVector(field, allocator); + } + + /** + * Constructs a transfer pair, creating a new target vector with the given name and allocator. + * + * @param ref the name for the target vector + * @param allocator the buffer allocator for the target vector + */ + public TransferImpl(String ref, BufferAllocator allocator) { + this.to = new UuidVector(ref, allocator); + } + + /** + * Gets the target vector of this transfer pair. + * + * @return the target UUID vector + */ + public UuidVector getTo() { + return this.to; + } + + /** Transfers ownership of data from the source vector to the target vector. */ + public void transfer() { + getUnderlyingVector().transferTo(to.getUnderlyingVector()); + } + + /** + * Splits and transfers a range of values from the source vector to the target vector. 
+ * + * @param startIndex the starting index in the source vector + * @param length the number of values to transfer + */ + public void splitAndTransfer(int startIndex, int length) { + getUnderlyingVector().splitAndTransferTo(startIndex, length, to.getUnderlyingVector()); + } + + /** + * Copies a value from the source vector to the target vector, expanding capacity if needed. + * + * @param fromIndex the index in the source vector + * @param toIndex the index in the target vector + */ + public void copyValueSafe(int fromIndex, int toIndex) { + to.copyFromSafe(fromIndex, toIndex, (ValueVector) UuidVector.this); + } + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index e0628c2ee1..3a5058256c 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -22,7 +22,6 @@ import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; @@ -310,30 +309,6 @@ public interface ValueVector extends Closeable, Iterable { */ void copyFromSafe(int fromIndex, int thisIndex, ValueVector from); - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - * @param writerFactory the extension type writer factory to use for copying extension type values - */ - void copyFrom( - int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory); - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - * @param writerFactory the extension type writer factory to use for copying extension type values - */ - void copyFromSafe( - int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory); - /** * Accept a generic {@link VectorVisitor} and return the result. * diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index 429f9884bb..a6a71cf1a4 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -21,7 +21,6 @@ import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; @@ -152,18 +151,6 @@ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } - @Override - public void copyFrom( - int fromIndex, int thisIndex, ValueVector from, 
ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException(); - } - - @Override - public void copyFromSafe( - int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException(); - } - @Override public String getName() { return name; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 3a900d0159..ed075352c9 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -48,7 +48,6 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionLargeListReader; import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -484,42 +483,12 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { */ @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from, null); - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. 
- * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - * @param writerFactory the extension type writer factory to use for copying extension type values - */ - @Override - public void copyFrom( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); UnionLargeListWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out, writerFactory); - } - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. - * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - * @param writerFactory the extension type writer factory to use for copying extension type values - */ - @Override - public void copyFromSafe( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - copyFrom(inIndex, outIndex, from, writerFactory); + ComplexCopier.copy(in, out); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 6bfdea3a0f..84c6f03edb 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -41,7 +41,6 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionLargeListViewReader; import 
org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListReader; @@ -348,20 +347,6 @@ public void copyFrom(int inIndex, int outIndex, ValueVector from) { "LargeListViewVector does not support copyFrom operation yet."); } - @Override - public void copyFromSafe( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException( - "LargeListViewVector does not support copyFromSafe operation yet."); - } - - @Override - public void copyFrom( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - throw new UnsupportedOperationException( - "LargeListViewVector does not support copyFrom operation yet."); - } - @Override public FieldVector getDataVector() { return vector; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 1e82fa22f2..3daeb6d77b 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -42,7 +42,6 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -402,42 +401,12 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { */ @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from, null); - } - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. 
- * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - * @param writerFactory the extension type writer factory to use for copying extension type values - */ - @Override - public void copyFromSafe( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - copyFrom(inIndex, outIndex, from, writerFactory); - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - * @param writerFactory the extension type writer factory to use for copying extension type values - */ - @Override - public void copyFrom( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out, writerFactory); + ComplexCopier.copy(in, out); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index fb703a6fb3..9b4e6b4c0c 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -42,7 +42,6 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionListViewReader; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.reader.FieldReader; 
@@ -340,12 +339,6 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { copyFrom(inIndex, outIndex, from); } - @Override - public void copyFromSafe( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { - copyFrom(inIndex, outIndex, from, writerFactory); - } - @Override public OUT accept(VectorVisitor visitor, IN value) { return visitor.visit(this, value); @@ -353,18 +346,12 @@ public OUT accept(VectorVisitor visitor, IN value) { @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from, null); - } - - @Override - public void copyFrom( - int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out, writerFactory); + ComplexCopier.copy(in, out); } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java index bf074ecb90..b2e95663f7 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java @@ -115,14 +115,4 @@ public void copyAsValue(ListWriter writer) { public void copyAsValue(MapWriter writer) { ComplexCopier.copy(this, (FieldWriter) writer); } - - @Override - public void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory) { - ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); - } - - @Override - public void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory) { - ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); - } } diff --git 
a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java deleted file mode 100644 index 09f0314c5f..0000000000 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.complex.writer.FieldWriter; - -/** - * A factory interface for creating instances of {@link ExtensionTypeWriter}. This factory allows - * configuring writer implementations for specific {@link ExtensionTypeVector}. - * - * @param the type of writer implementation for a specific {@link ExtensionTypeVector}. - */ -public interface ExtensionTypeWriterFactory { - - /** - * Returns an instance of the writer implementation for the given {@link ExtensionTypeVector}. - * - * @param vector the {@link ExtensionTypeVector} for which the writer implementation is to be - * returned. - * @return an instance of the writer implementation for the given {@link ExtensionTypeVector}. 
- */ - T getWriterImpl(ExtensionTypeVector vector); -} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java index 4219069cba..93796aa77e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -60,11 +60,6 @@ public void writeExtension(Object var1) { } @Override - public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { - this.writer = factory.getWriterImpl(vector); - this.writer.setPosition(idx()); - } - public void write(ExtensionHolder holder) { this.writer.write(holder); } @@ -79,6 +74,7 @@ public void setPosition(int index) { @Override public void writeNull() { - this.writer.writeNull(); + this.vector.setNull(getPosition()); + this.vector.setValueCount(getPosition() + 1); } } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java index a9104cb0d2..be236c3166 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java @@ -105,8 +105,4 @@ public boolean next() { public void copyAsValue(UnionLargeListWriter writer) { ComplexCopier.copy(this, (FieldWriter) writer); } - - public void copyAsValue(UnionLargeListWriter writer, ExtensionTypeWriterFactory writerFactory) { - ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); - } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java similarity index 61% rename from vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java rename to 
vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java index 6b98d3b340..bb35b960d3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java @@ -17,15 +17,30 @@ package org.apache.arrow.vector.complex.impl; import org.apache.arrow.vector.UuidVector; -import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; +/** + * Reader implementation for {@link UuidVector}. + * + *

Provides methods to read UUID values from a vector, including support for reading into {@link + * UuidHolder} and retrieving values as {@link java.util.UUID} objects. + * + * @see UuidVector + * @see org.apache.arrow.vector.extension.UuidType + */ public class UuidReaderImpl extends AbstractFieldReader { private final UuidVector vector; + /** + * Constructs a reader for the given UUID vector. + * + * @param vector the UUID vector to read from + */ public UuidReaderImpl(UuidVector vector) { super(); this.vector = vector; @@ -48,12 +63,26 @@ public boolean isSet() { @Override public void read(ExtensionHolder holder) { - vector.get(idx(), (UuidHolder) holder); + if (holder instanceof UuidHolder) { + vector.get(idx(), (UuidHolder) holder); + } else if (holder instanceof NullableUuidHolder) { + vector.get(idx(), (NullableUuidHolder) holder); + } else { + throw new IllegalArgumentException( + "Unsupported holder type for UuidReader: " + holder.getClass()); + } } @Override public void read(int arrayIndex, ExtensionHolder holder) { - vector.get(arrayIndex, (UuidHolder) holder); + if (holder instanceof UuidHolder) { + vector.get(arrayIndex, (UuidHolder) holder); + } else if (holder instanceof NullableUuidHolder) { + vector.get(arrayIndex, (NullableUuidHolder) holder); + } else { + throw new IllegalArgumentException( + "Unsupported holder type for UuidReader: " + holder.getClass()); + } } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java new file mode 100644 index 0000000000..ee3c79d5e3 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; + +/** + * Writer implementation for {@link UuidVector}. + * + *

Supports writing UUID values in multiple formats: {@link java.util.UUID}, byte arrays, and + * {@link ArrowBuf}. Also handles {@link UuidHolder} and {@link NullableUuidHolder}. + * + * @see UuidVector + * @see org.apache.arrow.vector.extension.UuidType + */ +public class UuidWriterImpl extends AbstractExtensionTypeWriter { + + /** + * Constructs a writer for the given UUID vector. + * + * @param vector the UUID vector to write to + */ + public UuidWriterImpl(UuidVector vector) { + super(vector); + } + + @Override + public void writeExtension(Object value) { + if (value instanceof byte[]) { + vector.setSafe(getPosition(), (byte[]) value); + } else if (value instanceof ArrowBuf) { + vector.setSafe(getPosition(), (ArrowBuf) value); + } else if (value instanceof java.util.UUID) { + vector.setSafe(getPosition(), (java.util.UUID) value); + } else { + throw new IllegalArgumentException("Unsupported value type for UUID: " + value.getClass()); + } + vector.setValueCount(getPosition() + 1); + } + + @Override + public void writeExtension(Object value, ArrowType type) { + writeExtension(value); + } + + @Override + public void write(ExtensionHolder holder) { + if (holder instanceof UuidHolder) { + vector.setSafe(getPosition(), (UuidHolder) holder); + } else if (holder instanceof NullableUuidHolder) { + vector.setSafe(getPosition(), (NullableUuidHolder) holder); + } + vector.setValueCount(getPosition() + 1); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java b/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java index ca56214fda..780a4ee659 100644 --- a/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java +++ b/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java @@ -54,10 +54,12 @@ import org.apache.arrow.vector.TimeStampNanoVector; import org.apache.arrow.vector.TimeStampSecTZVector; import org.apache.arrow.vector.TimeStampSecVector; +import org.apache.arrow.vector.ValueVector; import 
org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.ViewVarBinaryVector; import org.apache.arrow.vector.ViewVarCharVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; @@ -177,6 +179,11 @@ public int hashCode() { return Objects.hash(super.hashCode(), storageType, typeName, vendorName); } + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + throw new UnsupportedOperationException("WriterImpl not yet implemented."); + } + @Override public String toString() { return "OpaqueType(" diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/UuidType.java b/vector/src/main/java/org/apache/arrow/vector/extension/UuidType.java new file mode 100644 index 0000000000..7a7af298e8 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/extension/UuidType.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.extension; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.FieldType; + +/** + * Extension type for UUID (Universally Unique Identifier) values. + * + *

UUIDs are stored as 16-byte fixed-size binary values. This extension type provides a + * standardized way to represent UUIDs in Arrow, making them interoperable across different systems + * and languages. + * + *

The extension name is "arrow.uuid" and it uses {@link ArrowType.FixedSizeBinary} with 16 bytes + * as the storage type. + * + *

Usage: + * + *

{@code
+ * UuidVector vector = new UuidVector("uuid_col", allocator);
+ * vector.set(0, UUID.randomUUID());
+ * UUID value = vector.getObject(0);
+ * }
+ * + * @see UuidVector + * @see org.apache.arrow.vector.holders.UuidHolder + * @see org.apache.arrow.vector.holders.NullableUuidHolder + */ +public class UuidType extends ExtensionType { + /** Singleton instance of UuidType. */ + public static final UuidType INSTANCE = new UuidType(); + + /** Extension name registered in the Arrow extension type registry. */ + public static final String EXTENSION_NAME = "arrow.uuid"; + + /** Number of bytes used to store a UUID (128 bits = 16 bytes). */ + public static final int UUID_BYTE_WIDTH = 16; + + /** Number of characters in the standard UUID string representation (with hyphens). */ + public static final int UUID_STRING_WIDTH = 36; + + /** Storage type for UUID: FixedSizeBinary(16). */ + public static final ArrowType STORAGE_TYPE = new ArrowType.FixedSizeBinary(UUID_BYTE_WIDTH); + + static { + ExtensionTypeRegistry.register(INSTANCE); + } + + @Override + public ArrowType storageType() { + return STORAGE_TYPE; + } + + @Override + public String extensionName() { + return EXTENSION_NAME; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof UuidType; + } + + @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(storageType())) { + throw new UnsupportedOperationException( + "Cannot construct UuidType from underlying type " + storageType); + } + return INSTANCE; + } + + @Override + public String serialize() { + return ""; + } + + @Override + public boolean isComplex() { + return false; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new UuidVector( + name, fieldType, allocator, new FixedSizeBinaryVector(name, allocator, UUID_BYTE_WIDTH)); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new UuidWriterImpl((UuidVector) vector); + } +} diff --git 
a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java index fc7ed85878..4d3f767aef 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java @@ -16,7 +16,11 @@ */ package org.apache.arrow.vector.holders; +import org.apache.arrow.vector.types.pojo.ArrowType; + /** Base {@link ValueHolder} class for a {@link org.apache.arrow.vector.ExtensionTypeVector}. */ public abstract class ExtensionHolder implements ValueHolder { public int isSet; + + public abstract ArrowType type(); } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java similarity index 51% rename from vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java rename to vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java index 1b1bf4e6e4..7fa50ca761 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java @@ -14,18 +14,29 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.complex.impl; +package org.apache.arrow.vector.holders; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.types.pojo.ArrowType; -public class UuidWriterFactory implements ExtensionTypeWriterFactory { +/** + * Value holder for nullable UUID values. + * + *

The {@code isSet} field controls nullability: when {@code isSet = 1}, the holder contains a + * valid UUID in {@code buffer}; when {@code isSet = 0}, the holder represents a null value and + * {@code buffer} should not be accessed. + * + * @see UuidHolder + * @see org.apache.arrow.vector.UuidVector + * @see org.apache.arrow.vector.extension.UuidType + */ +public class NullableUuidHolder extends ExtensionHolder { + /** Buffer containing 16-byte UUID data. */ + public ArrowBuf buffer; @Override - public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { - if (extensionTypeVector instanceof UuidVector) { - return new UuidWriterImpl((UuidVector) extensionTypeVector); - } - return null; + public ArrowType type() { + return UuidType.INSTANCE; } } diff --git a/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java similarity index 55% rename from vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java rename to vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java index 207b0951a7..8a0a66e435 100644 --- a/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java @@ -14,10 +14,32 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.holder; +package org.apache.arrow.vector.holders; -import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.types.pojo.ArrowType; +/** + * Value holder for non-nullable UUID values. + * + *

Contains a 16-byte UUID in {@code buffer} with {@code isSet} always 1. + * + * @see NullableUuidHolder + * @see org.apache.arrow.vector.UuidVector + * @see org.apache.arrow.vector.extension.UuidType + */ public class UuidHolder extends ExtensionHolder { - public byte[] value; + /** Buffer containing 16-byte UUID data. */ + public ArrowBuf buffer; + + /** Constructs a UuidHolder with isSet = 1. */ + public UuidHolder() { + this.isSet = 1; + } + + @Override + public ArrowType type() { + return UuidType.INSTANCE; + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java b/vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java new file mode 100644 index 0000000000..a1b0b54579 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.util; + +import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.UUID; +import org.apache.arrow.memory.ArrowBuf; + +/** + * Utility class for UUID conversions and operations. + * + *

Provides methods to convert between {@link UUID} objects and byte representations used in + * Arrow vectors. + * + * @see org.apache.arrow.vector.UuidVector + * @see org.apache.arrow.vector.extension.UuidType + */ +public class UuidUtility { + /** + * Converts a UUID to a 16-byte array. + * + *

The UUID is stored in big-endian byte order, with the most significant bits first. + * + * @param uuid the UUID to convert + * @return a 16-byte array representing the UUID + */ + public static byte[] getBytesFromUUID(UUID uuid) { + byte[] result = new byte[16]; + long msb = uuid.getMostSignificantBits(); + long lsb = uuid.getLeastSignificantBits(); + for (int i = 15; i >= 8; i--) { + result[i] = (byte) (lsb & 0xFF); + lsb >>= 8; + } + for (int i = 7; i >= 0; i--) { + result[i] = (byte) (msb & 0xFF); + msb >>= 8; + } + return result; + } + + /** + * Constructs a UUID from bytes stored in an ArrowBuf at the specified index. + * + *

Reads 16 bytes from the buffer starting at the given index and interprets them as a UUID in + * big-endian byte order. + * + * @param buffer the buffer containing UUID data + * @param index the byte offset in the buffer where the UUID starts + * @return the UUID constructed from the buffer data + */ + public static UUID uuidFromArrowBuf(ArrowBuf buffer, long index) { + ByteBuffer buf = buffer.nioBuffer(index, UUID_BYTE_WIDTH); + + buf.order(ByteOrder.BIG_ENDIAN); + long mostSigBits = buf.getLong(0); + long leastSigBits = buf.getLong(Long.BYTES); + return new UUID(mostSigBits, leastSigBits); + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index 101d942d2a..eecd4884ee 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -25,18 +25,24 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionLargeListReader; import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; +import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; +import 
org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1020,6 +1026,79 @@ public void testGetTransferPairWithField() throws Exception { } } + @Test + public void testCopyValueSafeForExtensionType() throws Exception { + try (LargeListVector inVector = LargeListVector.empty("input", allocator); + LargeListVector outVector = LargeListVector.empty("output", allocator)) { + UnionLargeListWriter writer = inVector.getWriter(); + writer.allocate(); + + // Create first list with UUIDs + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); + extensionWriter.writeExtension(u1); + extensionWriter.writeExtension(u2); + writer.endList(); + + // Create second list with UUIDs + writer.setPosition(1); + UUID u3 = UUID.randomUUID(); + UUID u4 = UUID.randomUUID(); + writer.startList(); + extensionWriter = writer.extension(UuidType.INSTANCE); + extensionWriter.writeExtension(u3); + extensionWriter.writeExtension(u4); + extensionWriter.writeNull(); + + writer.endList(); + writer.setValueCount(2); + + // Copy both lists through a TransferPair created by makeTransferPair(outVector) + // copyValueSafe takes only source and target indices; no writer factory is passed + outVector.allocateNew(); + TransferPair tp = inVector.makeTransferPair(outVector); + tp.copyValueSafe(0, 0); + tp.copyValueSafe(1, 1); + outVector.setValueCount(2); + + // Verify first list + UnionLargeListReader reader = outVector.getReader(); + reader.setPosition(0); + assertTrue(reader.isSet(), "first list shouldn't be null"); + reader.next(); + FieldReader uuidReader = reader.reader(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u1, actualUuid); + reader.next(); + uuidReader = reader.reader(); +
uuidReader.read(holder); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u2, actualUuid); + + // Verify second list + reader.setPosition(1); + assertTrue(reader.isSet(), "second list shouldn't be null"); + reader.next(); + uuidReader = reader.reader(); + uuidReader.read(holder); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u3, actualUuid); + reader.next(); + uuidReader = reader.reader(); + uuidReader.read(holder); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u4, actualUuid); + reader.next(); + uuidReader = reader.reader(); + assertFalse(uuidReader.isSet(), "third element should be null"); + } + } + private void writeIntValues(UnionLargeListWriter writer, int[] values) { writer.startList(); for (int v : values) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index ace36334ee..d6f15141eb 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -23,7 +23,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -35,20 +34,20 @@ import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.impl.UuidWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; -import org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; 
import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; +import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1215,8 +1214,7 @@ public void testListVectorWithExtensionType() throws Exception { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); writer.startList(); - ExtensionWriter extensionWriter = writer.extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); extensionWriter.writeExtension(u1); extensionWriter.writeExtension(u2); writer.endList(); @@ -1243,8 +1241,7 @@ public void testListVectorReaderForExtensionType() throws Exception { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); writer.startList(); - ExtensionWriter extensionWriter = writer.extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); extensionWriter.writeExtension(u1); extensionWriter.writeExtension(u2); writer.endList(); @@ -1258,14 +1255,12 @@ public void testListVectorReaderForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - ByteBuffer bb = ByteBuffer.wrap(holder.value); - UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u1, actualUuid); 
reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - bb = ByteBuffer.wrap(holder.value); - actualUuid = new UUID(bb.getLong(), bb.getLong()); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u2, actualUuid); } } @@ -1280,19 +1275,18 @@ public void testCopyFromForExtensionType() throws Exception { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); writer.startList(); - ExtensionWriter extensionWriter = writer.extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter.writeExtension(u1); - extensionWriter.writeExtension(u2); - extensionWriter.writeNull(); + + writer.extension(UuidType.INSTANCE).writeExtension(u1); + writer.writeExtension(u2); + writer.writeNull(); writer.endList(); - writer.setValueCount(1); + writer.setValueCount(3); // copy values from input to output outVector.allocateNew(); - outVector.copyFrom(0, 0, inVector, new UuidWriterFactory()); - outVector.setValueCount(1); + outVector.copyFrom(0, 0, inVector); + outVector.setValueCount(3); UnionListReader reader = outVector.getReader(); assertTrue(reader.isSet(), "shouldn't be null"); @@ -1301,15 +1295,86 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - ByteBuffer bb = ByteBuffer.wrap(holder.value); - UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u1, actualUuid); + reader.next(); + uuidReader = reader.reader(); + uuidReader.read(holder); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u2, actualUuid); + } + } + + @Test + public void testCopyValueSafeForExtensionType() throws Exception { + try (ListVector inVector = ListVector.empty("input", allocator); + ListVector outVector = ListVector.empty("output", allocator)) { + UnionListWriter writer = 
inVector.getWriter(); + writer.allocate(); + + // Create first list with UUIDs + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); + extensionWriter.writeExtension(u1); + extensionWriter.writeExtension(u2); + writer.endList(); + + // Create second list with UUIDs + writer.setPosition(1); + UUID u3 = UUID.randomUUID(); + UUID u4 = UUID.randomUUID(); + writer.startList(); + extensionWriter = writer.extension(UuidType.INSTANCE); + extensionWriter.writeExtension(u3); + extensionWriter.writeExtension(u4); + extensionWriter.writeNull(); + + writer.endList(); + writer.setValueCount(2); + + // Copy both lists through a TransferPair created by makeTransferPair(outVector) + // copyValueSafe takes only source and target indices; no writer factory is passed + outVector.allocateNew(); + TransferPair transferPair = inVector.makeTransferPair(outVector); + transferPair.copyValueSafe(0, 0); + transferPair.copyValueSafe(1, 1); + outVector.setValueCount(2); + + // Verify first list + UnionListReader reader = outVector.getReader(); + reader.setPosition(0); + assertTrue(reader.isSet(), "first list shouldn't be null"); + reader.next(); + FieldReader uuidReader = reader.reader(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - bb = ByteBuffer.wrap(holder.value); - actualUuid = new UUID(bb.getLong(), bb.getLong()); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u2, actualUuid); + + // Verify second list + reader.setPosition(1); + assertTrue(reader.isSet(), "second list shouldn't be null"); + reader.next(); + uuidReader = reader.reader(); + uuidReader.read(holder); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u3, actualUuid); + reader.next(); + uuidReader
= reader.reader(); + uuidReader.read(holder); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(u4, actualUuid); + reader.next(); + uuidReader = reader.reader(); + assertFalse(uuidReader.isSet(), "third element should be null"); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 1a1810d0f7..17bb3b5455 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -34,20 +33,20 @@ import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.complex.impl.UuidWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.UuidUtility; import 
org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1281,15 +1280,12 @@ public void testMapVectorWithExtensionType() throws Exception { writer.startMap(); writer.startEntry(); writer.key().bigInt().writeBigInt(0); - ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter.writeExtension(u1); + ExtensionWriter extensionWriter = writer.value().extension(UuidType.INSTANCE); + extensionWriter.writeExtension(u1, UuidType.INSTANCE); writer.endEntry(); writer.startEntry(); writer.key().bigInt().writeBigInt(1); - extensionWriter = writer.value().extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter.writeExtension(u2); + extensionWriter.writeExtension(u2, UuidType.INSTANCE); writer.endEntry(); writer.endMap(); @@ -1301,14 +1297,12 @@ public void testMapVectorWithExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - ByteBuffer bb = ByteBuffer.wrap(holder.value); - UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - bb = ByteBuffer.wrap(holder.value); - actualUuid = new UUID(bb.getLong(), bb.getLong()); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u2, actualUuid); } } @@ -1325,21 +1319,19 @@ public void testCopyFromForExtensionType() throws Exception { writer.startMap(); writer.startEntry(); writer.key().bigInt().writeBigInt(0); - ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter.writeExtension(u1); + ExtensionWriter extensionWriter 
= writer.value().extension(UuidType.INSTANCE); + extensionWriter.writeExtension(u1, UuidType.INSTANCE); writer.endEntry(); writer.startEntry(); writer.key().bigInt().writeBigInt(1); - extensionWriter = writer.value().extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter.writeExtension(u2); + extensionWriter = writer.value().extension(UuidType.INSTANCE); + extensionWriter.writeExtension(u2, UuidType.INSTANCE); writer.endEntry(); writer.endMap(); writer.setValueCount(1); outVector.allocateNew(); - outVector.copyFrom(0, 0, inVector, new UuidWriterFactory()); + outVector.copyFrom(0, 0, inVector); outVector.setValueCount(1); UnionMapReader mapReader = outVector.getReader(); @@ -1348,14 +1340,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - ByteBuffer bb = ByteBuffer.wrap(holder.value); - UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - bb = ByteBuffer.wrap(holder.value); - actualUuid = new UUID(bb.getLong(), bb.getLong()); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index d40af9ae89..c5e23c8e8d 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -35,6 +35,7 @@ import org.apache.arrow.vector.complex.impl.NullableStructWriter; import org.apache.arrow.vector.complex.writer.Float8Writer; import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.extension.UuidType; import 
org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; @@ -42,7 +43,6 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -160,17 +160,23 @@ public void testGetPrimitiveVectors() { UnionVector unionVector = vector.addOrGetUnion("union"); unionVector.addVector(new BigIntVector("bigInt", allocator)); unionVector.addVector(new SmallIntVector("smallInt", allocator)); + unionVector.addVector(new UuidVector("uuid", allocator)); // add varchar vector vector.addOrGet( "varchar", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class); + // add extension vector + vector.addOrGet("extension", FieldType.nullable(UuidType.INSTANCE), UuidVector.class); + List primitiveVectors = vector.getPrimitiveVectors(); - assertEquals(4, primitiveVectors.size()); + assertEquals(6, primitiveVectors.size()); assertEquals(MinorType.INT, primitiveVectors.get(0).getMinorType()); assertEquals(MinorType.BIGINT, primitiveVectors.get(1).getMinorType()); assertEquals(MinorType.SMALLINT, primitiveVectors.get(2).getMinorType()); - assertEquals(MinorType.VARCHAR, primitiveVectors.get(3).getMinorType()); + assertEquals(MinorType.EXTENSIONTYPE, primitiveVectors.get(3).getMinorType()); + assertEquals(MinorType.VARCHAR, primitiveVectors.get(4).getMinorType()); + assertEquals(MinorType.EXTENSIONTYPE, primitiveVectors.get(5).getMinorType()); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/vector/src/test/java/org/apache/arrow/vector/TestUtils.java index 82295f8037..c28751aa58 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUtils.java +++ 
b/vector/src/test/java/org/apache/arrow/vector/TestUtils.java @@ -20,6 +20,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; import org.apache.arrow.vector.types.pojo.FieldType; public class TestUtils { @@ -62,4 +63,14 @@ public static String generateRandomString(int length) { } return sb.toString(); } + + /* + * Ensure the extension type is registered, as there might be other tests trying to unregister + * the type, e.g. TestExtensionType#readUnderlyingType + */ + public static void ensureRegistered(ArrowType.ExtensionType type) { + if (ExtensionTypeRegistry.lookup(type.extensionName()) == null) { + ExtensionTypeRegistry.register(type); + } + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java new file mode 100644 index 0000000000..9f7c65b82b --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.TestUtils.ensureRegistered; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.UUID; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.UuidUtility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class TestUuidType { + BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + @Test + void testConstants() { + assertEquals("arrow.uuid", UuidType.EXTENSION_NAME); + assertNotNull(UuidType.INSTANCE); + assertNotNull(UuidType.STORAGE_TYPE); + 
assertInstanceOf(ArrowType.FixedSizeBinary.class, UuidType.STORAGE_TYPE); + assertEquals( + UuidType.UUID_BYTE_WIDTH, + ((ArrowType.FixedSizeBinary) UuidType.STORAGE_TYPE).getByteWidth()); + } + + @Test + void testStorageType() { + UuidType type = new UuidType(); + assertEquals(UuidType.STORAGE_TYPE, type.storageType()); + assertInstanceOf(ArrowType.FixedSizeBinary.class, type.storageType()); + } + + @Test + void testExtensionName() { + UuidType type = new UuidType(); + assertEquals("arrow.uuid", type.extensionName()); + } + + @Test + void testExtensionEquals() { + UuidType type1 = new UuidType(); + UuidType type2 = new UuidType(); + UuidType type3 = UuidType.INSTANCE; + + assertTrue(type1.extensionEquals(type2)); + assertTrue(type1.extensionEquals(type3)); + assertTrue(type2.extensionEquals(type3)); + } + + @Test + void testIsComplex() { + UuidType type = new UuidType(); + assertFalse(type.isComplex()); + } + + @Test + void testSerialize() { + UuidType type = new UuidType(); + String serialized = type.serialize(); + assertEquals("", serialized); + } + + @Test + void testDeserializeValid() { + UuidType type = new UuidType(); + ArrowType storageType = new ArrowType.FixedSizeBinary(UuidType.UUID_BYTE_WIDTH); + + ArrowType deserialized = assertDoesNotThrow(() -> type.deserialize(storageType, "")); + assertInstanceOf(UuidType.class, deserialized); + assertEquals(UuidType.INSTANCE, deserialized); + } + + @Test + void testDeserializeInvalidStorageType() { + UuidType type = new UuidType(); + ArrowType wrongStorageType = new ArrowType.FixedSizeBinary(32); + + assertThrows(UnsupportedOperationException.class, () -> type.deserialize(wrongStorageType, "")); + } + + @Test + void testGetNewVector() { + UuidType type = new UuidType(); + try (FieldVector vector = + type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { + assertInstanceOf(UuidVector.class, vector); + assertEquals("uuid_field", vector.getField().getName()); + assertEquals(type, 
vector.getField().getType()); + } + } + + @Test + void testVectorOperations() { + UuidType type = new UuidType(); + try (FieldVector vector = + type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { + UuidVector uuidVector = (UuidVector) vector; + + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + uuidVector.setSafe(0, uuid1); + uuidVector.setSafe(1, uuid2); + uuidVector.setNull(2); + uuidVector.setValueCount(3); + + assertEquals(uuid1, uuidVector.getObject(0)); + assertEquals(uuid2, uuidVector.getObject(1)); + assertNull(uuidVector.getObject(2)); + assertFalse(uuidVector.isNull(0)); + assertFalse(uuidVector.isNull(1)); + assertTrue(uuidVector.isNull(2)); + } + } + + @Test + void testIpcRoundTrip() { + UuidType type = UuidType.INSTANCE; + ensureRegistered(type); + + Schema schema = new Schema(Collections.singletonList(Field.nullable("uuid", type))); + byte[] serialized = schema.serializeAsMessage(); + Schema deserialized = Schema.deserializeMessage(ByteBuffer.wrap(serialized)); + assertEquals(schema, deserialized); + } + + @Test + void testVectorIpcRoundTrip() throws IOException { + UuidType type = UuidType.INSTANCE; + ensureRegistered(type); + + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + try (FieldVector vector = type.getNewVector("field", FieldType.nullable(type), allocator)) { + UuidVector uuidVector = (UuidVector) vector; + uuidVector.setSafe(0, uuid1); + uuidVector.setNull(1); + uuidVector.setSafe(2, uuid2); + uuidVector.setValueCount(3); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(uuidVector)); + ArrowStreamWriter writer = + new ArrowStreamWriter(root, new DictionaryProvider.MapDictionaryProvider(), baos)) { + writer.start(); + writer.writeBatch(); + } + + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()), allocator)) { + 
assertTrue(reader.loadNextBatch()); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + assertEquals(3, root.getRowCount()); + assertEquals( + new Schema(Collections.singletonList(uuidVector.getField())), root.getSchema()); + + UuidVector actual = assertInstanceOf(UuidVector.class, root.getVector("field")); + assertFalse(actual.isNull(0)); + assertTrue(actual.isNull(1)); + assertFalse(actual.isNull(2)); + assertEquals(uuid1, actual.getObject(0)); + assertNull(actual.getObject(1)); + assertEquals(uuid2, actual.getObject(2)); + } + } + } + + @Test + void testVectorByteArrayOperations() { + UuidType type = new UuidType(); + try (FieldVector vector = + type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { + UuidVector uuidVector = (UuidVector) vector; + + UUID uuid = UUID.randomUUID(); + byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); + + uuidVector.setSafe(0, uuidBytes); + uuidVector.setValueCount(1); + + assertEquals(uuid, uuidVector.getObject(0)); + + // Verify the bytes match + byte[] actualBytes = new byte[UuidType.UUID_BYTE_WIDTH]; + uuidVector.get(0).getBytes(0, actualBytes); + assertArrayEquals(uuidBytes, actualBytes); + } + } + + @Test + void testGetNewVectorWithCustomFieldType() { + UuidType type = new UuidType(); + FieldType fieldType = new FieldType(false, type, null); + + try (FieldVector vector = type.getNewVector("non_nullable_uuid", fieldType, allocator)) { + assertInstanceOf(UuidVector.class, vector); + assertEquals("non_nullable_uuid", vector.getField().getName()); + assertFalse(vector.getField().isNullable()); + } + } + + @Test + void testSingleton() { + UuidType type1 = UuidType.INSTANCE; + UuidType type2 = UuidType.INSTANCE; + + // Same instance + assertSame(type1, type2); + assertTrue(type1.extensionEquals(type2)); + } + + @Test + void testUnderlyingVector() { + UuidType type = new UuidType(); + try (FieldVector vector = + type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { + UuidVector uuidVector 
= (UuidVector) vector; + FixedSizeBinaryVector underlying = uuidVector.getUnderlyingVector(); + + assertInstanceOf(FixedSizeBinaryVector.class, underlying); + assertEquals(UuidType.UUID_BYTE_WIDTH, underlying.getByteWidth()); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java new file mode 100644 index 0000000000..a3690461cf --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java @@ -0,0 +1,464 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.UuidReaderImpl; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; +import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.util.UuidUtility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for UuidVector, UuidWriterImpl, and UuidReaderImpl. 
*/ +class TestUuidVector { + + private BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + // ========== Writer Tests ========== + + @Test + void testWriteToExtensionVector() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + ByteBuffer bb = ByteBuffer.allocate(UuidType.UUID_BYTE_WIDTH); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + + // Allocate ArrowBuf for the holder + try (ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + buf.setBytes(0, bb.array()); + + UuidHolder holder = new UuidHolder(); + holder.buffer = buf; + + writer.write(holder); + UUID result = vector.getObject(0); + assertEquals(uuid, result); + } + } + } + + @Test + void testWriteExtensionWithUUID() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + writer.setPosition(0); + writer.writeExtension(uuid); + + UUID result = vector.getObject(0); + assertEquals(uuid, result); + assertEquals(1, vector.getValueCount()); + } + } + + @Test + void testWriteExtensionWithByteArray() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); + + writer.setPosition(0); + writer.writeExtension(uuidBytes); + + UUID result = vector.getObject(0); + assertEquals(uuid, result); + assertEquals(1, vector.getValueCount()); + } + } + + @Test + void testWriteExtensionWithArrowBuf() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector); + ArrowBuf buf = 
allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + UUID uuid = UUID.randomUUID(); + byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); + buf.setBytes(0, uuidBytes); + + writer.setPosition(0); + writer.writeExtension(buf); + + UUID result = vector.getObject(0); + assertEquals(uuid, result); + assertEquals(1, vector.getValueCount()); + } + } + + @Test + void testWriteExtensionWithUnsupportedType() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + writer.setPosition(0); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type")); + + assertEquals( + "Unsupported value type for UUID: class java.lang.String", exception.getMessage()); + } + } + + @Test + void testWriteExtensionMultipleValues() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + writer.setPosition(0); + writer.writeExtension(uuid1); + writer.setPosition(1); + writer.writeExtension(uuid2); + writer.setPosition(2); + writer.writeExtension(uuid3); + + assertEquals(uuid1, vector.getObject(0)); + assertEquals(uuid2, vector.getObject(1)); + assertEquals(uuid3, vector.getObject(2)); + assertEquals(3, vector.getValueCount()); + } + } + + @Test + void testWriteWithUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector); + ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + UUID uuid = UUID.randomUUID(); + byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); + buf.setBytes(0, uuidBytes); + + UuidHolder holder = new UuidHolder(); + holder.buffer = buf; + holder.isSet = 1; + + writer.setPosition(0); + writer.write(holder); + + UUID result = vector.getObject(0); + 
assertEquals(uuid, result); + assertEquals(1, vector.getValueCount()); + } + } + + @Test + void testWriteWithNullableUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector); + ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + UUID uuid = UUID.randomUUID(); + byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); + buf.setBytes(0, uuidBytes); + + NullableUuidHolder holder = new NullableUuidHolder(); + holder.buffer = buf; + holder.isSet = 1; + + writer.setPosition(0); + writer.write(holder); + + UUID result = vector.getObject(0); + assertEquals(uuid, result); + assertEquals(1, vector.getValueCount()); + } + } + + @Test + void testWriteWithNullableUuidHolderNull() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + NullableUuidHolder holder = new NullableUuidHolder(); + holder.isSet = 0; + + writer.setPosition(0); + writer.write(holder); + + assertTrue(vector.isNull(0)); + assertEquals(1, vector.getValueCount()); + } + } + + // ========== Reader Tests ========== + + @Test + void testReaderCopyAsValueExtensionVector() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidVector vectorForRead = new UuidVector("test2", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + vectorForRead.setValueCount(1); + vectorForRead.set(0, uuid); + UuidReaderImpl reader = (UuidReaderImpl) vectorForRead.getReader(); + reader.copyAsValue(writer); + UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); + UuidHolder holder = new UuidHolder(); + reader2.read(0, holder); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(uuid, actualUuid); + } + } + + @Test + void testReaderReadWithUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID 
uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + reader.setPosition(0); + + UuidHolder holder = new UuidHolder(); + reader.read(holder); + + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(uuid, actualUuid); + assertEquals(1, holder.isSet); + } + } + + @Test + void testReaderReadWithNullableUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + reader.setPosition(0); + + NullableUuidHolder holder = new NullableUuidHolder(); + reader.read(holder); + + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(uuid, actualUuid); + assertEquals(1, holder.isSet); + } + } + + @Test + void testReaderReadWithNullableUuidHolderNull() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + vector.setNull(0); + vector.setValueCount(1); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + reader.setPosition(0); + + NullableUuidHolder holder = new NullableUuidHolder(); + reader.read(holder); + + assertEquals(0, holder.isSet); + } + } + + @Test + void testReaderReadWithArrayIndexUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setSafe(1, uuid2); + vector.setSafe(2, uuid3); + vector.setValueCount(3); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + + UuidHolder holder = new UuidHolder(); + reader.read(1, holder); + + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + assertEquals(uuid2, actualUuid); + assertEquals(1, holder.isSet); + } + } + + @Test + void 
testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setNull(1); + vector.setSafe(2, uuid2); + vector.setValueCount(3); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + + NullableUuidHolder holder1 = new NullableUuidHolder(); + reader.read(0, holder1); + assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, 0)); + assertEquals(1, holder1.isSet); + + NullableUuidHolder holder2 = new NullableUuidHolder(); + reader.read(1, holder2); + assertEquals(0, holder2.isSet); + + NullableUuidHolder holder3 = new NullableUuidHolder(); + reader.read(2, holder3); + assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, 0)); + assertEquals(1, holder3.isSet); + } + } + + @Test + void testReaderReadWithUnsupportedHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + reader.setPosition(0); + + // Create a mock unsupported holder + ExtensionHolder unsupportedHolder = + new ExtensionHolder() { + @Override + public ArrowType type() { + return null; + } + }; + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> reader.read(unsupportedHolder)); + + assertTrue(exception.getMessage().contains("Unsupported holder type for UuidReader")); + } + } + + @Test + void testReaderReadWithArrayIndexUnsupportedHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + + // Create a mock unsupported holder + ExtensionHolder unsupportedHolder = + new ExtensionHolder() { + 
@Override + public ArrowType type() { + return null; + } + }; + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> reader.read(0, unsupportedHolder)); + + assertTrue(exception.getMessage().contains("Unsupported holder type for UuidReader")); + } + } + + @Test + void testReaderIsSet() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setNull(1); + vector.setSafe(2, uuid); + vector.setValueCount(3); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + + reader.setPosition(0); + assertTrue(reader.isSet()); + + reader.setPosition(1); + assertFalse(reader.isSet()); + + reader.setPosition(2); + assertTrue(reader.isSet()); + } + } + + @Test + void testReaderReadObject() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setNull(1); + vector.setSafe(2, uuid2); + vector.setValueCount(3); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + + reader.setPosition(0); + assertEquals(uuid1, reader.readObject()); + + reader.setPosition(1); + assertNull(reader.readObject()); + + reader.setPosition(2); + assertEquals(uuid2, reader.readObject()); + } + } + + @Test + void testReaderGetMinorType() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + assertEquals(vector.getMinorType(), reader.getMinorType()); + } + } + + @Test + void testReaderGetField() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + assertEquals(vector.getField(), reader.getField()); + assertEquals("test", reader.getField().getName()); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java 
b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java deleted file mode 100644 index 72ba4aa555..0000000000 --- a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.nio.ByteBuffer; -import java.util.UUID; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.complex.impl.UuidReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holder.UuidHolder; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.UuidType; -import org.apache.arrow.vector.util.TransferPair; - -public class UuidVector extends ExtensionTypeVector - implements ValueIterableVector { - private final Field field; - - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - this.field = new Field(name, FieldType.nullable(new UuidType()), null); - } - - public UuidVector(String name, BufferAllocator allocator) { - super(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - this.field = new Field(name, FieldType.nullable(new UuidType()), null); - } - - @Override - public UUID getObject(int index) { - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - public void set(int index, UUID uuid) { - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - getUnderlyingVector().set(index, bb.array()); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - getUnderlyingVector() - .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); - } - - @Override - public 
Field getField() { - return field; - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UuidVector) to); - } - - @Override - protected FieldReader getReaderImpl() { - return new UuidReaderImpl(this); - } - - public void setSafe(int index, byte[] value) { - getUnderlyingVector().setIndexDefined(index); - getUnderlyingVector().setSafe(index, value); - } - - public void get(int index, UuidHolder holder) { - holder.value = getUnderlyingVector().get(index); - holder.isSet = 1; - } - - public class TransferImpl implements TransferPair { - UuidVector to; - ValueVector targetUnderlyingVector; - TransferPair tp; - - public TransferImpl(UuidVector to) { - this.to = to; - targetUnderlyingVector = this.to.getUnderlyingVector(); - tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); - } - - public UuidVector getTo() { - return this.to; - } - - public void transfer() { - tp.transfer(); - } - - public void splitAndTransfer(int startIndex, int length) { - tp.splitAndTransfer(startIndex, length); - } - - public void copyValueSafe(int fromIndex, int toIndex) { - tp.copyValueSafe(fromIndex, toIndex); - } - } -} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java index 738e8905e3..b2a8cf9ba4 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java @@ -34,11 +34,11 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import 
org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -860,8 +860,7 @@ public void testCopyListVectorWithExtensionType() { for (int i = 0; i < COUNT; i++) { listWriter.setPosition(i); listWriter.startList(); - ExtensionWriter extensionWriter = listWriter.extension(new UuidType()); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + ExtensionWriter extensionWriter = listWriter.extension(UuidType.INSTANCE); extensionWriter.writeExtension(UUID.randomUUID()); extensionWriter.writeExtension(UUID.randomUUID()); listWriter.endList(); @@ -874,7 +873,7 @@ public void testCopyListVectorWithExtensionType() { for (int i = 0; i < COUNT; i++) { in.setPosition(i); out.setPosition(i); - ComplexCopier.copy(in, out, new UuidWriterFactory()); + ComplexCopier.copy(in, out); } to.setValueCount(COUNT); @@ -896,12 +895,10 @@ public void testCopyMapVectorWithExtensionType() { mapWriter.setPosition(i); mapWriter.startMap(); mapWriter.startEntry(); - ExtensionWriter extensionKeyWriter = mapWriter.key().extension(new UuidType()); - extensionKeyWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionKeyWriter.writeExtension(UUID.randomUUID()); - ExtensionWriter extensionValueWriter = mapWriter.value().extension(new UuidType()); - extensionValueWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionValueWriter.writeExtension(UUID.randomUUID()); + ExtensionWriter extensionKeyWriter = mapWriter.key().extension(UuidType.INSTANCE); + extensionKeyWriter.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); + ExtensionWriter extensionValueWriter = mapWriter.value().extension(UuidType.INSTANCE); + extensionValueWriter.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); mapWriter.endEntry(); mapWriter.endMap(); } @@ -914,7 +911,7 @@ public void 
testCopyMapVectorWithExtensionType() { for (int i = 0; i < COUNT; i++) { in.setPosition(i); out.setPosition(i); - ComplexCopier.copy(in, out, new UuidWriterFactory()); + ComplexCopier.copy(in, out); } to.setValueCount(COUNT); @@ -934,12 +931,10 @@ public void testCopyStructVectorWithExtensionType() { for (int i = 0; i < COUNT; i++) { structWriter.setPosition(i); structWriter.start(); - ExtensionWriter extensionWriter1 = structWriter.extension("timestamp1", new UuidType()); - extensionWriter1.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter1.writeExtension(UUID.randomUUID()); - ExtensionWriter extensionWriter2 = structWriter.extension("timestamp2", new UuidType()); - extensionWriter2.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter2.writeExtension(UUID.randomUUID()); + ExtensionWriter extensionWriter1 = structWriter.extension("uuid1", UuidType.INSTANCE); + extensionWriter1.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); + ExtensionWriter extensionWriter2 = structWriter.extension("uuid2", UuidType.INSTANCE); + extensionWriter2.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); structWriter.end(); } @@ -951,7 +946,7 @@ public void testCopyStructVectorWithExtensionType() { for (int i = 0; i < COUNT; i++) { in.setPosition(i); out.setPosition(i); - ComplexCopier.copy(in, out, new UuidWriterFactory()); + ComplexCopier.copy(in, out); } to.setValueCount(COUNT); diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 7b8b1f9ef9..5b6d65d6ba 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -31,6 +31,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DirtyRootAllocator; 
+import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.UuidVector; @@ -41,6 +42,7 @@ import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; +import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableDecimalHolder; @@ -48,15 +50,16 @@ import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; import org.apache.arrow.vector.holders.UnionHolder; +import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.Text; +import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -100,7 +103,6 @@ public void testPromoteToUnion() throws Exception { writer.integer("A").writeInt(10); // we don't write anything in 3 - writer.setPosition(4); writer.integer("A").writeInt(100); @@ -130,9 +132,21 @@ public void testPromoteToUnion() throws Exception { binHolder.buffer = buf; writer.fixedSizeBinary("A", 4).write(binHolder); + writer.setPosition(9); + UUID uuid = UUID.randomUUID(); + writer.extension("A", UuidType.INSTANCE).writeExtension(uuid, UuidType.INSTANCE); + 
writer.end(); + + writer.setPosition(10); + UUID uuid2 = UUID.randomUUID(); + UuidHolder uuidHolder = new UuidHolder(); + uuidHolder.buffer = allocator.buffer(UuidType.UUID_BYTE_WIDTH); + uuidHolder.buffer.setBytes(0, UuidUtility.getBytesFromUUID(uuid2)); + writer.extension("A", UuidType.INSTANCE).write(uuidHolder); writer.end(); + allocator.releaseBytes(UuidType.UUID_BYTE_WIDTH); - container.setValueCount(9); + container.setValueCount(11); final UnionVector uv = v.getChild("A", UnionVector.class); @@ -169,6 +183,12 @@ public void testPromoteToUnion() throws Exception { .order(ByteOrder.nativeOrder()) .getInt()); + assertFalse(uv.isNull(9), "9 shouldn't be null"); + assertEquals(uuid, uv.getObject(9)); + + assertFalse(uv.isNull(10), "10 shouldn't be null"); + assertEquals(uuid2, uv.getObject(10)); + container.clear(); container.allocateNew(); @@ -785,18 +805,17 @@ public void testExtensionType() throws Exception { try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); final UuidVector v = - container.addOrGet("uuid", FieldType.nullable(new UuidType()), UuidVector.class); + container.addOrGet("uuid", FieldType.nullable(UuidType.INSTANCE), UuidVector.class); final PromotableWriter writer = new PromotableWriter(v, container)) { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); container.allocateNew(); container.setValueCount(1); - writer.addExtensionTypeWriterFactory(new UuidWriterFactory()); writer.setPosition(0); - writer.writeExtension(u1); + writer.writeExtension(u1, UuidType.INSTANCE); writer.setPosition(1); - writer.writeExtension(u2); + writer.writeExtension(u2, UuidType.INSTANCE); container.setValueCount(2); @@ -810,22 +829,22 @@ public void testExtensionType() throws Exception { public void testExtensionTypeForList() throws Exception { try (final ListVector container = ListVector.empty(EMPTY_SCHEMA_PATH, allocator); final UuidVector v = - (UuidVector) container.addOrGetVector(FieldType.nullable(new 
UuidType())).getVector(); + (UuidVector) + container.addOrGetVector(FieldType.nullable(UuidType.INSTANCE)).getVector(); final PromotableWriter writer = new PromotableWriter(v, container)) { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); container.allocateNew(); container.setValueCount(1); - writer.addExtensionTypeWriterFactory(new UuidWriterFactory()); writer.setPosition(0); - writer.writeExtension(u1); + writer.writeExtension(u1, UuidType.INSTANCE); writer.setPosition(1); - writer.writeExtension(u2); + writer.writeExtension(u2, UuidType.INSTANCE); container.setValueCount(2); - UuidVector uuidVector = (UuidVector) container.getDataVector(); + FieldVector uuidVector = container.getDataVector(); assertEquals(u1, uuidVector.getObject(0)); assertEquals(u2, uuidVector.getObject(1)); } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java deleted file mode 100644 index 68029b1df5..0000000000 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import java.nio.ByteBuffer; -import java.util.UUID; -import org.apache.arrow.vector.UuidVector; -import org.apache.arrow.vector.holder.UuidHolder; -import org.apache.arrow.vector.holders.ExtensionHolder; - -public class UuidWriterImpl extends AbstractExtensionTypeWriter { - - public UuidWriterImpl(UuidVector vector) { - super(vector); - } - - @Override - public void writeExtension(Object value) { - UUID uuid = (UUID) value; - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - vector.setSafe(getPosition(), bb.array()); - vector.setValueCount(getPosition() + 1); - } - - @Override - public void write(ExtensionHolder holder) { - UuidHolder uuidHolder = (UuidHolder) holder; - vector.setSafe(getPosition(), uuidHolder.value); - vector.setValueCount(getPosition() + 1); - } -} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index f374eb41e4..34425c3420 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -66,7 +66,6 @@ import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionReader; import org.apache.arrow.vector.complex.impl.UnionWriter; -import org.apache.arrow.vector.complex.impl.UuidWriterFactory; import org.apache.arrow.vector.complex.reader.BaseReader.StructReader; import org.apache.arrow.vector.complex.reader.BigIntReader; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -78,7 +77,7 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import 
org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; @@ -87,7 +86,9 @@ import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; +import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -99,13 +100,13 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1105,6 +1106,13 @@ public void simpleUnion() throws Exception { new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); UnionWriter unionWriter = new UnionWriter(vector); unionWriter.allocate(); + + UUID uuid = UUID.randomUUID(); + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + byte[] uuidByte = bb.array(); + for (int i = 0; i < COUNT; i++) { 
unionWriter.setPosition(i); if (i % 5 == 0) { @@ -1127,6 +1135,12 @@ public void simpleUnion() throws Exception { holder.buffer = buf; unionWriter.write(holder); bufs.add(buf); + } else if (i % 5 == 4) { + UuidHolder holder = new UuidHolder(); + holder.buffer = allocator.buffer(UuidType.UUID_BYTE_WIDTH); + holder.buffer.setBytes(0, uuidByte); + unionWriter.write(holder); + allocator.releaseBytes(UuidType.UUID_BYTE_WIDTH); } else { unionWriter.writeFloat4((float) i); } @@ -1152,6 +1166,10 @@ public void simpleUnion() throws Exception { unionReader.read(holder); assertEquals(i, holder.buffer.getInt(0)); assertEquals(4, holder.byteWidth); + } else if (i % 5 == 4) { + NullableUuidHolder holder = new NullableUuidHolder(); + unionReader.read(holder); + assertEquals(UuidUtility.uuidFromArrowBuf(holder.buffer, 0), uuid); } else { assertEquals((float) i, unionReader.readFloat(), 1e-12); } @@ -2509,10 +2527,9 @@ public void extensionWriterReader() throws Exception { StructWriter rootWriter = writer.rootAsStruct(); { - ExtensionWriter extensionWriter = rootWriter.extension("uuid1", new UuidType()); + ExtensionWriter extensionWriter = rootWriter.extension("uuid1", UuidType.INSTANCE); extensionWriter.setPosition(0); - extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); - extensionWriter.writeExtension(u1); + extensionWriter.writeExtension(u1, UuidType.INSTANCE); } // read StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); @@ -2521,8 +2538,7 @@ public void extensionWriterReader() throws Exception { uuidReader.setPosition(0); UuidHolder uuidHolder = new UuidHolder(); uuidReader.read(uuidHolder); - final ByteBuffer bb = ByteBuffer.wrap(uuidHolder.value); - UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(uuidHolder.buffer, 0); assertEquals(u1, actualUuid); assertTrue(uuidReader.isSet()); assertEquals(uuidReader.getMinorType(), MinorType.EXTENSIONTYPE); diff --git 
a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index 269cff0670..5bb5962704 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -20,21 +20,16 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; -import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl; -import org.apache.arrow.vector.complex.impl.UuidReaderImpl; -import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; -import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -189,39 +184,4 @@ public void testWriteTextToLargeVarChar() throws Exception { assertEquals(input, result); } } - - @Test - public void testWriteToExtensionVector() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - UUID uuid = UUID.randomUUID(); - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - UuidHolder holder = new UuidHolder(); - holder.value = bb.array(); - writer.write(holder); - UUID result = 
vector.getObject(0); - assertEquals(uuid, result); - } - } - - @Test - public void testReaderCopyAsValueExtensionVector() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidVector vectorForRead = new UuidVector("test2", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - UUID uuid = UUID.randomUUID(); - vectorForRead.setValueCount(1); - vectorForRead.set(0, uuid); - UuidReaderImpl reader = (UuidReaderImpl) vectorForRead.getReader(); - reader.copyAsValue(writer); - UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); - UuidHolder holder = new UuidHolder(); - reader2.read(0, holder); - final ByteBuffer bb = ByteBuffer.wrap(holder.value); - UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); - assertEquals(uuid, actualUuid); - } - } } diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index d24708d66c..ae5ac0726c 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector.types.pojo; +import static org.apache.arrow.vector.TestUtils.ensureRegistered; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -43,10 +44,13 @@ import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.ValueIterableVector; +import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import 
org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.ipc.ArrowFileReader; import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.types.FloatingPointPrecision; @@ -59,9 +63,9 @@ public class TestExtensionType { /** Test that a custom UUID type can be round-tripped through a temporary file. */ @Test public void roundtripUuid() throws IOException { - ExtensionTypeRegistry.register(new UuidType()); + ensureRegistered(UuidType.INSTANCE); final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); + new Schema(Collections.singletonList(Field.nullable("a", UuidType.INSTANCE))); try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { UUID u1 = UUID.randomUUID(); @@ -89,7 +93,7 @@ public void roundtripUuid() throws IOException { assertEquals(root.getSchema(), readerRoot.getSchema()); final Field field = readerRoot.getSchema().getFields().get(0); - final UuidType expectedType = new UuidType(); + final UuidType expectedType = UuidType.INSTANCE; assertEquals( field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); @@ -113,9 +117,9 @@ public void roundtripUuid() throws IOException { /** Test that a custom UUID type can be read as its underlying type. 
*/ @Test public void readUnderlyingType() throws IOException { - ExtensionTypeRegistry.register(new UuidType()); + ensureRegistered(UuidType.INSTANCE); final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); + new Schema(Collections.singletonList(Field.nullable("a", UuidType.INSTANCE))); try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { UUID u1 = UUID.randomUUID(); @@ -135,7 +139,7 @@ public void readUnderlyingType() throws IOException { writer.end(); } - ExtensionTypeRegistry.unregister(new UuidType()); + ExtensionTypeRegistry.unregister(UuidType.INSTANCE); try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath())); @@ -153,7 +157,7 @@ public void readUnderlyingType() throws IOException { .getByteWidth()); final Field field = readerRoot.getSchema().getFields().get(0); - final UuidType expectedType = new UuidType(); + final UuidType expectedType = UuidType.INSTANCE; assertEquals( field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); @@ -254,7 +258,7 @@ public void roundtripLocation() throws IOException { @Test public void testVectorCompare() { - UuidType uuidType = new UuidType(); + UuidType uuidType = UuidType.INSTANCE; ExtensionTypeRegistry.register(uuidType); try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); UuidVector a1 = @@ -331,6 +335,11 @@ public String serialize() { public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { return new LocationVector(name, allocator); } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + throw new UnsupportedOperationException("Not yet implemented."); + } } public static class LocationVector extends ExtensionTypeVector diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java 
b/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java deleted file mode 100644 index 5e2bd8881b..0000000000 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types.pojo; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.UuidVector; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; - -public class UuidType extends ExtensionType { - - @Override - public ArrowType storageType() { - return new ArrowType.FixedSizeBinary(16); - } - - @Override - public String extensionName() { - return "uuid"; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other instanceof UuidType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if (!storageType.equals(storageType())) { - throw new UnsupportedOperationException( - "Cannot construct UuidType from underlying type " + storageType); - } - return new UuidType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - } -} From 64828b786b354555dadb1ecea30304fabc5524f6 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 18 Dec 2025 11:20:37 +0200 Subject: [PATCH 2/6] Add cleanup step in jarbuild workflow Added a step to remove logs and unused SDKs to clean up the environment. 
--- .github/workflows/jarbuild.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index 0e9908b151..60b501e90e 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -216,6 +216,12 @@ jobs: ./bootstrap-vcpkg.sh echo "VCPKG_ROOT=${PWD}/arrow/vcpkg" >> ${GITHUB_ENV} echo "${PWD}/arrow/vcpkg" >> ${GITHUB_PATH} + - name: Remove logs and unused SDKs + run: | + sudo rm -rf /usr/local/share/vcpkg/buildtrees + sudo rm -rf /Applications/Xcode*/Contents/Developer/Platforms/iPhone* + sudo rm -rf /Library/Developer/CoreSimulator/Caches/* + df -h - name: Install dependencies run: | # Ensure updating python@XXX with the "--overwrite" option. From 4fe9501095aabd6b5775b9eb8fd9f52070a400be Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 18 Dec 2025 13:51:33 +0200 Subject: [PATCH 3/6] fix --- .github/workflows/jarbuild.yml | 59 +++++++++++++++++++++++++--------- .github/workflows/rc.yml | 1 - 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index 60b501e90e..760561eb60 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -16,7 +16,7 @@ # under the License. 
name: JarBuild -on: +on: workflow_dispatch: inputs: arrow_branch: @@ -165,10 +165,9 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: macos-15-intel, arch: "x86_64"} - { runs_on: macos-15, arch: "aarch_64" } env: - MACOSX_DEPLOYMENT_TARGET: "14.0" + MACOSX_DEPLOYMENT_TARGET: "15.0" steps: - name: Download source archive uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 @@ -216,14 +215,30 @@ jobs: ./bootstrap-vcpkg.sh echo "VCPKG_ROOT=${PWD}/arrow/vcpkg" >> ${GITHUB_ENV} echo "${PWD}/arrow/vcpkg" >> ${GITHUB_PATH} - - name: Remove logs and unused SDKs + - name: Clean up disk space run: | - sudo rm -rf /usr/local/share/vcpkg/buildtrees - sudo rm -rf /Applications/Xcode*/Contents/Developer/Platforms/iPhone* - sudo rm -rf /Library/Developer/CoreSimulator/Caches/* - df -h + echo "=== Free disk space before cleanup ===" + df -h / + + echo "" + echo "=== Removing Xcode simulators ===" + sudo rm -rf /Library/Developer/CoreSimulator/Caches || : + echo "Removed /Library/Developer/CoreSimulator/Caches" + + echo "" + echo "=== Removing user simulator data ===" + rm -rf ~/Library/Developer/CoreSimulator || : + echo "Removed ~/Library/Developer/CoreSimulator" + + echo "" + echo "=== Free disk space after cleanup ===" + df -h / - name: Install dependencies run: | + echo "=== Free disk space at start of dependency installation ===" + df -h / + + echo "" # Ensure updating python@XXX with the "--overwrite" option. # If python@XXX is updated without "--overwrite", it causes # a conflict error. Because Python 3 installed not by @@ -270,6 +285,11 @@ jobs: # bundled Protobuf. brew uninstall protobuf + echo "" + echo "=== Free disk space before LLVM build ===" + df -h / + + echo "" # Use vcpkg to install LLVM. 
vcpkg install \ --clean-after-build \ @@ -277,7 +297,12 @@ jobs: --x-manifest-root=arrow/ci/vcpkg \ --overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \ --x-feature=gandiva-llvm - + + echo "" + echo "=== Free disk space after LLVM build ===" + df -h / + + echo "" brew bundle --file=Brewfile - name: Prepare ccache run: | @@ -290,10 +315,18 @@ jobs: restore-keys: jni-macos-${{ matrix.platform.arch }}- - name: Build run: | + echo "=== Free disk space at start of build ===" + df -h / + + echo "" set -e # make brew Java available to CMake export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home ci/scripts/jni_macos_build.sh . arrow build jni + + echo "" + echo "=== Free disk space at end of build ===" + df -h / - name: Compress into single artifact to keep directory structure run: tar -cvzf jni-macos-${{ matrix.platform.arch }}.tar.gz jni/ - name: Upload artifacts @@ -318,7 +351,6 @@ jobs: tar -xf apache-arrow-java-*.tar.gz --strip-components=1 tar -xvzf jni-linux-x86_64.tar.gz tar -xvzf jni-linux-aarch_64.tar.gz - tar -xvzf jni-macos-x86_64.tar.gz tar -xvzf jni-macos-aarch_64.tar.gz - name: Test that shared libraries exist run: | @@ -334,11 +366,6 @@ jobs: test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.so test -f jni/gandiva_jni/aarch_64/libgandiva_jni.so - test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib - test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib - test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib - test -f jni/gandiva_jni/x86_64/libgandiva_jni.dylib - test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib @@ -484,4 +511,4 @@ jobs: $artifact done env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/rc.yml b/.github/workflows/rc.yml index 7e3cf5f6f2..9319a18ecf 100644 --- 
a/.github/workflows/rc.yml +++ b/.github/workflows/rc.yml @@ -380,7 +380,6 @@ jobs: tar -xf apache-arrow-java-*.tar.gz --strip-components=1 tar -xvzf jni-linux-x86_64.tar.gz tar -xvzf jni-linux-aarch_64.tar.gz - tar -xvzf jni-macos-x86_64.tar.gz tar -xvzf jni-macos-aarch_64.tar.gz tar -xvzf jni-windows-x86_64.tar.gz - name: Test that shared libraries exist From dd8645da7cf4801075bd93b1cbf68c685c40ca2a Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 18 Dec 2025 15:23:44 +0200 Subject: [PATCH 4/6] disable unity --- ci/scripts/jni_macos_build.sh | 3 ++- ci/scripts/jni_manylinux_build.sh | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ci/scripts/jni_macos_build.sh b/ci/scripts/jni_macos_build.sh index f7543b6f7a..18bd6f4ec9 100755 --- a/ci/scripts/jni_macos_build.sh +++ b/ci/scripts/jni_macos_build.sh @@ -72,7 +72,8 @@ export ARROW_ORC : "${ARROW_PARQUET:=ON}" : "${ARROW_S3:=ON}" : "${CMAKE_BUILD_TYPE:=Release}" -: "${CMAKE_UNITY_BUILD:=ON}" +# Disable Unity build due to compilation issues with Gandiva +: "${CMAKE_UNITY_BUILD:=OFF}" export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh index 0c63fc3408..de68043a00 100755 --- a/ci/scripts/jni_manylinux_build.sh +++ b/ci/scripts/jni_manylinux_build.sh @@ -71,6 +71,20 @@ fi github_actions_group_begin "Building Arrow C++ libraries" devtoolset_version="$(rpm -qa "devtoolset-*-gcc" --queryformat '%{VERSION}' | grep -o "^[0-9]*")" devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" + +# Detect architecture for devtoolset include path +case "$(uname -m)" in + x86_64) + devtoolset_arch="x86_64-redhat-linux" + ;; + aarch64) + devtoolset_arch="aarch64-redhat-linux" + ;; + *) + devtoolset_arch="$(uname -m)-redhat-linux" + ;; +esac + : "${ARROW_ACERO:=ON}" export ARROW_ACERO : 
"${ARROW_BUILD_TESTS:=OFF}" @@ -88,11 +102,12 @@ export ARROW_ORC : "${ARROW_PARQUET:=ON}" : "${ARROW_S3:=ON}" : "${CMAKE_BUILD_TYPE:=release}" -: "${CMAKE_UNITY_BUILD:=ON}" +# Disable Unity build due to compilation issues with Gandiva +: "${CMAKE_UNITY_BUILD:=OFF}" : "${VCPKG_ROOT:=/opt/vcpkg}" : "${VCPKG_FEATURE_FLAGS:=-manifests}" : "${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}" -: "${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread}" +: "${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/${devtoolset_arch};-lpthread}" export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" From 3b3d208b4d6edde6277db63aca3d5d347fe03741 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 18 Dec 2025 17:45:20 +0200 Subject: [PATCH 5/6] Revert "disable unity" This reverts commit dd8645da7cf4801075bd93b1cbf68c685c40ca2a. 
--- ci/scripts/jni_macos_build.sh | 3 +-- ci/scripts/jni_manylinux_build.sh | 19 ++----------------- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/ci/scripts/jni_macos_build.sh b/ci/scripts/jni_macos_build.sh index 18bd6f4ec9..f7543b6f7a 100755 --- a/ci/scripts/jni_macos_build.sh +++ b/ci/scripts/jni_macos_build.sh @@ -72,8 +72,7 @@ export ARROW_ORC : "${ARROW_PARQUET:=ON}" : "${ARROW_S3:=ON}" : "${CMAKE_BUILD_TYPE:=Release}" -# Disable Unity build due to compilation issues with Gandiva -: "${CMAKE_UNITY_BUILD:=OFF}" +: "${CMAKE_UNITY_BUILD:=ON}" export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh index de68043a00..0c63fc3408 100755 --- a/ci/scripts/jni_manylinux_build.sh +++ b/ci/scripts/jni_manylinux_build.sh @@ -71,20 +71,6 @@ fi github_actions_group_begin "Building Arrow C++ libraries" devtoolset_version="$(rpm -qa "devtoolset-*-gcc" --queryformat '%{VERSION}' | grep -o "^[0-9]*")" devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" - -# Detect architecture for devtoolset include path -case "$(uname -m)" in - x86_64) - devtoolset_arch="x86_64-redhat-linux" - ;; - aarch64) - devtoolset_arch="aarch64-redhat-linux" - ;; - *) - devtoolset_arch="$(uname -m)-redhat-linux" - ;; -esac - : "${ARROW_ACERO:=ON}" export ARROW_ACERO : "${ARROW_BUILD_TESTS:=OFF}" @@ -102,12 +88,11 @@ export ARROW_ORC : "${ARROW_PARQUET:=ON}" : "${ARROW_S3:=ON}" : "${CMAKE_BUILD_TYPE:=release}" -# Disable Unity build due to compilation issues with Gandiva -: "${CMAKE_UNITY_BUILD:=OFF}" +: "${CMAKE_UNITY_BUILD:=ON}" : "${VCPKG_ROOT:=/opt/vcpkg}" : "${VCPKG_FEATURE_FLAGS:=-manifests}" : "${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}" -: 
"${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/${devtoolset_arch};-lpthread}" +: "${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread}" export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" From 4d92fa0e213bfff27e72c96abfb981243cbc015e Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Sun, 21 Dec 2025 19:48:15 +0200 Subject: [PATCH 6/6] removed cherry-pick for uuid vector --- .../org/apache/arrow/vector/UuidVector.java | 481 ------------------ .../vector/holders/NullableUuidHolder.java | 42 -- .../apache/arrow/vector/util/UuidUtility.java | 77 --- .../arrow/vector/TestLargeListVector.java | 26 +- .../apache/arrow/vector/TestListVector.java | 46 +- .../apache/arrow/vector/TestMapVector.java | 32 +- .../apache/arrow/vector/TestStructVector.java | 4 +- .../org/apache/arrow/vector/TestUuidType.java | 275 ---------- .../apache/arrow/vector/TestUuidVector.java | 464 ----------------- .../org/apache/arrow/vector/UuidVector.java | 127 +++++ .../complex/impl/TestComplexCopier.java | 24 +- .../complex/impl/TestPromotableWriter.java | 29 +- .../vector/complex/impl/UuidReaderImpl.java | 35 +- .../vector/complex/impl/UuidWriterImpl.java | 41 +- .../complex/writer/TestComplexWriter.java | 23 +- .../arrow/vector/holder}/UuidHolder.java | 25 +- .../vector/types/pojo/TestExtensionType.java | 17 +- .../arrow/vector/types/pojo}/UuidType.java | 60 +-- 18 files changed, 257 insertions(+), 1571 deletions(-) delete mode 100644 vector/src/main/java/org/apache/arrow/vector/UuidVector.java delete mode 100644 vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java delete mode 100644 vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java delete mode 100644 vector/src/test/java/org/apache/arrow/vector/TestUuidType.java delete mode 100644 
vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/UuidVector.java rename vector/src/{main => test}/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java (61%) rename vector/src/{main => test}/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java (54%) rename vector/src/{main/java/org/apache/arrow/vector/holders => test/java/org/apache/arrow/vector/holder}/UuidHolder.java (61%) rename vector/src/{main/java/org/apache/arrow/vector/extension => test/java/org/apache/arrow/vector/types/pojo}/UuidType.java (51%) diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java deleted file mode 100644 index c662a6e064..0000000000 --- a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java +++ /dev/null @@ -1,481 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH; - -import java.nio.ByteBuffer; -import java.util.UUID; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.complex.impl.UuidReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.holders.NullableUuidHolder; -import org.apache.arrow.vector.holders.UuidHolder; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; - -/** - * Vector implementation for UUID values using {@link UuidType}. - * - *

<p>Supports setting and retrieving UUIDs with efficient storage and nullable value handling. - * - * <p>Usage: - * - * <pre>{@code
- * UuidVector vector = new UuidVector("uuid_col", allocator);
- * vector.set(0, UUID.randomUUID());
- * UUID value = vector.getObject(0);
- * }</pre>
- * - * @see UuidType - * @see UuidHolder - * @see NullableUuidHolder - */ -public class UuidVector extends ExtensionTypeVector - implements ValueIterableVector, FixedWidthVector { - private final Field field; - - /** The fixed byte width of UUID values (16 bytes). */ - public static final int TYPE_WIDTH = UUID_BYTE_WIDTH; - - /** - * Constructs a UUID vector with the given name, allocator, and underlying vector. - * - * @param name the name of the vector - * @param allocator the buffer allocator - * @param underlyingVector the underlying FixedSizeBinaryVector for storage - */ - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - this.field = new Field(name, FieldType.nullable(new UuidType()), null); - } - - /** - * Constructs a UUID vector with the given name, field type, allocator, and underlying vector. - * - * @param name the name of the vector - * @param fieldType the field type (should contain UuidType) - * @param allocator the buffer allocator - * @param underlyingVector the underlying FixedSizeBinaryVector for storage - */ - public UuidVector( - String name, - FieldType fieldType, - BufferAllocator allocator, - FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - this.field = new Field(name, fieldType, null); - } - - /** - * Constructs a UUID vector with the given name and allocator. - * - *

Creates a new underlying FixedSizeBinaryVector with 16-byte width. - * - * @param name the name of the vector - * @param allocator the buffer allocator - */ - public UuidVector(String name, BufferAllocator allocator) { - super(name, allocator, new FixedSizeBinaryVector(name, allocator, UUID_BYTE_WIDTH)); - this.field = new Field(name, FieldType.nullable(new UuidType()), null); - } - - /** - * Constructs a UUID vector from a field and allocator. - * - * @param field the field definition (should contain UuidType) - * @param allocator the buffer allocator - */ - public UuidVector(Field field, BufferAllocator allocator) { - super( - field.getName(), - allocator, - new FixedSizeBinaryVector(field.getName(), allocator, UUID_BYTE_WIDTH)); - this.field = field; - } - - @Override - public UUID getObject(int index) { - if (isSet(index) == 0) { - return null; - } - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - /** - * Checks if the value at the given index is set (non-null). - * - * @param index the index to check - * @return 1 if the value is set, 0 if null - */ - public int isSet(int index) { - return getUnderlyingVector().isSet(index); - } - - /** - * Gets the UUID value at the given index as an ArrowBuf. 
- * - * @param index the index to retrieve - * @return a buffer slice containing the 16-byte UUID - * @throws IllegalStateException if the value at the index is null and null checking is enabled - */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } else { - return getBufferSlicePostNullCheck(index); - } - } - - /** - * Reads the UUID value at the given index into a NullableUuidHolder. - * - * @param index the index to read from - * @param holder the holder to populate with the UUID data - */ - public void get(int index, NullableUuidHolder holder) { - if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { - holder.isSet = 0; - } else { - holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); - } - } - - /** - * Reads the UUID value at the given index into a UuidHolder. - * - * @param index the index to read from - * @param holder the holder to populate with the UUID data - */ - public void get(int index, UuidHolder holder) { - holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); - } - - /** - * Sets the UUID value at the given index. - * - * @param index the index to set - * @param value the UUID value to set, or null to set a null value - */ - public void set(int index, UUID value) { - if (value != null) { - set(index, UuidUtility.getBytesFromUUID(value)); - } else { - getUnderlyingVector().setNull(index); - } - } - - /** - * Sets the UUID value at the given index from a UuidHolder. - * - * @param index the index to set - * @param holder the holder containing the UUID data - */ - public void set(int index, UuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); - } - - /** - * Sets the UUID value at the given index from a NullableUuidHolder. 
- * - * @param index the index to set - * @param holder the holder containing the UUID data - */ - public void set(int index, NullableUuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); - } - - /** - * Sets the UUID value at the given index with explicit null flag. - * - * @param index the index to set - * @param isSet 1 if the value is set, 0 if null - * @param buffer the buffer containing the 16-byte UUID data - */ - public void set(int index, int isSet, ArrowBuf buffer) { - getUnderlyingVector().set(index, isSet, buffer); - } - - /** - * Sets the UUID value at the given index from an ArrowBuf. - * - * @param index the index to set - * @param value the buffer containing the 16-byte UUID data - */ - public void set(int index, ArrowBuf value) { - getUnderlyingVector().set(index, value); - } - - /** - * Sets the UUID value at the given index by copying from a source buffer. - * - * @param index the index to set - * @param source the source buffer to copy from - * @param sourceOffset the offset in the source buffer where the UUID data starts - */ - public void set(int index, ArrowBuf source, int sourceOffset) { - // Copy bytes from source buffer to target vector data buffer - ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); - dataBuffer.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); - getUnderlyingVector().setIndexDefined(index); - } - - /** - * Sets the UUID value at the given index from a byte array. - * - * @param index the index to set - * @param value the 16-byte array containing the UUID data - */ - public void set(int index, byte[] value) { - getUnderlyingVector().set(index, value); - } - - /** - * Sets the UUID value at the given index, expanding capacity if needed. 
- * - * @param index the index to set - * @param value the UUID value to set, or null to set a null value - */ - public void setSafe(int index, UUID value) { - if (value != null) { - setSafe(index, UuidUtility.getBytesFromUUID(value)); - } else { - getUnderlyingVector().setNull(index); - } - } - - /** - * Sets the UUID value at the given index from a NullableUuidHolder, expanding capacity if needed. - * - * @param index the index to set - * @param holder the holder containing the UUID data, or null to set a null value - */ - public void setSafe(int index, NullableUuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { - getUnderlyingVector().setNull(index); - } - } - - /** - * Sets the UUID value at the given index from a UuidHolder, expanding capacity if needed. - * - * @param index the index to set - * @param holder the holder containing the UUID data, or null to set a null value - */ - public void setSafe(int index, UuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { - getUnderlyingVector().setNull(index); - } - } - - /** - * Sets the UUID value at the given index from a byte array, expanding capacity if needed. - * - * @param index the index to set - * @param value the 16-byte array containing the UUID data - */ - public void setSafe(int index, byte[] value) { - getUnderlyingVector().setIndexDefined(index); - getUnderlyingVector().setSafe(index, value); - } - - /** - * Sets the UUID value at the given index from an ArrowBuf, expanding capacity if needed. 
- * - * @param index the index to set - * @param value the buffer containing the 16-byte UUID data - */ - public void setSafe(int index, ArrowBuf value) { - getUnderlyingVector().setSafe(index, value); - } - - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - getUnderlyingVector() - .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - getUnderlyingVector() - .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); - } - - @Override - public Field getField() { - return field; - } - - @Override - public ArrowBufPointer getDataPointer(int i) { - return getUnderlyingVector().getDataPointer(i); - } - - @Override - public ArrowBufPointer getDataPointer(int i, ArrowBufPointer arrowBufPointer) { - return getUnderlyingVector().getDataPointer(i, arrowBufPointer); - } - - @Override - public void allocateNew(int valueCount) { - getUnderlyingVector().allocateNew(valueCount); - } - - @Override - public void zeroVector() { - getUnderlyingVector().zeroVector(); - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UuidVector) to); - } - - @Override - protected FieldReader getReaderImpl() { - return new UuidReaderImpl(this); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(field, allocator); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(ref, allocator); - } - - @Override - public TransferPair 
getTransferPair(BufferAllocator allocator) { - return getTransferPair(this.getField().getName(), allocator); - } - - private ArrowBuf getBufferSlicePostNullCheck(int index) { - return getUnderlyingVector() - .getDataBuffer() - .slice((long) index * UUID_BYTE_WIDTH, UUID_BYTE_WIDTH); - } - - @Override - public int getTypeWidth() { - return getUnderlyingVector().getTypeWidth(); - } - - /** {@link TransferPair} for {@link UuidVector}. */ - public class TransferImpl implements TransferPair { - UuidVector to; - - /** - * Constructs a transfer pair with the given target vector. - * - * @param to the target UUID vector - */ - public TransferImpl(UuidVector to) { - this.to = to; - } - - /** - * Constructs a transfer pair, creating a new target vector from the field and allocator. - * - * @param field the field definition for the target vector - * @param allocator the buffer allocator for the target vector - */ - public TransferImpl(Field field, BufferAllocator allocator) { - this.to = new UuidVector(field, allocator); - } - - /** - * Constructs a transfer pair, creating a new target vector with the given name and allocator. - * - * @param ref the name for the target vector - * @param allocator the buffer allocator for the target vector - */ - public TransferImpl(String ref, BufferAllocator allocator) { - this.to = new UuidVector(ref, allocator); - } - - /** - * Gets the target vector of this transfer pair. - * - * @return the target UUID vector - */ - public UuidVector getTo() { - return this.to; - } - - /** Transfers ownership of data from the source vector to the target vector. */ - public void transfer() { - getUnderlyingVector().transferTo(to.getUnderlyingVector()); - } - - /** - * Splits and transfers a range of values from the source vector to the target vector. 
- * - * @param startIndex the starting index in the source vector - * @param length the number of values to transfer - */ - public void splitAndTransfer(int startIndex, int length) { - getUnderlyingVector().splitAndTransferTo(startIndex, length, to.getUnderlyingVector()); - } - - /** - * Copies a value from the source vector to the target vector, expanding capacity if needed. - * - * @param fromIndex the index in the source vector - * @param toIndex the index in the target vector - */ - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, (ValueVector) UuidVector.this); - } - } -} diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java deleted file mode 100644 index 7fa50ca761..0000000000 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.holders; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** - * Value holder for nullable UUID values. - * - *

The {@code isSet} field controls nullability: when {@code isSet = 1}, the holder contains a - * valid UUID in {@code buffer}; when {@code isSet = 0}, the holder represents a null value and - * {@code buffer} should not be accessed. - * - * @see UuidHolder - * @see org.apache.arrow.vector.UuidVector - * @see org.apache.arrow.vector.extension.UuidType - */ -public class NullableUuidHolder extends ExtensionHolder { - /** Buffer containing 16-byte UUID data. */ - public ArrowBuf buffer; - - @Override - public ArrowType type() { - return UuidType.INSTANCE; - } -} diff --git a/vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java b/vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java deleted file mode 100644 index a1b0b54579..0000000000 --- a/vector/src/main/java/org/apache/arrow/vector/util/UuidUtility.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.UUID; -import org.apache.arrow.memory.ArrowBuf; - -/** - * Utility class for UUID conversions and operations. - * - *

Provides methods to convert between {@link UUID} objects and byte representations used in - * Arrow vectors. - * - * @see org.apache.arrow.vector.UuidVector - * @see org.apache.arrow.vector.extension.UuidType - */ -public class UuidUtility { - /** - * Converts a UUID to a 16-byte array. - * - *

The UUID is stored in big-endian byte order, with the most significant bits first. - * - * @param uuid the UUID to convert - * @return a 16-byte array representing the UUID - */ - public static byte[] getBytesFromUUID(UUID uuid) { - byte[] result = new byte[16]; - long msb = uuid.getMostSignificantBits(); - long lsb = uuid.getLeastSignificantBits(); - for (int i = 15; i >= 8; i--) { - result[i] = (byte) (lsb & 0xFF); - lsb >>= 8; - } - for (int i = 7; i >= 0; i--) { - result[i] = (byte) (msb & 0xFF); - msb >>= 8; - } - return result; - } - - /** - * Constructs a UUID from bytes stored in an ArrowBuf at the specified index. - * - *

Reads 16 bytes from the buffer starting at the given index and interprets them as a UUID in - * big-endian byte order. - * - * @param buffer the buffer containing UUID data - * @param index the byte offset in the buffer where the UUID starts - * @return the UUID constructed from the buffer data - */ - public static UUID uuidFromArrowBuf(ArrowBuf buffer, long index) { - ByteBuffer buf = buffer.nioBuffer(index, UUID_BYTE_WIDTH); - - buf.order(ByteOrder.BIG_ENDIAN); - long mostSigBits = buf.getLong(0); - long leastSigBits = buf.getLong(Long.BYTES); - return new UUID(mostSigBits, leastSigBits); - } -} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index eecd4884ee..c811114365 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -22,6 +22,7 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -35,14 +36,13 @@ import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; -import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import 
org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1038,9 +1038,9 @@ public void testCopyValueSafeForExtensionType() throws Exception { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); writer.startList(); - ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); - extensionWriter.writeExtension(u1); - extensionWriter.writeExtension(u2); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); + extensionWriter.writeExtension(u1, new UuidType()); + extensionWriter.writeExtension(u2, new UuidType()); writer.endList(); // Create second list with UUIDs @@ -1048,7 +1048,7 @@ public void testCopyValueSafeForExtensionType() throws Exception { UUID u3 = UUID.randomUUID(); UUID u4 = UUID.randomUUID(); writer.startList(); - extensionWriter = writer.extension(UuidType.INSTANCE); + extensionWriter = writer.extension(new UuidType()); extensionWriter.writeExtension(u3); extensionWriter.writeExtension(u4); extensionWriter.writeNull(); @@ -1072,12 +1072,14 @@ public void testCopyValueSafeForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u2, actualUuid); // Verify second list @@ -1086,12 +1088,14 @@ public void testCopyValueSafeForExtensionType() throws Exception { reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); 
assertEquals(u3, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u4, actualUuid); reader.next(); uuidReader = reader.reader(); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index d6f15141eb..5817564a78 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -36,18 +37,17 @@ import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; -import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1214,7 +1214,7 @@ public 
void testListVectorWithExtensionType() throws Exception { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); writer.startList(); - ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); extensionWriter.writeExtension(u1); extensionWriter.writeExtension(u2); writer.endList(); @@ -1241,9 +1241,9 @@ public void testListVectorReaderForExtensionType() throws Exception { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); writer.startList(); - ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); - extensionWriter.writeExtension(u1); - extensionWriter.writeExtension(u2); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); + extensionWriter.writeExtension(u1, new UuidType()); + extensionWriter.writeExtension(u2, new UuidType()); writer.endList(); writer.setValueCount(1); @@ -1255,12 +1255,14 @@ public void testListVectorReaderForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u2, actualUuid); } } @@ -1276,8 +1278,8 @@ public void testCopyFromForExtensionType() throws Exception { UUID u2 = UUID.randomUUID(); writer.startList(); - writer.extension(UuidType.INSTANCE).writeExtension(u1); - writer.writeExtension(u2); + writer.extension(new UuidType()).writeExtension(u1, new UuidType()); + writer.writeExtension(u2, new UuidType()); writer.writeNull(); writer.endList(); @@ -1295,12 +1297,14 @@ public void 
testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u2, actualUuid); } } @@ -1317,7 +1321,7 @@ public void testCopyValueSafeForExtensionType() throws Exception { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); writer.startList(); - ExtensionWriter extensionWriter = writer.extension(UuidType.INSTANCE); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); extensionWriter.writeExtension(u1); extensionWriter.writeExtension(u2); writer.endList(); @@ -1327,7 +1331,7 @@ public void testCopyValueSafeForExtensionType() throws Exception { UUID u3 = UUID.randomUUID(); UUID u4 = UUID.randomUUID(); writer.startList(); - extensionWriter = writer.extension(UuidType.INSTANCE); + extensionWriter = writer.extension(new UuidType()); extensionWriter.writeExtension(u3); extensionWriter.writeExtension(u4); extensionWriter.writeNull(); @@ -1351,12 +1355,14 @@ public void testCopyValueSafeForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new 
UUID(bb.getLong(), bb.getLong()); assertEquals(u2, actualUuid); // Verify second list @@ -1365,12 +1371,14 @@ public void testCopyValueSafeForExtensionType() throws Exception { reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u3, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u4, actualUuid); reader.next(); uuidReader = reader.reader(); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 17bb3b5455..929470b7fa 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -22,6 +22,7 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -38,15 +39,14 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.JsonStringArrayList; import 
org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1280,12 +1280,12 @@ public void testMapVectorWithExtensionType() throws Exception { writer.startMap(); writer.startEntry(); writer.key().bigInt().writeBigInt(0); - ExtensionWriter extensionWriter = writer.value().extension(UuidType.INSTANCE); - extensionWriter.writeExtension(u1, UuidType.INSTANCE); + ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.writeExtension(u1, new UuidType()); writer.endEntry(); writer.startEntry(); writer.key().bigInt().writeBigInt(1); - extensionWriter.writeExtension(u2, UuidType.INSTANCE); + extensionWriter.writeExtension(u2, new UuidType()); writer.endEntry(); writer.endMap(); @@ -1297,12 +1297,14 @@ public void testMapVectorWithExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u2, actualUuid); } } @@ -1319,13 +1321,13 @@ public void testCopyFromForExtensionType() throws Exception { writer.startMap(); writer.startEntry(); writer.key().bigInt().writeBigInt(0); - ExtensionWriter extensionWriter = writer.value().extension(UuidType.INSTANCE); - extensionWriter.writeExtension(u1, UuidType.INSTANCE); + ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.writeExtension(u1, new UuidType()); writer.endEntry(); 
writer.startEntry(); writer.key().bigInt().writeBigInt(1); - extensionWriter = writer.value().extension(UuidType.INSTANCE); - extensionWriter.writeExtension(u2, UuidType.INSTANCE); + extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.writeExtension(u2, new UuidType()); writer.endEntry(); writer.endMap(); @@ -1340,12 +1342,14 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index c5e23c8e8d..2898e38abb 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -35,7 +35,6 @@ import org.apache.arrow.vector.complex.impl.NullableStructWriter; import org.apache.arrow.vector.complex.writer.Float8Writer; import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; @@ -43,6 +42,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import 
org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -167,7 +167,7 @@ public void testGetPrimitiveVectors() { "varchar", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class); // add extension vector - vector.addOrGet("extension", FieldType.nullable(UuidType.INSTANCE), UuidVector.class); + vector.addOrGet("extension", FieldType.nullable(new UuidType()), UuidVector.class); List primitiveVectors = vector.getPrimitiveVectors(); assertEquals(6, primitiveVectors.size()); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java deleted file mode 100644 index 9f7c65b82b..0000000000 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.TestUtils.ensureRegistered; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.UUID; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.UuidUtility; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class TestUuidType { - BufferAllocator allocator; - - @BeforeEach - void beforeEach() { - allocator = new RootAllocator(); - } - - @AfterEach - void afterEach() { - allocator.close(); - } - - @Test - void testConstants() { - assertEquals("arrow.uuid", UuidType.EXTENSION_NAME); - assertNotNull(UuidType.INSTANCE); - assertNotNull(UuidType.STORAGE_TYPE); - 
assertInstanceOf(ArrowType.FixedSizeBinary.class, UuidType.STORAGE_TYPE); - assertEquals( - UuidType.UUID_BYTE_WIDTH, - ((ArrowType.FixedSizeBinary) UuidType.STORAGE_TYPE).getByteWidth()); - } - - @Test - void testStorageType() { - UuidType type = new UuidType(); - assertEquals(UuidType.STORAGE_TYPE, type.storageType()); - assertInstanceOf(ArrowType.FixedSizeBinary.class, type.storageType()); - } - - @Test - void testExtensionName() { - UuidType type = new UuidType(); - assertEquals("arrow.uuid", type.extensionName()); - } - - @Test - void testExtensionEquals() { - UuidType type1 = new UuidType(); - UuidType type2 = new UuidType(); - UuidType type3 = UuidType.INSTANCE; - - assertTrue(type1.extensionEquals(type2)); - assertTrue(type1.extensionEquals(type3)); - assertTrue(type2.extensionEquals(type3)); - } - - @Test - void testIsComplex() { - UuidType type = new UuidType(); - assertFalse(type.isComplex()); - } - - @Test - void testSerialize() { - UuidType type = new UuidType(); - String serialized = type.serialize(); - assertEquals("", serialized); - } - - @Test - void testDeserializeValid() { - UuidType type = new UuidType(); - ArrowType storageType = new ArrowType.FixedSizeBinary(UuidType.UUID_BYTE_WIDTH); - - ArrowType deserialized = assertDoesNotThrow(() -> type.deserialize(storageType, "")); - assertInstanceOf(UuidType.class, deserialized); - assertEquals(UuidType.INSTANCE, deserialized); - } - - @Test - void testDeserializeInvalidStorageType() { - UuidType type = new UuidType(); - ArrowType wrongStorageType = new ArrowType.FixedSizeBinary(32); - - assertThrows(UnsupportedOperationException.class, () -> type.deserialize(wrongStorageType, "")); - } - - @Test - void testGetNewVector() { - UuidType type = new UuidType(); - try (FieldVector vector = - type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { - assertInstanceOf(UuidVector.class, vector); - assertEquals("uuid_field", vector.getField().getName()); - assertEquals(type, 
vector.getField().getType()); - } - } - - @Test - void testVectorOperations() { - UuidType type = new UuidType(); - try (FieldVector vector = - type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { - UuidVector uuidVector = (UuidVector) vector; - - UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); - - uuidVector.setSafe(0, uuid1); - uuidVector.setSafe(1, uuid2); - uuidVector.setNull(2); - uuidVector.setValueCount(3); - - assertEquals(uuid1, uuidVector.getObject(0)); - assertEquals(uuid2, uuidVector.getObject(1)); - assertNull(uuidVector.getObject(2)); - assertFalse(uuidVector.isNull(0)); - assertFalse(uuidVector.isNull(1)); - assertTrue(uuidVector.isNull(2)); - } - } - - @Test - void testIpcRoundTrip() { - UuidType type = UuidType.INSTANCE; - ensureRegistered(type); - - Schema schema = new Schema(Collections.singletonList(Field.nullable("uuid", type))); - byte[] serialized = schema.serializeAsMessage(); - Schema deserialized = Schema.deserializeMessage(ByteBuffer.wrap(serialized)); - assertEquals(schema, deserialized); - } - - @Test - void testVectorIpcRoundTrip() throws IOException { - UuidType type = UuidType.INSTANCE; - ensureRegistered(type); - - UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); - - try (FieldVector vector = type.getNewVector("field", FieldType.nullable(type), allocator)) { - UuidVector uuidVector = (UuidVector) vector; - uuidVector.setSafe(0, uuid1); - uuidVector.setNull(1); - uuidVector.setSafe(2, uuid2); - uuidVector.setValueCount(3); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(uuidVector)); - ArrowStreamWriter writer = - new ArrowStreamWriter(root, new DictionaryProvider.MapDictionaryProvider(), baos)) { - writer.start(); - writer.writeBatch(); - } - - try (ArrowStreamReader reader = - new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()), allocator)) { - 
assertTrue(reader.loadNextBatch()); - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - assertEquals(3, root.getRowCount()); - assertEquals( - new Schema(Collections.singletonList(uuidVector.getField())), root.getSchema()); - - UuidVector actual = assertInstanceOf(UuidVector.class, root.getVector("field")); - assertFalse(actual.isNull(0)); - assertTrue(actual.isNull(1)); - assertFalse(actual.isNull(2)); - assertEquals(uuid1, actual.getObject(0)); - assertNull(actual.getObject(1)); - assertEquals(uuid2, actual.getObject(2)); - } - } - } - - @Test - void testVectorByteArrayOperations() { - UuidType type = new UuidType(); - try (FieldVector vector = - type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { - UuidVector uuidVector = (UuidVector) vector; - - UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - - uuidVector.setSafe(0, uuidBytes); - uuidVector.setValueCount(1); - - assertEquals(uuid, uuidVector.getObject(0)); - - // Verify the bytes match - byte[] actualBytes = new byte[UuidType.UUID_BYTE_WIDTH]; - uuidVector.get(0).getBytes(0, actualBytes); - assertArrayEquals(uuidBytes, actualBytes); - } - } - - @Test - void testGetNewVectorWithCustomFieldType() { - UuidType type = new UuidType(); - FieldType fieldType = new FieldType(false, type, null); - - try (FieldVector vector = type.getNewVector("non_nullable_uuid", fieldType, allocator)) { - assertInstanceOf(UuidVector.class, vector); - assertEquals("non_nullable_uuid", vector.getField().getName()); - assertFalse(vector.getField().isNullable()); - } - } - - @Test - void testSingleton() { - UuidType type1 = UuidType.INSTANCE; - UuidType type2 = UuidType.INSTANCE; - - // Same instance - assertSame(type1, type2); - assertTrue(type1.extensionEquals(type2)); - } - - @Test - void testUnderlyingVector() { - UuidType type = new UuidType(); - try (FieldVector vector = - type.getNewVector("uuid_field", FieldType.nullable(type), allocator)) { - UuidVector uuidVector 
= (UuidVector) vector; - FixedSizeBinaryVector underlying = uuidVector.getUnderlyingVector(); - - assertInstanceOf(FixedSizeBinaryVector.class, underlying); - assertEquals(UuidType.UUID_BYTE_WIDTH, underlying.getByteWidth()); - } - } -} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java deleted file mode 100644 index a3690461cf..0000000000 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.ByteBuffer; -import java.util.UUID; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.impl.UuidReaderImpl; -import org.apache.arrow.vector.complex.impl.UuidWriterImpl; -import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.holders.ExtensionHolder; -import org.apache.arrow.vector.holders.NullableUuidHolder; -import org.apache.arrow.vector.holders.UuidHolder; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.UuidUtility; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Tests for UuidVector, UuidWriterImpl, and UuidReaderImpl. 
*/ -class TestUuidVector { - - private BufferAllocator allocator; - - @BeforeEach - void beforeEach() { - allocator = new RootAllocator(); - } - - @AfterEach - void afterEach() { - allocator.close(); - } - - // ========== Writer Tests ========== - - @Test - void testWriteToExtensionVector() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - UUID uuid = UUID.randomUUID(); - ByteBuffer bb = ByteBuffer.allocate(UuidType.UUID_BYTE_WIDTH); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - - // Allocate ArrowBuf for the holder - try (ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { - buf.setBytes(0, bb.array()); - - UuidHolder holder = new UuidHolder(); - holder.buffer = buf; - - writer.write(holder); - UUID result = vector.getObject(0); - assertEquals(uuid, result); - } - } - } - - @Test - void testWriteExtensionWithUUID() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - UUID uuid = UUID.randomUUID(); - writer.setPosition(0); - writer.writeExtension(uuid); - - UUID result = vector.getObject(0); - assertEquals(uuid, result); - assertEquals(1, vector.getValueCount()); - } - } - - @Test - void testWriteExtensionWithByteArray() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - - writer.setPosition(0); - writer.writeExtension(uuidBytes); - - UUID result = vector.getObject(0); - assertEquals(uuid, result); - assertEquals(1, vector.getValueCount()); - } - } - - @Test - void testWriteExtensionWithArrowBuf() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector); - ArrowBuf buf = 
allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { - UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - buf.setBytes(0, uuidBytes); - - writer.setPosition(0); - writer.writeExtension(buf); - - UUID result = vector.getObject(0); - assertEquals(uuid, result); - assertEquals(1, vector.getValueCount()); - } - } - - @Test - void testWriteExtensionWithUnsupportedType() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - writer.setPosition(0); - - IllegalArgumentException exception = - assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type")); - - assertEquals( - "Unsupported value type for UUID: class java.lang.String", exception.getMessage()); - } - } - - @Test - void testWriteExtensionMultipleValues() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); - UUID uuid3 = UUID.randomUUID(); - - writer.setPosition(0); - writer.writeExtension(uuid1); - writer.setPosition(1); - writer.writeExtension(uuid2); - writer.setPosition(2); - writer.writeExtension(uuid3); - - assertEquals(uuid1, vector.getObject(0)); - assertEquals(uuid2, vector.getObject(1)); - assertEquals(uuid3, vector.getObject(2)); - assertEquals(3, vector.getValueCount()); - } - } - - @Test - void testWriteWithUuidHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector); - ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { - UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - buf.setBytes(0, uuidBytes); - - UuidHolder holder = new UuidHolder(); - holder.buffer = buf; - holder.isSet = 1; - - writer.setPosition(0); - writer.write(holder); - - UUID result = vector.getObject(0); - 
assertEquals(uuid, result); - assertEquals(1, vector.getValueCount()); - } - } - - @Test - void testWriteWithNullableUuidHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector); - ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { - UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - buf.setBytes(0, uuidBytes); - - NullableUuidHolder holder = new NullableUuidHolder(); - holder.buffer = buf; - holder.isSet = 1; - - writer.setPosition(0); - writer.write(holder); - - UUID result = vector.getObject(0); - assertEquals(uuid, result); - assertEquals(1, vector.getValueCount()); - } - } - - @Test - void testWriteWithNullableUuidHolderNull() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - NullableUuidHolder holder = new NullableUuidHolder(); - holder.isSet = 0; - - writer.setPosition(0); - writer.write(holder); - - assertTrue(vector.isNull(0)); - assertEquals(1, vector.getValueCount()); - } - } - - // ========== Reader Tests ========== - - @Test - void testReaderCopyAsValueExtensionVector() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator); - UuidVector vectorForRead = new UuidVector("test2", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector)) { - UUID uuid = UUID.randomUUID(); - vectorForRead.setValueCount(1); - vectorForRead.set(0, uuid); - UuidReaderImpl reader = (UuidReaderImpl) vectorForRead.getReader(); - reader.copyAsValue(writer); - UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); - UuidHolder holder = new UuidHolder(); - reader2.read(0, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); - assertEquals(uuid, actualUuid); - } - } - - @Test - void testReaderReadWithUuidHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID 
uuid = UUID.randomUUID(); - vector.setSafe(0, uuid); - vector.setValueCount(1); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - reader.setPosition(0); - - UuidHolder holder = new UuidHolder(); - reader.read(holder); - - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); - assertEquals(uuid, actualUuid); - assertEquals(1, holder.isSet); - } - } - - @Test - void testReaderReadWithNullableUuidHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid = UUID.randomUUID(); - vector.setSafe(0, uuid); - vector.setValueCount(1); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - reader.setPosition(0); - - NullableUuidHolder holder = new NullableUuidHolder(); - reader.read(holder); - - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); - assertEquals(uuid, actualUuid); - assertEquals(1, holder.isSet); - } - } - - @Test - void testReaderReadWithNullableUuidHolderNull() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - vector.setNull(0); - vector.setValueCount(1); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - reader.setPosition(0); - - NullableUuidHolder holder = new NullableUuidHolder(); - reader.read(holder); - - assertEquals(0, holder.isSet); - } - } - - @Test - void testReaderReadWithArrayIndexUuidHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); - UUID uuid3 = UUID.randomUUID(); - - vector.setSafe(0, uuid1); - vector.setSafe(1, uuid2); - vector.setSafe(2, uuid3); - vector.setValueCount(3); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - - UuidHolder holder = new UuidHolder(); - reader.read(1, holder); - - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); - assertEquals(uuid2, actualUuid); - assertEquals(1, holder.isSet); - } - } - - @Test - void 
testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); - - vector.setSafe(0, uuid1); - vector.setNull(1); - vector.setSafe(2, uuid2); - vector.setValueCount(3); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - - NullableUuidHolder holder1 = new NullableUuidHolder(); - reader.read(0, holder1); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, 0)); - assertEquals(1, holder1.isSet); - - NullableUuidHolder holder2 = new NullableUuidHolder(); - reader.read(1, holder2); - assertEquals(0, holder2.isSet); - - NullableUuidHolder holder3 = new NullableUuidHolder(); - reader.read(2, holder3); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, 0)); - assertEquals(1, holder3.isSet); - } - } - - @Test - void testReaderReadWithUnsupportedHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid = UUID.randomUUID(); - vector.setSafe(0, uuid); - vector.setValueCount(1); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - reader.setPosition(0); - - // Create a mock unsupported holder - ExtensionHolder unsupportedHolder = - new ExtensionHolder() { - @Override - public ArrowType type() { - return null; - } - }; - - IllegalArgumentException exception = - assertThrows(IllegalArgumentException.class, () -> reader.read(unsupportedHolder)); - - assertTrue(exception.getMessage().contains("Unsupported holder type for UuidReader")); - } - } - - @Test - void testReaderReadWithArrayIndexUnsupportedHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid = UUID.randomUUID(); - vector.setSafe(0, uuid); - vector.setValueCount(1); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - - // Create a mock unsupported holder - ExtensionHolder unsupportedHolder = - new ExtensionHolder() { - 
@Override - public ArrowType type() { - return null; - } - }; - - IllegalArgumentException exception = - assertThrows(IllegalArgumentException.class, () -> reader.read(0, unsupportedHolder)); - - assertTrue(exception.getMessage().contains("Unsupported holder type for UuidReader")); - } - } - - @Test - void testReaderIsSet() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid = UUID.randomUUID(); - vector.setSafe(0, uuid); - vector.setNull(1); - vector.setSafe(2, uuid); - vector.setValueCount(3); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - - reader.setPosition(0); - assertTrue(reader.isSet()); - - reader.setPosition(1); - assertFalse(reader.isSet()); - - reader.setPosition(2); - assertTrue(reader.isSet()); - } - } - - @Test - void testReaderReadObject() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); - - vector.setSafe(0, uuid1); - vector.setNull(1); - vector.setSafe(2, uuid2); - vector.setValueCount(3); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - - reader.setPosition(0); - assertEquals(uuid1, reader.readObject()); - - reader.setPosition(1); - assertNull(reader.readObject()); - - reader.setPosition(2); - assertEquals(uuid2, reader.readObject()); - } - } - - @Test - void testReaderGetMinorType() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - assertEquals(vector.getMinorType(), reader.getMinorType()); - } - } - - @Test - void testReaderGetField() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - assertEquals(vector.getField(), reader.getField()); - assertEquals("test", reader.getField().getName()); - } - } -} diff --git a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java 
b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java new file mode 100644 index 0000000000..72ba4aa555 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.complex.impl.UuidReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; +import org.apache.arrow.vector.util.TransferPair; + +public class UuidVector extends ExtensionTypeVector + implements ValueIterableVector { + private final Field field; + + public UuidVector( + String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + public UuidVector(String name, BufferAllocator allocator) { + super(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + @Override + public UUID getObject(int index) { + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + return new UUID(bb.getLong(), bb.getLong()); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + public void set(int index, UUID uuid) { + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + getUnderlyingVector().set(index, bb.array()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public 
Field getField() { + return field; + } + + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((UuidVector) to); + } + + @Override + protected FieldReader getReaderImpl() { + return new UuidReaderImpl(this); + } + + public void setSafe(int index, byte[] value) { + getUnderlyingVector().setIndexDefined(index); + getUnderlyingVector().setSafe(index, value); + } + + public void get(int index, UuidHolder holder) { + holder.value = getUnderlyingVector().get(index); + holder.isSet = 1; + } + + public class TransferImpl implements TransferPair { + UuidVector to; + ValueVector targetUnderlyingVector; + TransferPair tp; + + public TransferImpl(UuidVector to) { + this.to = to; + targetUnderlyingVector = this.to.getUnderlyingVector(); + tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); + } + + public UuidVector getTo() { + return this.to; + } + + public void transfer() { + tp.transfer(); + } + + public void splitAndTransfer(int startIndex, int length) { + tp.splitAndTransfer(startIndex, length); + } + + public void copyValueSafe(int fromIndex, int toIndex) { + tp.copyValueSafe(fromIndex, toIndex); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java index b2a8cf9ba4..ebc16e90c0 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java @@ -34,11 +34,11 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import 
org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -860,9 +860,9 @@ public void testCopyListVectorWithExtensionType() { for (int i = 0; i < COUNT; i++) { listWriter.setPosition(i); listWriter.startList(); - ExtensionWriter extensionWriter = listWriter.extension(UuidType.INSTANCE); - extensionWriter.writeExtension(UUID.randomUUID()); - extensionWriter.writeExtension(UUID.randomUUID()); + ExtensionWriter extensionWriter = listWriter.extension(new UuidType()); + extensionWriter.writeExtension(UUID.randomUUID(), new UuidType()); + extensionWriter.writeExtension(UUID.randomUUID(), new UuidType()); listWriter.endList(); } from.setValueCount(COUNT); @@ -895,10 +895,10 @@ public void testCopyMapVectorWithExtensionType() { mapWriter.setPosition(i); mapWriter.startMap(); mapWriter.startEntry(); - ExtensionWriter extensionKeyWriter = mapWriter.key().extension(UuidType.INSTANCE); - extensionKeyWriter.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); - ExtensionWriter extensionValueWriter = mapWriter.value().extension(UuidType.INSTANCE); - extensionValueWriter.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); + ExtensionWriter extensionKeyWriter = mapWriter.key().extension(new UuidType()); + extensionKeyWriter.writeExtension(UUID.randomUUID(), new UuidType()); + ExtensionWriter extensionValueWriter = mapWriter.value().extension(new UuidType()); + extensionValueWriter.writeExtension(UUID.randomUUID(), new UuidType()); mapWriter.endEntry(); mapWriter.endMap(); } @@ -931,10 +931,10 @@ public void testCopyStructVectorWithExtensionType() { for (int i = 0; i < COUNT; i++) { structWriter.setPosition(i); structWriter.start(); - ExtensionWriter extensionWriter1 = structWriter.extension("uuid1", UuidType.INSTANCE); - extensionWriter1.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); - 
ExtensionWriter extensionWriter2 = structWriter.extension("uuid2", UuidType.INSTANCE); - extensionWriter2.writeExtension(UUID.randomUUID(), UuidType.INSTANCE); + ExtensionWriter extensionWriter1 = structWriter.extension("uuid1", new UuidType()); + extensionWriter1.writeExtension(UUID.randomUUID(), new UuidType()); + ExtensionWriter extensionWriter2 = structWriter.extension("uuid2", new UuidType()); + extensionWriter2.writeExtension(UUID.randomUUID(), new UuidType()); structWriter.end(); } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 5b6d65d6ba..c9fd216c6d 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -42,7 +42,7 @@ import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableDecimalHolder; @@ -50,16 +50,15 @@ import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; import org.apache.arrow.vector.holders.UnionHolder; -import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import 
org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -134,17 +133,18 @@ public void testPromoteToUnion() throws Exception { writer.setPosition(9); UUID uuid = UUID.randomUUID(); - writer.extension("A", UuidType.INSTANCE).writeExtension(uuid, UuidType.INSTANCE); + writer.extension("A", new UuidType()).writeExtension(uuid, new UuidType()); writer.end(); writer.setPosition(10); UUID uuid2 = UUID.randomUUID(); UuidHolder uuidHolder = new UuidHolder(); - uuidHolder.buffer = allocator.buffer(UuidType.UUID_BYTE_WIDTH); - uuidHolder.buffer.setBytes(0, UuidUtility.getBytesFromUUID(uuid2)); - writer.extension("A", UuidType.INSTANCE).write(uuidHolder); + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid2.getMostSignificantBits()); + bb.putLong(uuid2.getLeastSignificantBits()); + uuidHolder.value = bb.array(); + writer.extension("A", new UuidType()).write(uuidHolder); writer.end(); - allocator.releaseBytes(UuidType.UUID_BYTE_WIDTH); container.setValueCount(11); @@ -805,7 +805,7 @@ public void testExtensionType() throws Exception { try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); final UuidVector v = - container.addOrGet("uuid", FieldType.nullable(UuidType.INSTANCE), UuidVector.class); + container.addOrGet("uuid", FieldType.nullable(new UuidType()), UuidVector.class); final PromotableWriter writer = new PromotableWriter(v, container)) { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); @@ -813,9 +813,9 @@ public void testExtensionType() throws Exception { container.setValueCount(1); writer.setPosition(0); - writer.writeExtension(u1, UuidType.INSTANCE); + writer.writeExtension(u1, new UuidType()); writer.setPosition(1); - writer.writeExtension(u2, UuidType.INSTANCE); + writer.writeExtension(u2, new 
UuidType()); container.setValueCount(2); @@ -829,8 +829,7 @@ public void testExtensionType() throws Exception { public void testExtensionTypeForList() throws Exception { try (final ListVector container = ListVector.empty(EMPTY_SCHEMA_PATH, allocator); final UuidVector v = - (UuidVector) - container.addOrGetVector(FieldType.nullable(UuidType.INSTANCE)).getVector(); + (UuidVector) container.addOrGetVector(FieldType.nullable(new UuidType())).getVector(); final PromotableWriter writer = new PromotableWriter(v, container)) { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); @@ -838,9 +837,9 @@ public void testExtensionTypeForList() throws Exception { container.setValueCount(1); writer.setPosition(0); - writer.writeExtension(u1, UuidType.INSTANCE); + writer.writeExtension(u1, new UuidType()); writer.setPosition(1); - writer.writeExtension(u2, UuidType.INSTANCE); + writer.writeExtension(u2, new UuidType()); container.setValueCount(2); diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java similarity index 61% rename from vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java rename to vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java index bb35b960d3..6b98d3b340 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java @@ -17,30 +17,15 @@ package org.apache.arrow.vector.complex.impl; import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.ExtensionHolder; -import org.apache.arrow.vector.holders.NullableUuidHolder; -import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; -/** - * Reader implementation for {@link 
UuidVector}. - * - *

Provides methods to read UUID values from a vector, including support for reading into {@link - * UuidHolder} and retrieving values as {@link java.util.UUID} objects. - * - * @see UuidVector - * @see org.apache.arrow.vector.extension.UuidType - */ public class UuidReaderImpl extends AbstractFieldReader { private final UuidVector vector; - /** - * Constructs a reader for the given UUID vector. - * - * @param vector the UUID vector to read from - */ public UuidReaderImpl(UuidVector vector) { super(); this.vector = vector; @@ -63,26 +48,12 @@ public boolean isSet() { @Override public void read(ExtensionHolder holder) { - if (holder instanceof UuidHolder) { - vector.get(idx(), (UuidHolder) holder); - } else if (holder instanceof NullableUuidHolder) { - vector.get(idx(), (NullableUuidHolder) holder); - } else { - throw new IllegalArgumentException( - "Unsupported holder type for UuidReader: " + holder.getClass()); - } + vector.get(idx(), (UuidHolder) holder); } @Override public void read(int arrayIndex, ExtensionHolder holder) { - if (holder instanceof UuidHolder) { - vector.get(arrayIndex, (UuidHolder) holder); - } else if (holder instanceof NullableUuidHolder) { - vector.get(arrayIndex, (NullableUuidHolder) holder); - } else { - throw new IllegalArgumentException( - "Unsupported holder type for UuidReader: " + holder.getClass()); - } + vector.get(arrayIndex, (UuidHolder) holder); } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java similarity index 54% rename from vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java rename to vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java index ee3c79d5e3..d0fb008a47 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -16,44 
+16,26 @@ */ package org.apache.arrow.vector.complex.impl; -import org.apache.arrow.memory.ArrowBuf; +import java.nio.ByteBuffer; +import java.util.UUID; import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.ExtensionHolder; -import org.apache.arrow.vector.holders.NullableUuidHolder; -import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.pojo.ArrowType; -/** - * Writer implementation for {@link UuidVector}. - * - *

Supports writing UUID values in multiple formats: {@link java.util.UUID}, byte arrays, and - * {@link ArrowBuf}. Also handles {@link UuidHolder} and {@link NullableUuidHolder}. - * - * @see UuidVector - * @see org.apache.arrow.vector.extension.UuidType - */ public class UuidWriterImpl extends AbstractExtensionTypeWriter { - /** - * Constructs a writer for the given UUID vector. - * - * @param vector the UUID vector to write to - */ public UuidWriterImpl(UuidVector vector) { super(vector); } @Override public void writeExtension(Object value) { - if (value instanceof byte[]) { - vector.setSafe(getPosition(), (byte[]) value); - } else if (value instanceof ArrowBuf) { - vector.setSafe(getPosition(), (ArrowBuf) value); - } else if (value instanceof java.util.UUID) { - vector.setSafe(getPosition(), (java.util.UUID) value); - } else { - throw new IllegalArgumentException("Unsupported value type for UUID: " + value.getClass()); - } + UUID uuid = (UUID) value; + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + vector.setSafe(getPosition(), bb.array()); vector.setValueCount(getPosition() + 1); } @@ -64,11 +46,8 @@ public void writeExtension(Object value, ArrowType type) { @Override public void write(ExtensionHolder holder) { - if (holder instanceof UuidHolder) { - vector.setSafe(getPosition(), (UuidHolder) holder); - } else if (holder instanceof NullableUuidHolder) { - vector.setSafe(getPosition(), (NullableUuidHolder) holder); - } + UuidHolder uuidHolder = (UuidHolder) holder; + vector.setSafe(getPosition(), uuidHolder.value); vector.setValueCount(getPosition() + 1); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 34425c3420..5e516c14b4 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ 
b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -77,7 +77,7 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; @@ -86,9 +86,7 @@ import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; -import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -100,13 +98,13 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1137,10 +1135,8 @@ public void simpleUnion() throws Exception { bufs.add(buf); } else if (i % 5 == 4) { UuidHolder holder = new UuidHolder(); - 
holder.buffer = allocator.buffer(UuidType.UUID_BYTE_WIDTH); - holder.buffer.setBytes(0, uuidByte); + holder.value = uuidByte; unionWriter.write(holder); - allocator.releaseBytes(UuidType.UUID_BYTE_WIDTH); } else { unionWriter.writeFloat4((float) i); } @@ -1167,9 +1163,11 @@ public void simpleUnion() throws Exception { assertEquals(i, holder.buffer.getInt(0)); assertEquals(4, holder.byteWidth); } else if (i % 5 == 4) { - NullableUuidHolder holder = new NullableUuidHolder(); + UuidHolder holder = new UuidHolder(); unionReader.read(holder); - assertEquals(UuidUtility.uuidFromArrowBuf(holder.buffer, 0), uuid); + ByteBuffer uuidBb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(uuidBb.getLong(), uuidBb.getLong()); + assertEquals(actualUuid, uuid); } else { assertEquals((float) i, unionReader.readFloat(), 1e-12); } @@ -2527,9 +2525,9 @@ public void extensionWriterReader() throws Exception { StructWriter rootWriter = writer.rootAsStruct(); { - ExtensionWriter extensionWriter = rootWriter.extension("uuid1", UuidType.INSTANCE); + ExtensionWriter extensionWriter = rootWriter.extension("uuid1", new UuidType()); extensionWriter.setPosition(0); - extensionWriter.writeExtension(u1, UuidType.INSTANCE); + extensionWriter.writeExtension(u1, new UuidType()); } // read StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); @@ -2538,7 +2536,8 @@ public void extensionWriterReader() throws Exception { uuidReader.setPosition(0); UuidHolder uuidHolder = new UuidHolder(); uuidReader.read(uuidHolder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(uuidHolder.buffer, 0); + ByteBuffer bb = ByteBuffer.wrap(uuidHolder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); assertEquals(u1, actualUuid); assertTrue(uuidReader.isSet()); assertEquals(uuidReader.getMinorType(), MinorType.EXTENSIONTYPE); diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java b/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java 
similarity index 61% rename from vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java rename to vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java index 8a0a66e435..a206bc265e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java +++ b/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java @@ -14,32 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.holders; +package org.apache.arrow.vector.holder; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.UuidType; -/** - * Value holder for non-nullable UUID values. - * - *

Contains a 16-byte UUID in {@code buffer} with {@code isSet} always 1. - * - * @see NullableUuidHolder - * @see org.apache.arrow.vector.UuidVector - * @see org.apache.arrow.vector.extension.UuidType - */ public class UuidHolder extends ExtensionHolder { - /** Buffer containing 16-byte UUID data. */ - public ArrowBuf buffer; - - /** Constructs a UuidHolder with isSet = 1. */ - public UuidHolder() { - this.isSet = 1; - } + public byte[] value; @Override public ArrowType type() { - return UuidType.INSTANCE; + return new UuidType(); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index ae5ac0726c..ff8e45afc3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -50,7 +50,6 @@ import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.ipc.ArrowFileReader; import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.types.FloatingPointPrecision; @@ -63,9 +62,9 @@ public class TestExtensionType { /** Test that a custom UUID type can be round-tripped through a temporary file. 
*/ @Test public void roundtripUuid() throws IOException { - ensureRegistered(UuidType.INSTANCE); + ensureRegistered(new UuidType()); final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", UuidType.INSTANCE))); + new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { UUID u1 = UUID.randomUUID(); @@ -93,7 +92,7 @@ public void roundtripUuid() throws IOException { assertEquals(root.getSchema(), readerRoot.getSchema()); final Field field = readerRoot.getSchema().getFields().get(0); - final UuidType expectedType = UuidType.INSTANCE; + final UuidType expectedType = new UuidType(); assertEquals( field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); @@ -117,9 +116,9 @@ public void roundtripUuid() throws IOException { /** Test that a custom UUID type can be read as its underlying type. 
*/ @Test public void readUnderlyingType() throws IOException { - ensureRegistered(UuidType.INSTANCE); + ensureRegistered(new UuidType()); final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", UuidType.INSTANCE))); + new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { UUID u1 = UUID.randomUUID(); @@ -139,7 +138,7 @@ public void readUnderlyingType() throws IOException { writer.end(); } - ExtensionTypeRegistry.unregister(UuidType.INSTANCE); + ExtensionTypeRegistry.unregister(new UuidType()); try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath())); @@ -157,7 +156,7 @@ public void readUnderlyingType() throws IOException { .getByteWidth()); final Field field = readerRoot.getSchema().getFields().get(0); - final UuidType expectedType = UuidType.INSTANCE; + final UuidType expectedType = new UuidType(); assertEquals( field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); @@ -258,7 +257,7 @@ public void roundtripLocation() throws IOException { @Test public void testVectorCompare() { - UuidType uuidType = UuidType.INSTANCE; + UuidType uuidType = new UuidType(); ExtensionTypeRegistry.register(uuidType); try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); UuidVector a1 = diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/UuidType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java similarity index 51% rename from vector/src/main/java/org/apache/arrow/vector/extension/UuidType.java rename to vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java index 7a7af298e8..eb2666eb25 100644 --- a/vector/src/main/java/org/apache/arrow/vector/extension/UuidType.java +++ 
b/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.extension; +package org.apache.arrow.vector.types.pojo; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; @@ -23,61 +23,17 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; -import org.apache.arrow.vector.types.pojo.FieldType; -/** - * Extension type for UUID (Universally Unique Identifier) values. - * - *

UUIDs are stored as 16-byte fixed-size binary values. This extension type provides a - * standardized way to represent UUIDs in Arrow, making them interoperable across different systems - * and languages.π - * - *

The extension name is "arrow.uuid" and it uses {@link ArrowType.FixedSizeBinary} with 16 bytes - * as the storage type. - * - *

Usage: - * - *

{@code
- * UuidVector vector = new UuidVector("uuid_col", allocator);
- * vector.set(0, UUID.randomUUID());
- * UUID value = vector.getObject(0);
- * }
- * - * @see UuidVector - * @see org.apache.arrow.vector.holders.UuidHolder - * @see org.apache.arrow.vector.holders.NullableUuidHolder - */ public class UuidType extends ExtensionType { - /** Singleton instance of UuidType. */ - public static final UuidType INSTANCE = new UuidType(); - - /** Extension name registered in the Arrow extension type registry. */ - public static final String EXTENSION_NAME = "arrow.uuid"; - - /** Number of bytes used to store a UUID (128 bits = 16 bytes). */ - public static final int UUID_BYTE_WIDTH = 16; - - /** Number of characters in the standard UUID string representation (with hyphens). */ - public static final int UUID_STRING_WIDTH = 36; - - /** Storage type for UUID: FixedSizeBinary(16). */ - public static final ArrowType STORAGE_TYPE = new ArrowType.FixedSizeBinary(UUID_BYTE_WIDTH); - - static { - ExtensionTypeRegistry.register(INSTANCE); - } - @Override public ArrowType storageType() { - return STORAGE_TYPE; + return new ArrowType.FixedSizeBinary(16); } @Override public String extensionName() { - return EXTENSION_NAME; + return "uuid"; } @Override @@ -91,7 +47,7 @@ public ArrowType deserialize(ArrowType storageType, String serializedData) { throw new UnsupportedOperationException( "Cannot construct UuidType from underlying type " + storageType); } - return INSTANCE; + return new UuidType(); } @Override @@ -99,15 +55,9 @@ public String serialize() { return ""; } - @Override - public boolean isComplex() { - return false; - } - @Override public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector( - name, fieldType, allocator, new FixedSizeBinaryVector(name, allocator, UUID_BYTE_WIDTH)); + return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); } @Override