From fe2c3b11d140e92bb69f8bf51f530ba7d0a2c46e Mon Sep 17 00:00:00 2001 From: per Date: Fri, 1 May 2026 11:33:40 +0200 Subject: [PATCH 1/3] Fix matrix-avro CodeNarc warnings --- matrix-avro/build.gradle | 5 + .../matrix/avro/ArrayAvroSchemaDecl.groovy | 11 +- .../matrix/avro/AvroFormatProvider.groovy | 15 +- .../alipsa/matrix/avro/AvroReadOptions.groovy | 29 +- .../matrix/avro/AvroScalarTypeDecl.groovy | 2 + .../alipsa/matrix/avro/AvroSchemaDecl.groovy | 46 +- .../alipsa/matrix/avro/AvroSchemaUtil.groovy | 14 +- .../matrix/avro/AvroWriteOptions.groovy | 49 +- .../alipsa/matrix/avro/ColumnProfile.groovy | 36 + .../matrix/avro/DecimalAvroSchemaDecl.groovy | 11 +- .../matrix/avro/MapAvroSchemaDecl.groovy | 11 +- .../matrix/avro/MatrixAvroReader.groovy | 345 ++++----- .../matrix/avro/MatrixAvroWriter.groovy | 692 ++++++++---------- .../matrix/avro/RecordAvroSchemaDecl.groovy | 11 +- .../matrix/avro/ScalarAvroSchemaDecl.groovy | 11 +- .../alipsa/matrix/avro/SchemaCacheKey.groovy | 59 ++ .../exceptions/AvroConversionException.groovy | 53 +- .../exceptions/AvroSchemaException.groovy | 28 +- .../exceptions/AvroValidationException.groovy | 71 +- .../avro/benchmarks/BenchmarkUtils.groovy | 32 +- .../benchmarks/MatrixAvroBenchmark.groovy | 34 +- .../MatrixAvroConversionBenchmark.groovy | 52 +- .../MatrixAvroParquetBenchmark.groovy | 43 +- .../MatrixAvroSchemaBenchmark.groovy | 38 +- .../matrix/avro/AvroExceptionTest.groovy | 34 +- .../matrix/avro/AvroFormatProviderTest.groovy | 19 +- .../matrix/avro/MatrixAvroEdgeCaseTest.groovy | 87 ++- .../matrix/avro/MatrixAvroReaderTest.groovy | 181 ++--- .../avro/MatrixAvroRoundTripTest.groovy | 281 ++++--- .../matrix/avro/MatrixAvroWriterTest.groovy | 327 +++++---- 30 files changed, 1211 insertions(+), 1416 deletions(-) create mode 100644 matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ColumnProfile.groovy create mode 100644 matrix-avro/src/main/groovy/se/alipsa/matrix/avro/SchemaCacheKey.groovy diff --git a/matrix-avro/build.gradle 
b/matrix-avro/build.gradle index 7b0a54850..3e74a8b5d 100644 --- a/matrix-avro/build.gradle +++ b/matrix-avro/build.gradle @@ -21,6 +21,11 @@ JavaCompile javaCompile = compileJava { compileGroovy { options.deprecation = true + groovyOptions.configurationScript = rootProject.file('config/groovy/compileStatic.groovy') +} + +codenarc { + ignoreFailures = false } repositories { diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy index 50bdbc24f..40620b74d 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy @@ -1,32 +1,31 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString - import org.apache.avro.Schema +/** + * Avro schema declaration for array columns. 
+ */ @PackageScope -@CompileStatic @EqualsAndHashCode @ToString(includeNames = true) class ArrayAvroSchemaDecl extends AvroSchemaDecl { - final AvroSchemaDecl elementType + final AvroSchemaDecl elementType ArrayAvroSchemaDecl(AvroSchemaDecl elementType) { this.elementType = elementType } - @Override Map toMap() { [kind: 'array', elementType: elementType.toMap()] } - @Override @PackageScope Schema toAvroSchema(String defaultName, String namespace) { Schema elementSchema = elementType.toAvroSchema("${defaultName}_item", namespace) Schema.createArray(AvroSchemaUtil.nullableSchema(elementSchema)) } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy index 2a64e6370..9dace4d28 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy @@ -1,66 +1,55 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic - import se.alipsa.matrix.core.Matrix import se.alipsa.matrix.core.spi.AbstractFormatProvider + import se.alipsa.matrix.core.spi.OptionDescriptor /** * SPI format provider for Avro files. 
*/ -@CompileStatic class AvroFormatProvider extends AbstractFormatProvider { private static final Set EXTENSIONS = ['avro'] as Set - @Override Set supportedExtensions() { EXTENSIONS } - @Override String formatName() { 'Avro' } - @Override boolean canRead() { true } - @Override boolean canWrite() { true } - @Override Matrix read(File file, Map options) { MatrixAvroReader.read(file, AvroReadOptions.fromMap(options)) } - @Override Matrix read(URL url, Map options) { MatrixAvroReader.read(url, AvroReadOptions.fromMap(options)) } - @Override Matrix read(InputStream is, Map options) { MatrixAvroReader.read(is, AvroReadOptions.fromMap(options)) } - @Override void write(Matrix matrix, File file, Map options) { MatrixAvroWriter.write(matrix, file, AvroWriteOptions.fromMap(options)) } - @Override List readOptionDescriptors() { AvroReadOptions.descriptors() } - @Override List writeOptionDescriptors() { AvroWriteOptions.descriptors() } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy index 8bf10f792..5b5f2c3c6 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy @@ -1,10 +1,8 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic - import org.apache.avro.Schema - import se.alipsa.matrix.core.spi.OptionDescriptor + import se.alipsa.matrix.core.spi.OptionMaps /** @@ -20,7 +18,7 @@ import se.alipsa.matrix.core.spi.OptionMaps * * // With custom matrix name override * def options = new AvroReadOptions() - * .matrixName("MyData") + * .matrixName('MyData') * Matrix m = MatrixAvroReader.read(file, options) * * // With schema evolution (reader schema) @@ -32,23 +30,15 @@ import se.alipsa.matrix.core.spi.OptionMaps * * @see MatrixAvroReader */ -@CompileStatic class AvroReadOptions { private String matrixName = null private Schema readerSchema = 
null - - /** - * Creates a new AvroReadOptions with default settings. - */ - AvroReadOptions() { - } - /** * Sets the name for the resulting Matrix. * *

If not set, the reader falls back to the Avro record name from the file schema and then - * to a source-derived fallback such as the file name or "AvroMatrix". + * to a source-derived fallback such as the file name or 'AvroMatrix'. * * @param name the Matrix name * @return this options instance for method chaining @@ -57,7 +47,6 @@ class AvroReadOptions { this.matrixName = name return this } - /** * Sets a reader schema for schema evolution. * @@ -77,23 +66,19 @@ class AvroReadOptions { this.readerSchema = schema return this } - // Getters - /** * @return the Matrix name, or null if not set */ String getMatrixName() { return matrixName } - /** * @return the reader schema, or null if not set */ Schema getReaderSchema() { return readerSchema } - /** * Converts this options object to an SPI-friendly map. * @@ -109,7 +94,6 @@ class AvroReadOptions { } options } - /** * Creates {@link AvroReadOptions} from a generic SPI options map. * @@ -127,9 +111,9 @@ class AvroReadOptions { } if (normalized.containsKey('readerschema')) { def value = normalized.readerschema - if (value instanceof Schema) { + if (Schema.isInstance(value)) { result.readerSchema(value as Schema) - } else if (value instanceof CharSequence) { + } else if (CharSequence.isInstance(value)) { result.readerSchema(new Schema.Parser().parse(String.valueOf(value))) } else { throw new IllegalArgumentException("readerSchema must be a Schema or schema JSON string but was ${value?.class}") @@ -137,7 +121,6 @@ class AvroReadOptions { } result } - /** * Returns a human-readable description of all supported read options. * @@ -146,7 +129,6 @@ class AvroReadOptions { static String describe() { OptionDescriptor.describe(descriptors()) } - /** * Returns descriptors for all supported read options. 
* @@ -158,4 +140,5 @@ class AvroReadOptions { new OptionDescriptor('readerSchema', Schema, null, 'Reader schema or schema JSON for schema evolution') ] } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroScalarTypeDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroScalarTypeDecl.groovy index 52b7db24c..16c68aadf 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroScalarTypeDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroScalarTypeDecl.groovy @@ -4,6 +4,7 @@ package se.alipsa.matrix.avro * Scalar Avro schema declarations supported by explicit per-column overrides. */ enum AvroScalarTypeDecl { + STRING, BOOLEAN, INT, @@ -16,4 +17,5 @@ enum AvroScalarTypeDecl { TIMESTAMP_MILLIS, LOCAL_TIMESTAMP_MICROS, UUID + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy index 90e97c025..57cb6ad3f 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy @@ -1,16 +1,13 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic import groovy.transform.PackageScope - import org.apache.avro.Schema - import se.alipsa.matrix.core.spi.OptionMaps - import java.time.Instant import java.time.LocalDate import java.time.LocalDateTime import java.time.LocalTime + import java.util.Locale /** @@ -21,7 +18,6 @@ import java.util.Locale * Instances can be round-tripped through SPI option maps by calling {@link AvroSchemaDecl#toMap()} * and {@link AvroSchemaDecl#fromMap(java.util.Map)}. 
*/ -@CompileStatic abstract class AvroSchemaDecl { private static final Map, AvroScalarTypeDecl> SCALAR_TYPE_BY_CLASS = [ @@ -52,7 +48,6 @@ abstract class AvroSchemaDecl { (LocalDateTime) : AvroScalarTypeDecl.LOCAL_TIMESTAMP_MICROS, (UUID) : AvroScalarTypeDecl.UUID ].asImmutable() - /** * Creates a scalar schema declaration. * @@ -65,7 +60,6 @@ abstract class AvroSchemaDecl { } new ScalarAvroSchemaDecl(scalarType) } - /** * Creates a scalar schema declaration from a supported Java type. * @@ -90,7 +84,6 @@ abstract class AvroSchemaDecl { } scalar(toScalarType(javaType)) } - /** * Creates a fixed decimal schema declaration. * @@ -102,7 +95,6 @@ abstract class AvroSchemaDecl { validateDecimal(precision, scale, 'decimal') new DecimalAvroSchemaDecl(precision, scale) } - /** * Creates an array schema declaration. * @@ -115,7 +107,6 @@ abstract class AvroSchemaDecl { } new ArrayAvroSchemaDecl(elementType) } - /** * Creates a map schema declaration. * @@ -128,7 +119,6 @@ abstract class AvroSchemaDecl { } new MapAvroSchemaDecl(valueType) } - /** * Creates a record schema declaration using the default nested record name for the column. * @@ -138,7 +128,6 @@ abstract class AvroSchemaDecl { static AvroSchemaDecl record(Map fields) { record(null, fields) } - /** * Creates a record schema declaration. * @@ -149,7 +138,6 @@ abstract class AvroSchemaDecl { static AvroSchemaDecl record(String recordName, Map fields) { new RecordAvroSchemaDecl(recordName, validateFields(fields)) } - /** * Parses a schema declaration from an SPI-friendly nested map. * @@ -174,23 +162,20 @@ abstract class AvroSchemaDecl { default -> throw new IllegalArgumentException("Unsupported schema declaration kind '$kind'") } } - /** * Converts this declaration to an SPI-friendly nested map. 
* * @return map representation of this schema declaration */ abstract Map toMap() - @PackageScope abstract Schema toAvroSchema(String defaultName, String namespace) - /** * Parses the nested `columnSchemas` SPI option map used by {@link AvroWriteOptions}. */ @PackageScope static Map columnSchemasValue(Object value, String optionName) { - if (!(value instanceof Map)) { + if (!(Map.isInstance(value))) { throw new IllegalArgumentException("$optionName must be a Map but was ${value?.class}") } Map result = [:] @@ -200,7 +185,6 @@ abstract class AvroSchemaDecl { } result } - /** * Serializes typed column schema declarations back to the SPI map shape used by {@link AvroWriteOptions}. */ @@ -212,7 +196,6 @@ abstract class AvroSchemaDecl { } result } - private static AvroScalarTypeDecl toScalarType(Class javaType) { AvroScalarTypeDecl scalarType = SCALAR_TYPE_BY_CLASS[javaType] if (scalarType != null) { @@ -220,7 +203,6 @@ abstract class AvroSchemaDecl { } throw new IllegalArgumentException("Unsupported scalar Java type '${javaType.name}' for explicit Avro schema control") } - private static Map normalizeNestedKeys(Map value) { Map normalized = [:] value.each { key, item -> @@ -228,15 +210,14 @@ abstract class AvroSchemaDecl { } normalized } - private static AvroSchemaDecl schemaDeclValue(Object value, String optionName) { - if (value instanceof AvroSchemaDecl) { + if (AvroSchemaDecl.isInstance(value)) { return value as AvroSchemaDecl } - if (value instanceof AvroScalarTypeDecl) { + if (AvroScalarTypeDecl.isInstance(value)) { return scalar(value as AvroScalarTypeDecl) } - if (value instanceof Map) { + if (Map.isInstance(value)) { return fromMap(value as Map) } if (value != null) { @@ -251,9 +232,8 @@ abstract class AvroSchemaDecl { "$optionName must be an AvroSchemaDecl, AvroScalarTypeDecl, or declaration Map but was ${value?.class}" ) } - private static Map recordFieldsValue(Object value, String optionName) { - if (!(value instanceof Map)) { + if 
(!(Map.isInstance(value))) { throw new IllegalArgumentException("$optionName must be a Map but was ${value?.class}") } Map result = [:] @@ -266,7 +246,6 @@ abstract class AvroSchemaDecl { } result } - private static Map validateFields(Map fields) { if (fields == null || fields.isEmpty()) { throw new IllegalArgumentException('record fields must contain at least one field') @@ -281,7 +260,6 @@ abstract class AvroSchemaDecl { } result } - private static String requireName(String value, String optionName) { String trimmed = value?.trim() if (trimmed == null || trimmed.isEmpty()) { @@ -289,9 +267,8 @@ abstract class AvroSchemaDecl { } trimmed } - private static AvroScalarTypeDecl scalarTypeValue(Object value, String optionName) { - if (value instanceof AvroScalarTypeDecl) { + if (AvroScalarTypeDecl.isInstance(value)) { return value as AvroScalarTypeDecl } String text = OptionMaps.stringValueOrNull(value) @@ -304,7 +281,6 @@ abstract class AvroSchemaDecl { throw new IllegalArgumentException("Unsupported $optionName value '$text'", e) } } - private static int intValue(Object value, String optionName) { try { return new BigDecimal(String.valueOf(value)).intValueExact() @@ -312,7 +288,6 @@ abstract class AvroSchemaDecl { throw new IllegalArgumentException("$optionName must be an integer but was $value", e) } } - private static void validateDecimal(int precision, int scale, String optionName) { if (precision <= 0) { throw new IllegalArgumentException("$optionName precision must be > 0 but was $precision") @@ -324,7 +299,6 @@ abstract class AvroSchemaDecl { throw new IllegalArgumentException("$optionName scale must be <= precision but was $scale > $precision") } } - private static void ensureOnlyKeys(Map value, String optionName, Set allowedKeys) { List unexpected = value.keySet().findAll { String key -> !allowedKeys.contains(key) }.sort() if (!unexpected.isEmpty()) { @@ -333,30 +307,26 @@ abstract class AvroSchemaDecl { ) } } - private static AvroSchemaDecl 
parseScalarDecl(Map normalized) { ensureOnlyKeys(normalized, 'scalar', ['kind', 'scalartype'] as Set) scalar(scalarTypeValue(normalized.scalartype, 'scalar.scalarType')) } - private static AvroSchemaDecl parseDecimalDecl(Map normalized) { ensureOnlyKeys(normalized, 'decimal', ['kind', 'precision', 'scale'] as Set) decimal(intValue(normalized.precision, 'decimal.precision'), intValue(normalized.scale, 'decimal.scale')) } - private static AvroSchemaDecl parseArrayDecl(Map normalized) { ensureOnlyKeys(normalized, 'array', ['kind', 'elementtype'] as Set) array(schemaDeclValue(normalized.elementtype, 'array.elementType')) } - private static AvroSchemaDecl parseMapDecl(Map normalized) { ensureOnlyKeys(normalized, 'map', ['kind', 'valuetype'] as Set) map(schemaDeclValue(normalized.valuetype, 'map.valueType')) } - private static AvroSchemaDecl parseRecordDecl(Map normalized) { ensureOnlyKeys(normalized, 'record', ['kind', 'recordname', 'fields'] as Set) record(OptionMaps.stringValueOrNull(normalized.recordname), recordFieldsValue(normalized.fields, 'record.fields')) } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy index cd9aceb8b..2d741d2ff 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy @@ -1,25 +1,19 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic - import org.apache.avro.LogicalTypes import org.apache.avro.Schema - import se.alipsa.matrix.avro.exceptions.AvroSchemaException /** * Shared helpers for translating explicit schema declarations to Avro types. 
*/ -@CompileStatic final class AvroSchemaUtil { private AvroSchemaUtil() { } - static Schema nullableSchema(Schema schema) { Schema.createUnion([Schema.create(Schema.Type.NULL), schema]) } - static Schema scalarSchema(AvroScalarTypeDecl scalarType) { switch (scalarType) { case AvroScalarTypeDecl.STRING -> Schema.create(Schema.Type.STRING) @@ -37,7 +31,6 @@ final class AvroSchemaUtil { default -> throw new IllegalArgumentException("Unsupported Avro scalar type declaration $scalarType") } } - static void validateAvroFieldName(String fieldName, String location) { if (!isValidAvroName(fieldName)) { throw new AvroSchemaException( @@ -48,38 +41,33 @@ final class AvroSchemaUtil { ) } } - private static Schema createDateSchema() { Schema dateSchema = Schema.create(Schema.Type.INT) LogicalTypes.date().addToSchema(dateSchema) dateSchema } - private static Schema createTimeMillisSchema() { Schema timeSchema = Schema.create(Schema.Type.INT) LogicalTypes.timeMillis().addToSchema(timeSchema) timeSchema } - private static Schema createTimestampMillisSchema() { Schema timestampSchema = Schema.create(Schema.Type.LONG) LogicalTypes.timestampMillis().addToSchema(timestampSchema) timestampSchema } - private static Schema createLocalTimestampMicrosSchema() { Schema localTimestampSchema = Schema.create(Schema.Type.LONG) LogicalTypes.localTimestampMicros().addToSchema(localTimestampSchema) localTimestampSchema } - private static Schema createUuidSchema() { Schema uuidSchema = Schema.create(Schema.Type.STRING) LogicalTypes.uuid().addToSchema(uuidSchema) uuidSchema } - private static boolean isValidAvroName(String name) { name != null && !name.isEmpty() && name ==~ /[A-Za-z_][A-Za-z0-9_]*/ } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy index b5d24224a..2a94898c4 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy +++ 
b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy @@ -1,10 +1,7 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic - import org.apache.avro.file.CodecFactory import org.apache.avro.file.DataFileConstants - import se.alipsa.matrix.core.spi.OptionDescriptor import se.alipsa.matrix.core.spi.OptionMaps @@ -28,8 +25,8 @@ import se.alipsa.matrix.core.spi.OptionMaps * // With all options * def options = new AvroWriteOptions() * .inferPrecisionAndScale(true) - * .namespace("com.example.data") - * .schemaName("MyData") + * .namespace('com.example.data') + * .schemaName('MyData') * .compression(AvroWriteOptions.Compression.SNAPPY) * .syncInterval(64000) * MatrixAvroWriter.write(matrix, file, options) @@ -37,7 +34,6 @@ import se.alipsa.matrix.core.spi.OptionMaps * * @see MatrixAvroWriter */ -@CompileStatic class AvroWriteOptions { static final String DEFAULT_NAMESPACE = 'se.alipsa.matrix.avro' @@ -45,11 +41,11 @@ class AvroWriteOptions { static final int DEFAULT_SYNC_INTERVAL = 0 static final int MIN_SYNC_INTERVAL = 32 static final int MAX_SYNC_INTERVAL = 1 << 30 - /** * Supported compression codecs for Avro files. */ enum Compression { + /** No compression (default) */ NULL, /** Deflate compression (zlib) - good balance of speed and compression ratio */ @@ -63,7 +59,6 @@ class AvroWriteOptions { /** Zstandard compression - excellent balance of speed and ratio (requires zstd library) */ ZSTANDARD } - private boolean inferPrecisionAndScale = false private String namespace = DEFAULT_NAMESPACE private String schemaName = null @@ -71,13 +66,11 @@ class AvroWriteOptions { private int compressionLevel = DEFAULT_COMPRESSION_LEVEL private int syncInterval = DEFAULT_SYNC_INTERVAL private Map columnSchemas = [:] - /** * Creates a new AvroWriteOptions with default settings. */ AvroWriteOptions() { } - /** * Sets whether to infer precision and scale for BigDecimal columns. 
* @@ -92,34 +85,31 @@ class AvroWriteOptions { this.inferPrecisionAndScale = infer return this } - /** * Sets the namespace for the generated Avro schema. * - *

The namespace is used in the Avro schema's "namespace" field and helps + *

The namespace is used in the Avro schema's 'namespace' field and helps * organize schemas when using schema registries. * - * @param namespace the schema namespace (e.g., "com.example.data") + * @param namespace the schema namespace (e.g., 'com.example.data') * @return this options instance for method chaining */ AvroWriteOptions namespace(String namespace) { this.namespace = namespace return this } - /** * Sets the name for the generated Avro record schema. * - *

This name appears in the Avro schema's "name" field. + *

This name appears in the Avro schema's 'name' field. * - * @param name the schema name (e.g., "UserData") + * @param name the schema name (e.g., 'UserData') * @return this options instance for method chaining */ AvroWriteOptions schemaName(String name) { this.schemaName = name return this } - /** * Sets the compression codec to use for the Avro file. * @@ -136,7 +126,6 @@ class AvroWriteOptions { this.compression = effectiveCompression return this } - /** * Sets the compression level for codecs that support it. * @@ -153,7 +142,6 @@ class AvroWriteOptions { this.compressionLevel = level return this } - /** * Sets the sync marker interval in bytes. * @@ -169,7 +157,6 @@ class AvroWriteOptions { this.syncInterval = interval return this } - /** * Sets explicit per-column Avro schema declarations. * @@ -183,7 +170,6 @@ class AvroWriteOptions { this.columnSchemas = copyColumnSchemas(columnSchemas) return this } - /** * Sets an explicit Avro schema declaration for a single column. * @@ -197,58 +183,49 @@ class AvroWriteOptions { this.columnSchemas = updated return this } - // Getters - /** * @return true if precision and scale should be inferred for BigDecimal columns */ boolean getInferPrecisionAndScale() { return inferPrecisionAndScale } - /** * @return the schema namespace */ String getNamespace() { return namespace } - /** * @return the schema name */ String getSchemaName() { return schemaName } - /** * @return the compression codec */ Compression getCompression() { return compression } - /** * @return the compression level, or -1 for codec default */ int getCompressionLevel() { return compressionLevel } - /** * @return the sync interval in bytes, or 0 for default */ int getSyncInterval() { return syncInterval } - /** * @return explicit per-column schema declarations */ Map getColumnSchemas() { columnSchemas.asImmutable() } - /** * Creates the Avro CodecFactory based on the configured compression settings. 
* @@ -270,7 +247,6 @@ class AvroWriteOptions { CodecFactory.nullCodec() } } - /** * Converts this options object to an SPI-friendly map. * @@ -296,7 +272,6 @@ class AvroWriteOptions { } options } - /** * Creates {@link AvroWriteOptions} from a generic SPI options map. * @@ -329,7 +304,7 @@ class AvroWriteOptions { } if (normalized.containsKey('compression')) { def value = normalized.compression - if (value instanceof Compression) { + if (Compression.isInstance(value)) { compression = value as Compression } else if (value != null) { compression = Compression.valueOf(String.valueOf(value).toUpperCase()) @@ -364,7 +339,6 @@ class AvroWriteOptions { } result } - private static void validateCompressionLevel(Compression compression, int level) { Compression effectiveCompression = compression ?: Compression.NULL if (level == DEFAULT_COMPRESSION_LEVEL) { @@ -391,7 +365,6 @@ class AvroWriteOptions { } } } - private static void validateSyncInterval(int interval) { if (interval == DEFAULT_SYNC_INTERVAL) { return @@ -402,7 +375,6 @@ class AvroWriteOptions { ) } } - /** * Returns a human-readable description of all supported write options. * @@ -411,7 +383,6 @@ class AvroWriteOptions { static String describe() { OptionDescriptor.describe(descriptors()) } - /** * Returns descriptors for all supported write options. 
* @@ -428,7 +399,6 @@ class AvroWriteOptions { new OptionDescriptor('columnSchemas', Map, null, 'Map of column names to explicit Avro schema declarations for decimal, array, map, or record overrides') ] } - private static Map copyColumnSchemas(Map value) { if (value == null) { return [:] @@ -440,7 +410,6 @@ class AvroWriteOptions { } result } - private static String requireColumnName(String value) { String trimmed = value?.trim() if (trimmed == null || trimmed.isEmpty()) { @@ -448,11 +417,11 @@ class AvroWriteOptions { } trimmed } - private static AvroSchemaDecl requireDeclaration(String columnName, AvroSchemaDecl declaration) { if (declaration == null) { throw new IllegalArgumentException("columnSchemas['$columnName'] must not be null") } declaration } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ColumnProfile.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ColumnProfile.groovy new file mode 100644 index 000000000..a3c6e7d78 --- /dev/null +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ColumnProfile.groovy @@ -0,0 +1,36 @@ +package se.alipsa.matrix.avro + +import groovy.transform.PackageScope + +/** + * Inferred column characteristics used when building Avro schemas. 
+ */ +@PackageScope +final class ColumnProfile { + + final String name + final Class declaredType + Class effectiveType + Class listElemClass + Class mapValueClass + boolean recordLike = false + boolean recordSeen = false + Map recordSample + Set recordKeys + boolean sawDecimal = false + int maxIntegerDigits = 0 + int maxScale = 0 + ColumnProfile(String name, Class declaredType) { + this.name = name + this.declaredType = declaredType + } + int[] decimalMeta() { + if (!sawDecimal) { + return [10, 0] as int[] + } + int scale = Math.max(0, maxScale) + int precision = Math.max(1, maxIntegerDigits + scale) + [precision, scale] as int[] + } + +} diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy index efda9a858..d14a16148 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy @@ -1,31 +1,29 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString - import org.apache.avro.LogicalTypes import org.apache.avro.Schema +/** + * Avro schema declaration for decimal logical types. 
+ */ @PackageScope -@CompileStatic @EqualsAndHashCode @ToString(includeNames = true) class DecimalAvroSchemaDecl extends AvroSchemaDecl { + final int precision final int scale - DecimalAvroSchemaDecl(int precision, int scale) { this.precision = precision this.scale = scale } - @Override Map toMap() { [kind: 'decimal', precision: precision, scale: scale] } - @Override @PackageScope Schema toAvroSchema(String defaultName, String namespace) { @@ -33,4 +31,5 @@ class DecimalAvroSchemaDecl extends AvroSchemaDecl { LogicalTypes.decimal(precision, scale).addToSchema(schema) schema } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy index 3301ce826..592bafcdc 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy @@ -1,32 +1,31 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString - import org.apache.avro.Schema +/** + * Avro schema declaration for map columns. 
+ */ @PackageScope -@CompileStatic @EqualsAndHashCode @ToString(includeNames = true) class MapAvroSchemaDecl extends AvroSchemaDecl { - final AvroSchemaDecl valueType + final AvroSchemaDecl valueType MapAvroSchemaDecl(AvroSchemaDecl valueType) { this.valueType = valueType } - @Override Map toMap() { [kind: 'map', valueType: valueType.toMap()] } - @Override @PackageScope Schema toAvroSchema(String defaultName, String namespace) { Schema valueSchema = valueType.toAvroSchema("${defaultName}_value", namespace) Schema.createMap(AvroSchemaUtil.nullableSchema(valueSchema)) } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy index 56627bde3..4882f0e9a 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy @@ -1,7 +1,6 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic - +import org.apache.avro.LogicalType import org.apache.avro.LogicalTypes import org.apache.avro.Schema import org.apache.avro.file.DataFileStream @@ -9,14 +8,9 @@ import org.apache.avro.generic.GenericDatumReader import org.apache.avro.generic.GenericFixed import org.apache.avro.generic.GenericRecord import org.apache.avro.util.Utf8 - import se.alipsa.matrix.avro.exceptions.AvroConversionException import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix - -import java.net.MalformedURLException -import java.net.URI -import java.net.URISyntaxException import java.nio.ByteBuffer import java.nio.file.Path import java.time.* @@ -30,18 +24,31 @@ import java.time.* *

Example usage: *

{@code
  * // Read from file
- * Matrix m = MatrixAvroReader.read(new File("data.avro"))
+ * Matrix m = MatrixAvroReader.read(new File('data.avro'))
  *
  * // Read from file path string
- * Matrix m = MatrixAvroReader.readFile("/path/to/data.avro")
+ * Matrix m = MatrixAvroReader.readFile('/path/to/data.avro')
  *
  * // Read from URL
- * Matrix m = MatrixAvroReader.readUrl("https://example.com/data.avro")
+ * Matrix m = MatrixAvroReader.readUrl('https://example.com/data.avro')
  * }
*/ -@CompileStatic class MatrixAvroReader { + private static final String DEFAULT_MATRIX_NAME = 'AvroMatrix' + private static final String FILE_PARAMETER = 'file' + private static final String OPTIONS_NULL_MESSAGE = 'Options cannot be null' + private static final String INVALID_URL_STRING_MESSAGE = 'Invalid URL string: ' + private static final String INVALID_OCF_SUGGESTION = 'Ensure the file contains valid Avro OCF data' + private static final String PATH_NULL_MESSAGE = 'Path cannot be null' + private static final String URL_NULL_MESSAGE = 'URL cannot be null' + private static final String CONTENT_NULL_MESSAGE = 'Content cannot be null' + private static final String INPUT_STREAM_NULL_MESSAGE = 'InputStream cannot be null' + private static final String DOT = '.' + private static final String SLASH = '/' + private static final long MILLIS_PER_SECOND = 1_000L + private static final long NANOS_PER_MICRO = 1_000L + private static final long NANOS_PER_MILLI = 1_000_000L /** * Read an Avro file from a File object. * @@ -62,15 +69,14 @@ class MatrixAvroReader { } catch (Exception e) { throw new AvroValidationException( "Invalid or corrupt Avro file: ${file.absolutePath}", - "file", - "Ensure the file contains valid Avro OCF data", + FILE_PARAMETER, + INVALID_OCF_SUGGESTION, e ) } finally { is.close() } } - /** * Read an Avro file from a Path. * @@ -82,11 +88,10 @@ class MatrixAvroReader { */ static Matrix read(Path path) { if (path == null) { - throw new IllegalArgumentException("Path cannot be null") + throw new IllegalArgumentException(PATH_NULL_MESSAGE) } return read(path.toFile()) } - /** * Read Avro data from a URL. 
* @@ -98,7 +103,7 @@ class MatrixAvroReader { */ static Matrix read(URL url, String name = null) { if (url == null) { - throw new IllegalArgumentException("URL cannot be null") + throw new IllegalArgumentException(URL_NULL_MESSAGE) } InputStream is = url.openStream() try { @@ -107,7 +112,6 @@ class MatrixAvroReader { is.close() } } - /** * Read Avro data from a byte array. * @@ -119,11 +123,10 @@ class MatrixAvroReader { */ static Matrix read(byte[] content, String name = null) { if (content == null) { - throw new IllegalArgumentException("Content cannot be null") + throw new IllegalArgumentException(CONTENT_NULL_MESSAGE) } - return readInternal(new ByteArrayInputStream(content), name, "AvroMatrix", null) + return readInternal(new ByteArrayInputStream(content), name, DEFAULT_MATRIX_NAME, null) } - /** * Read an Avro file from a file path string. * @@ -136,11 +139,10 @@ class MatrixAvroReader { */ static Matrix readFile(String filePath) { if (filePath == null) { - throw new IllegalArgumentException("File path cannot be null") + throw new IllegalArgumentException('File path cannot be null') } return read(new File(filePath)) } - /** * Read Avro data from a URL string. * @@ -152,15 +154,14 @@ class MatrixAvroReader { */ static Matrix readUrl(String urlString) { if (urlString == null) { - throw new IllegalArgumentException("URL string cannot be null") + throw new IllegalArgumentException('URL string cannot be null') } try { return read(new URI(urlString).toURL()) } catch (URISyntaxException | MalformedURLException e) { - throw new IllegalArgumentException("Invalid URL string: " + urlString, e) + throw new IllegalArgumentException(INVALID_URL_STRING_MESSAGE + urlString, e) } } - /** * Read Avro data from an InputStream. 
* @@ -174,15 +175,13 @@ class MatrixAvroReader { */ static Matrix read(InputStream input, String name = null) { if (input == null) { - throw new IllegalArgumentException("InputStream cannot be null") + throw new IllegalArgumentException(INPUT_STREAM_NULL_MESSAGE) } - return readInternal(input, name, "AvroMatrix", null) + return readInternal(input, name, DEFAULT_MATRIX_NAME, null) } - // ---------------------------------------------------------------------- // Methods accepting AvroReadOptions // ---------------------------------------------------------------------- - /** * Read an Avro file from a File object with configurable options. * @@ -197,7 +196,7 @@ class MatrixAvroReader { static Matrix read(File file, AvroReadOptions options) { validateFile(file) if (options == null) { - throw new IllegalArgumentException("Options cannot be null") + throw new IllegalArgumentException(OPTIONS_NULL_MESSAGE) } InputStream is = new FileInputStream(file) try { @@ -207,15 +206,14 @@ class MatrixAvroReader { } catch (Exception e) { throw new AvroValidationException( "Invalid or corrupt Avro file: ${file.absolutePath}", - "file", - "Ensure the file contains valid Avro OCF data", + FILE_PARAMETER, + INVALID_OCF_SUGGESTION, e ) } finally { is.close() } } - /** * Read an Avro file from a Path with configurable options. * @@ -229,11 +227,10 @@ class MatrixAvroReader { */ static Matrix read(Path path, AvroReadOptions options) { if (path == null) { - throw new IllegalArgumentException("Path cannot be null") + throw new IllegalArgumentException(PATH_NULL_MESSAGE) } return read(path.toFile(), options) } - /** * Read Avro data from a URL with configurable options. 
* @@ -246,10 +243,10 @@ class MatrixAvroReader { */ static Matrix read(URL url, AvroReadOptions options) { if (url == null) { - throw new IllegalArgumentException("URL cannot be null") + throw new IllegalArgumentException(URL_NULL_MESSAGE) } if (options == null) { - throw new IllegalArgumentException("Options cannot be null") + throw new IllegalArgumentException(OPTIONS_NULL_MESSAGE) } InputStream is = url.openStream() try { @@ -258,7 +255,6 @@ class MatrixAvroReader { is.close() } } - /** * Read Avro data from a byte array with configurable options. * @@ -271,14 +267,13 @@ class MatrixAvroReader { */ static Matrix read(byte[] content, AvroReadOptions options) { if (content == null) { - throw new IllegalArgumentException("Content cannot be null") + throw new IllegalArgumentException(CONTENT_NULL_MESSAGE) } if (options == null) { - throw new IllegalArgumentException("Options cannot be null") + throw new IllegalArgumentException(OPTIONS_NULL_MESSAGE) } - return readInternal(new ByteArrayInputStream(content), options.matrixName, "AvroMatrix", options.readerSchema) + return readInternal(new ByteArrayInputStream(content), options.matrixName, DEFAULT_MATRIX_NAME, options.readerSchema) } - /** * Read Avro data from an InputStream with configurable options. * @@ -293,14 +288,13 @@ class MatrixAvroReader { */ static Matrix read(InputStream input, AvroReadOptions options) { if (input == null) { - throw new IllegalArgumentException("InputStream cannot be null") + throw new IllegalArgumentException(INPUT_STREAM_NULL_MESSAGE) } if (options == null) { - throw new IllegalArgumentException("Options cannot be null") + throw new IllegalArgumentException(OPTIONS_NULL_MESSAGE) } - return readInternal(input, options.matrixName, "AvroMatrix", options.readerSchema) + return readInternal(input, options.matrixName, DEFAULT_MATRIX_NAME, options.readerSchema) } - /** * Internal read implementation supporting optional reader schema and name resolution. 
*/ @@ -315,40 +309,42 @@ class MatrixAvroReader { Schema effectiveSchema = readerSchema ?: writerSchema List fields = effectiveSchema.fields String matrixName = resolveMatrixName(overrideName, writerSchema, fallbackName) - - LinkedHashMap> columns = new LinkedHashMap<>() + Map> columns = [:] for (Schema.Field f : fields) { - columns.put(f.name(), new ArrayList<>()) + columns.put(f.name(), []) } - int rowNumber = 0 for (GenericRecord rec : dfs) { - for (Schema.Field f : fields) { - Object raw = rec.get(f.name()) - try { - Object val = convertValue(f.schema(), raw) - columns.get(f.name()).add(val) - } catch (Exception e) { - throw new AvroConversionException( - "Failed to convert value", - f.name(), - rowNumber, - raw?.getClass()?.simpleName ?: "null", - getTargetType(f.schema()), - raw, - e - ) - } - } + appendRecordValues(rec, fields, columns, rowNumber) rowNumber++ } - return Matrix.builder(matrixName).columns(columns).build() } finally { dfs.close() } } - + private static void appendRecordValues(GenericRecord rec, List fields, + Map> columns, int rowNumber) { + fields.each { Schema.Field f -> + Object raw = rec.get(f.name()) + try { + columns.get(f.name()).add(convertValue(f.schema(), raw)) + } catch (Exception e) { + throw conversionException(f, rowNumber, raw, e) + } + } + } + private static AvroConversionException conversionException(Schema.Field field, int rowNumber, Object raw, Exception e) { + new AvroConversionException( + 'Failed to convert value', + field.name(), + rowNumber, + raw?.getClass()?.simpleName ?: 'null', + getTargetType(field.schema()), + raw, + e + ) + } private static String resolveMatrixName(String overrideName, Schema writerSchema, String fallbackName) { if (overrideName != null && !overrideName.isBlank()) { return overrideName @@ -357,9 +353,8 @@ class MatrixAvroReader { if (schemaName != null && !schemaName.isBlank()) { return schemaName } - fallbackName ?: "AvroMatrix" + fallbackName ?: DEFAULT_MATRIX_NAME } - /** * Gets a 
human-readable target type name from an Avro schema. */ @@ -376,7 +371,6 @@ class MatrixAvroReader { } return schema.getType().name() } - /** * Converts an Avro-typed value to a suitable Java value for Matrix storage. * @@ -394,97 +388,76 @@ class MatrixAvroReader { * @return the converted Java value suitable for Matrix storage, or null if input is null */ private static Object convertValue(Schema schema, Object v) { - if (v == null) return null - - // Unwrap UNIONs (commonly ["null", T]) + if (v == null) { + return null + } if (schema.getType() == Schema.Type.UNION) { - Schema nonNull = schema.getTypes().stream() - .filter(s -> s.getType() != Schema.Type.NULL) - .findFirst().orElse(schema) - return convertValue(nonNull, v) + return convertValue(nonNullSchema(schema), v) } - - // Logical types: switch on the NAME to avoid nested-class access issues - def lt = schema.getLogicalType() + LogicalType lt = schema.getLogicalType() if (lt != null) { - // decimal needs instanceof to read scale; the rest can switch on the name string - if (lt instanceof LogicalTypes.Decimal) { - return toBigDecimal((LogicalTypes.Decimal) lt, schema, v) - } - - String name = lt.getName() // e.g. "date", "time-millis", "uuid", ... 
- switch (name) { - case "date": // int days since epoch - return toLocalDate(v) - case "time-millis": // int millis since midnight - return toLocalTimeMillis(v) - case "time-micros": // long micros since midnight - return toLocalTimeMicros(v) - case "timestamp-millis": // long epoch millis UTC - return toInstantMillis(v) - case "timestamp-micros": // long epoch micros UTC - return toInstantMicros(v) - case "local-timestamp-millis": // long millis, no zone - return toLocalDateTimeMillis(v) - case "local-timestamp-micros": // long micros, no zone - return toLocalDateTimeMicros(v) - case "uuid": - return v.toString() // or UUID.fromString(v.toString()) - default: - // fall through to primitive/complex handling - break - } + Object logicalValue = convertLogicalValue(lt, schema, v) + return logicalValue != null ? logicalValue : convertSchemaValue(schema, v) } - + convertSchemaValue(schema, v) + } + private static Schema nonNullSchema(Schema schema) { + schema.getTypes().stream() + .filter(s -> s.getType() != Schema.Type.NULL) + .findFirst().orElse(schema) + } + private static Object convertLogicalValue(LogicalType lt, Schema schema, Object v) { + if (LogicalTypes.Decimal.isInstance(lt)) { + return toBigDecimal((LogicalTypes.Decimal) lt, schema, v) + } + switch (lt.name) { + case 'date' -> toLocalDate(v) + case 'time-millis' -> toLocalTimeMillis(v) + case 'time-micros' -> toLocalTimeMicros(v) + case 'timestamp-millis' -> toInstantMillis(v) + case 'timestamp-micros' -> toInstantMicros(v) + case 'local-timestamp-millis' -> toLocalDateTimeMillis(v) + case 'local-timestamp-micros' -> toLocalDateTimeMicros(v) + case 'uuid' -> v.toString() + default -> null + } + } + private static Object convertSchemaValue(Schema schema, Object v) { switch (schema.getType()) { - case Schema.Type.NULL: return null - case Schema.Type.BOOLEAN: return (Boolean) v - case Schema.Type.INT: return (Integer) v - case Schema.Type.LONG: return (Long) v - case Schema.Type.FLOAT: return (Float) v - case 
Schema.Type.DOUBLE: return (Double) v - - case Schema.Type.STRING: - return (v instanceof Utf8) ? v.toString() : (String) v - - case Schema.Type.BYTES: - return byteBufferToArray((ByteBuffer) v) - - case Schema.Type.FIXED: - return (v as GenericFixed).bytes().clone() - - case Schema.Type.ENUM: - return v.toString() - - case Schema.Type.ARRAY: - Schema elem = schema.getElementType() - List list = (List) v - List out = new ArrayList<>(list.size()) - for (Object e : list) out.add(convertValue(elem, e)) - return out - - case Schema.Type.MAP: - Schema vs = schema.getValueType() - Map m = (Map) v - Map outMap = new LinkedHashMap<>(m.size()) - for (Map.Entry e : m.entrySet()) { - outMap.put(e.getKey().toString(), convertValue(vs, e.getValue())) - } - return outMap - - case Schema.Type.RECORD: - GenericRecord gr = (GenericRecord) v - Map recMap = new LinkedHashMap<>(schema.getFields().size()) - for (Schema.Field f : schema.getFields()) { - recMap.put(f.name(), convertValue(f.schema(), gr.get(f.name()))) - } - return recMap - - default: - return v + case Schema.Type.NULL -> null + case Schema.Type.BOOLEAN -> (Boolean) v + case Schema.Type.INT -> (Integer) v + case Schema.Type.LONG -> (Long) v + case Schema.Type.FLOAT -> (Float) v + case Schema.Type.DOUBLE -> (Double) v + case Schema.Type.STRING -> (Utf8.isInstance(v)) ? 
v.toString() : (String) v + case Schema.Type.BYTES -> byteBufferToArray((ByteBuffer) v) + case Schema.Type.FIXED -> (v as GenericFixed).bytes().clone() + case Schema.Type.ENUM -> v.toString() + case Schema.Type.ARRAY -> convertArrayValue(schema, v) + case Schema.Type.MAP -> convertMapValue(schema, v) + case Schema.Type.RECORD -> convertRecordValue(schema, v) + default -> v } } - + private static List convertArrayValue(Schema schema, Object v) { + Schema elem = schema.getElementType() + List list = (List) v + list.collect { Object e -> convertValue(elem, e) } + } + private static Map convertMapValue(Schema schema, Object v) { + Schema valueSchema = schema.getValueType() + Map map = (Map) v + map.collectEntries { Utf8 key, Object value -> + [(key.toString()): convertValue(valueSchema, value)] + } as Map + } + private static Map convertRecordValue(Schema schema, Object v) { + GenericRecord record = (GenericRecord) v + schema.getFields().collectEntries { Schema.Field field -> + [(field.name()): convertValue(field.schema(), record.get(field.name()))] + } as Map + } /** * Converts a ByteBuffer to a byte array, extracting only the remaining bytes. * @@ -497,7 +470,6 @@ class MatrixAvroReader { slice.get(exact) return exact } - /** * Converts an Avro date value (days since epoch) to a LocalDate. * @@ -505,10 +477,9 @@ class MatrixAvroReader { * @return the corresponding LocalDate */ private static LocalDate toLocalDate(Object v) { - int days = (v instanceof Integer) ? (Integer) v : ((Number) v).intValue() + int days = (Integer.isInstance(v)) ? (Integer) v : ((Number) v).intValue() return LocalDate.ofEpochDay(days) } - /** * Converts an Avro time-millis value to a LocalTime. * @@ -516,10 +487,9 @@ class MatrixAvroReader { * @return the corresponding LocalTime */ private static LocalTime toLocalTimeMillis(Object v) { - long ms = (v instanceof Integer) ? 
((Integer) v).longValue() : ((Number) v).longValue() - return LocalTime.ofNanoOfDay(ms * 1_000_000L) + long ms = (Integer.isInstance(v)) ? ((Integer) v).longValue() : ((Number) v).longValue() + return LocalTime.ofNanoOfDay(ms * NANOS_PER_MILLI) } - /** * Converts an Avro time-micros value to a LocalTime. * @@ -528,9 +498,8 @@ class MatrixAvroReader { */ private static LocalTime toLocalTimeMicros(Object v) { long micros = ((Number) v).longValue() - return LocalTime.ofNanoOfDay(micros * 1_000L) + return LocalTime.ofNanoOfDay(micros * NANOS_PER_MICRO) } - /** * Converts an Avro timestamp-millis value to an Instant. * @@ -541,7 +510,6 @@ class MatrixAvroReader { long ms = ((Number) v).longValue() return Instant.ofEpochMilli(ms) } - /** * Converts an Avro timestamp-micros value to an Instant. * @@ -550,11 +518,10 @@ class MatrixAvroReader { */ private static Instant toInstantMicros(Object v) { long micros = ((Number) v).longValue() - long seconds = Math.floorDiv(micros, 1_000_000L) - long nanos = Math.floorMod(micros, 1_000_000L) * 1_000L + long seconds = Math.floorDiv(micros, NANOS_PER_MILLI) + long nanos = Math.floorMod(micros, NANOS_PER_MILLI) * NANOS_PER_MICRO return Instant.ofEpochSecond(seconds, nanos) } - /** * Converts an Avro local-timestamp-millis value to a LocalDateTime. * @@ -566,12 +533,11 @@ class MatrixAvroReader { private static LocalDateTime toLocalDateTimeMillis(Object v) { long ms = ((Number) v).longValue() return LocalDateTime.ofEpochSecond( - Math.floorDiv(ms, 1000L), - (int)((ms % 1000L) * 1_000_000L), + Math.floorDiv(ms, MILLIS_PER_SECOND), + (int)((ms % MILLIS_PER_SECOND) * NANOS_PER_MILLI), ZoneOffset.UTC ) } - /** * Converts an Avro local-timestamp-micros value to a LocalDateTime. 
* @@ -582,11 +548,10 @@ class MatrixAvroReader { */ private static LocalDateTime toLocalDateTimeMicros(Object v) { long micros = ((Number) v).longValue() - long seconds = Math.floorDiv(micros, 1_000_000L) - int nanos = (int) (Math.floorMod(micros, 1_000_000L) * 1_000L) + long seconds = Math.floorDiv(micros, NANOS_PER_MILLI) + int nanos = (int) (Math.floorMod(micros, NANOS_PER_MILLI) * NANOS_PER_MICRO) return LocalDateTime.ofEpochSecond(seconds, nanos, ZoneOffset.UTC) } - /** * Converts an Avro decimal logical type value to a BigDecimal. * @@ -607,11 +572,10 @@ class MatrixAvroReader { } else if (schema.getType() == Schema.Type.FIXED) { bytes = ((GenericFixed) v).bytes() } else { - throw new IllegalArgumentException("Decimal logical type on non-bytes/fixed field") + throw new IllegalArgumentException('Decimal logical type on non-bytes/fixed field') } return new BigDecimal(new BigInteger(bytes), scale) } - /** * Validates that the file exists and is not a directory. * @@ -619,7 +583,7 @@ class MatrixAvroReader { */ private static void validateFile(File file) { if (file == null) { - throw AvroValidationException.nullParameter("file") + throw AvroValidationException.nullParameter(FILE_PARAMETER) } if (!file.exists()) { throw AvroValidationException.fileNotFound(file.absolutePath) @@ -630,23 +594,21 @@ class MatrixAvroReader { if (file.length() == 0) { throw new AvroValidationException( "Avro file is empty: ${file.absolutePath}", - "file", - "Ensure the file contains Avro OCF data" + FILE_PARAMETER, + 'Ensure the file contains Avro OCF data' ) } } - /** * Extracts a default name from a file (file name without extension). 
*/ private static String defaultName(File file) { String name = file.name - if (name != null && name.contains('.')) { - name = name.substring(0, name.lastIndexOf('.')) + if (name?.contains(DOT)) { + name = name.substring(0, name.lastIndexOf(DOT)) } - return name ?: "AvroMatrix" + return name ?: DEFAULT_MATRIX_NAME } - /** * Extracts a default name from a URL (file name without extension). */ @@ -656,14 +618,15 @@ class MatrixAvroReader { name = url.getFile() } if (name == null || name.isEmpty()) { - return "AvroMatrix" + return DEFAULT_MATRIX_NAME } - if (name.contains('/')) { - name = name.substring(name.lastIndexOf('/') + 1) + if (name.contains(SLASH)) { + name = name.substring(name.lastIndexOf(SLASH) + 1) } - if (name.contains('.')) { - name = name.substring(0, name.lastIndexOf('.')) + if (name.contains(DOT)) { + name = name.substring(0, name.lastIndexOf(DOT)) } - return name ?: "AvroMatrix" + return name ?: DEFAULT_MATRIX_NAME } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy index 05ff17b97..a60061945 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy @@ -1,7 +1,5 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic - import org.apache.avro.Conversions import org.apache.avro.LogicalTypes import org.apache.avro.Schema @@ -11,19 +9,15 @@ import org.apache.avro.generic.GenericData import org.apache.avro.generic.GenericDatumWriter import org.apache.avro.generic.GenericFixed import org.apache.avro.generic.GenericRecord - import se.alipsa.matrix.avro.exceptions.AvroConversionException import se.alipsa.matrix.avro.exceptions.AvroSchemaException import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix - import java.math.RoundingMode import java.nio.ByteBuffer import 
java.nio.file.Path import java.sql.Time import java.time.* -import java.util.Collections -import java.util.WeakHashMap /** * Writes Matrix objects to Avro Object Container Files (OCF). @@ -34,28 +28,35 @@ import java.util.WeakHashMap * *

Example usage: *

{@code
- * Matrix m = Matrix.builder("data")
- *     .columnNames(["id", "name", "price"])
- *     .rows([[1, "Alice", 10.50], [2, "Bob", 20.75]])
+ * Matrix m = Matrix.builder('data')
+ *     .columnNames(['id', 'name', 'price'])
+ *     .rows([[1, 'Alice', 10.50], [2, 'Bob', 20.75]])
  *     .types(Integer, String, BigDecimal)
  *     .build()
  *
  * // Write to file
- * MatrixAvroWriter.write(m, new File("data.avro"))
+ * MatrixAvroWriter.write(m, new File('data.avro'))
  *
  * // Write with decimal precision inference
- * MatrixAvroWriter.write(m, new File("data.avro"), true)
+ * MatrixAvroWriter.write(m, new File('data.avro'), true)
  *
  * // Write to byte array
  * byte[] bytes = MatrixAvroWriter.writeBytes(m)
  * }
*/ -@CompileStatic class MatrixAvroWriter { + private static final String PATH_NULL_MESSAGE = 'Path cannot be null' + private static final String OUTPUT_STREAM_NULL_MESSAGE = 'OutputStream cannot be null' + private static final String OPTIONS_NULL_MESSAGE = 'Options cannot be null' + private static final String NULL_TYPE_NAME = 'null' + private static final Object NO_AVRO_VALUE = new Object() + private static final long MICROS_PER_SECOND = 1_000_000L + private static final long MILLIS_PER_SECOND = 1_000L + private static final int NANOS_PER_MICRO = 1_000 + private static final int NANOS_PER_MILLI = 1_000_000 private static final Map> SCHEMA_CACHE = Collections.synchronizedMap(new WeakHashMap>()) - /** * Write a Matrix to an Avro file. * @@ -78,7 +79,6 @@ class MatrixAvroWriter { dfw.close() } } - /** * Write a Matrix to an Avro file at the specified Path. * @@ -92,12 +92,11 @@ class MatrixAvroWriter { */ static void write(Matrix matrix, Path path, boolean inferPrecisionAndScale = false) { if (path == null) { - throw new IllegalArgumentException("Path cannot be null") + throw new IllegalArgumentException(PATH_NULL_MESSAGE) } validateMatrix(matrix) write(matrix, path.toFile(), inferPrecisionAndScale) } - /** * Write a Matrix to an OutputStream in Avro format. * @@ -113,7 +112,7 @@ class MatrixAvroWriter { static void write(Matrix matrix, OutputStream out, boolean inferPrecisionAndScale = false) { validateMatrix(matrix) if (out == null) { - throw new IllegalArgumentException("OutputStream cannot be null") + throw new IllegalArgumentException(OUTPUT_STREAM_NULL_MESSAGE) } Schema schema = buildSchema(matrix, inferPrecisionAndScale) DataFileWriter dfw = new DataFileWriter<>(new GenericDatumWriter(schema)) @@ -124,7 +123,6 @@ class MatrixAvroWriter { dfw.close() } } - /** * Write a Matrix to a byte array in Avro format. 
* @@ -147,11 +145,9 @@ class MatrixAvroWriter { } return baos.toByteArray() } - // ---------------------------------------------------------------------- // Methods accepting AvroWriteOptions // ---------------------------------------------------------------------- - /** * Write a Matrix to an Avro file with configurable options. * @@ -166,7 +162,7 @@ class MatrixAvroWriter { validateMatrix(matrix) validateFile(file) if (options == null) { - throw new IllegalArgumentException("Options cannot be null") + throw new IllegalArgumentException(OPTIONS_NULL_MESSAGE) } Schema schema = buildSchema(matrix, options) DataFileWriter dfw = createDataFileWriter(schema, options) @@ -177,7 +173,6 @@ class MatrixAvroWriter { dfw.close() } } - /** * Write a Matrix to an Avro file at the specified Path with configurable options. * @@ -190,11 +185,10 @@ class MatrixAvroWriter { */ static void write(Matrix matrix, Path path, AvroWriteOptions options) { if (path == null) { - throw new IllegalArgumentException("Path cannot be null") + throw new IllegalArgumentException(PATH_NULL_MESSAGE) } write(matrix, path.toFile(), options) } - /** * Write a Matrix to an OutputStream in Avro format with configurable options. * @@ -210,10 +204,10 @@ class MatrixAvroWriter { static void write(Matrix matrix, OutputStream out, AvroWriteOptions options) { validateMatrix(matrix) if (out == null) { - throw new IllegalArgumentException("OutputStream cannot be null") + throw new IllegalArgumentException(OUTPUT_STREAM_NULL_MESSAGE) } if (options == null) { - throw new IllegalArgumentException("Options cannot be null") + throw new IllegalArgumentException(OPTIONS_NULL_MESSAGE) } Schema schema = buildSchema(matrix, options) DataFileWriter dfw = createDataFileWriter(schema, options) @@ -224,7 +218,6 @@ class MatrixAvroWriter { dfw.close() } } - /** * Write a Matrix to a byte array in Avro format with configurable options. 
* @@ -237,7 +230,7 @@ class MatrixAvroWriter { static byte[] writeBytes(Matrix matrix, AvroWriteOptions options) { validateMatrix(matrix) if (options == null) { - throw new IllegalArgumentException("Options cannot be null") + throw new IllegalArgumentException(OPTIONS_NULL_MESSAGE) } ByteArrayOutputStream baos = new ByteArrayOutputStream() Schema schema = buildSchema(matrix, options) @@ -250,7 +243,6 @@ class MatrixAvroWriter { } return baos.toByteArray() } - /** * Creates a DataFileWriter configured with the specified options. */ @@ -262,7 +254,6 @@ class MatrixAvroWriter { } return dfw } - /** * Validates that the matrix is not null and has at least one column. * @@ -270,7 +261,7 @@ class MatrixAvroWriter { */ private static void validateMatrix(Matrix matrix) { if (matrix == null) { - throw AvroValidationException.nullParameter("matrix") + throw AvroValidationException.nullParameter('matrix') } if (matrix.columnCount() == 0) { throw AvroValidationException.emptyMatrix() @@ -284,7 +275,6 @@ class MatrixAvroWriter { } } } - /** * Validates the file parameter and ensures parent directory exists. * @@ -293,26 +283,24 @@ class MatrixAvroWriter { */ private static void validateFile(File file) { if (file == null) { - throw AvroValidationException.nullParameter("file") + throw AvroValidationException.nullParameter('file') } File parentDir = file.parentFile if (parentDir != null && !parentDir.exists()) { if (!parentDir.mkdirs()) { throw new IOException("Failed to create parent directory: ${parentDir.absolutePath}. " + - "Check that you have write permissions and the path is valid.") + 'Check that you have write permissions and the path is valid.') } } } - // ---------------------------------------------------------------------- // Schema building // ---------------------------------------------------------------------- - /** * Builds an Avro schema for the given Matrix. * *

The schema is a record type with one field per Matrix column. Each field - * is wrapped in a nullable union ["null", T] to handle null values. The type + * is wrapped in a nullable union ["null", T] to handle null values. The type * mapping follows these rules: *

    *
  • Primitive types map directly (String, Boolean, Integer, Long, Float, Double)
  • @@ -335,7 +323,6 @@ class MatrixAvroWriter { [:] ) } - /** * Builds an Avro schema for the given Matrix using options. * @@ -352,7 +339,6 @@ class MatrixAvroWriter { options.columnSchemas ) } - private static String resolveSchemaName(Matrix matrix, String configuredSchemaName) { if (configuredSchemaName != null && !configuredSchemaName.isBlank()) { return configuredSchemaName @@ -361,9 +347,8 @@ class MatrixAvroWriter { if (matrixName != null && !matrixName.isBlank()) { return matrixName } - "MatrixSchema" + 'MatrixSchema' } - /** * Internal schema building with configurable name and namespace. */ @@ -385,12 +370,9 @@ class MatrixAvroWriter { if (cached != null) { return cached } - - Schema record = Schema.createRecord(schemaName, "Generated by MatrixAvroWriter", namespace, false) + Schema record = Schema.createRecord(schemaName, 'Generated by MatrixAvroWriter', namespace, false) List fields = new ArrayList<>(matrix.columnCount()) - Map profiles = analyzeColumns(matrix, inferPrecisionAndScale) - for (String col : matrix.columnNames()) { AvroSchemaUtil.validateAvroFieldName(col, col) Schema fieldSchema @@ -400,7 +382,6 @@ class MatrixAvroWriter { } else { ColumnProfile profile = profiles.get(col) Class clazz = profile.effectiveType - if (clazz == List) { Class elemClass = profile.listElemClass ?: String Schema elemSchema = toFieldSchema(elemClass, null) @@ -409,10 +390,10 @@ class MatrixAvroWriter { } else if (clazz == Map) { if (profile.recordLike) { Map first = profile.recordSample - def rec = Schema.createRecord(col + "_record", null, namespace, false) - List flds = new ArrayList<>() + def rec = Schema.createRecord(col + '_record', null, namespace, false) + List flds = [] for (def k : first.keySet()) { - String fieldName = k.toString() + String fieldName = k AvroSchemaUtil.validateAvroFieldName(fieldName, "${col}.${fieldName}") def v = first.get(k) Class vClazz = (v == null) ? 
String : v.getClass() @@ -429,18 +410,17 @@ class MatrixAvroWriter { fieldSchema = Schema.createMap(nullableValue) } } else { - fieldSchema = toFieldSchema(clazz, profile.decimalMeta(inferPrecisionAndScale)) + int[] decimalMeta = inferPrecisionAndScale && clazz == BigDecimal ? profile.decimalMeta() : null + fieldSchema = toFieldSchema(clazz, decimalMeta) } } Schema nullable = AvroSchemaUtil.nullableSchema(fieldSchema) fields.add(new Schema.Field(col, nullable, null as String, (Object) null)) } - record.setFields(fields) cacheSchema(matrix, cacheKey, record) return record } - /** * Maps a Java class to the corresponding Avro field schema. * @@ -460,6 +440,7 @@ class MatrixAvroWriter { * @return the corresponding Avro schema */ private static Schema toFieldSchema(Class clazz, int[] decimalMeta) { + if (clazz == BigDecimal) { if (decimalMeta != null) { int precision = decimalMeta[0] > 0 ? decimalMeta[0] : 10 @@ -471,15 +452,28 @@ class MatrixAvroWriter { return Schema.create(Schema.Type.DOUBLE) // fallback like Parquet writer } } + if (clazz == String) { + return Schema.create(Schema.Type.STRING) + } + if (clazz == Boolean || clazz == boolean.class) { + return Schema.create(Schema.Type.BOOLEAN) + } + if (clazz == Integer || clazz == int.class) { + return Schema.create(Schema.Type.INT) + } + if (clazz == Long || clazz == long.class || clazz == BigInteger) { - if (clazz == String) return Schema.create(Schema.Type.STRING) - if (clazz == Boolean || clazz == boolean.class) return Schema.create(Schema.Type.BOOLEAN) - if (clazz == Integer || clazz == int.class) return Schema.create(Schema.Type.INT) - if (clazz == Long || clazz == long.class || clazz == BigInteger) return Schema.create(Schema.Type.LONG) - if (clazz == Float || clazz == float.class) return Schema.create(Schema.Type.FLOAT) - if (clazz == Double || clazz == double.class) return Schema.create(Schema.Type.DOUBLE) - if (clazz == byte[].class) return Schema.create(Schema.Type.BYTES) - + return 
Schema.create(Schema.Type.LONG) + } + if (clazz == Float || clazz == float.class) { + return Schema.create(Schema.Type.FLOAT) + } + if (clazz == Double || clazz == double.class) { + return Schema.create(Schema.Type.DOUBLE) + } + if (clazz == byte[].class) { + return Schema.create(Schema.Type.BYTES) + } if (clazz == LocalDate || clazz == java.sql.Date) { Schema s = Schema.create(Schema.Type.INT) LogicalTypes.date().addToSchema(s) @@ -510,15 +504,12 @@ class MatrixAvroWriter { LogicalTypes.uuid().addToSchema(s) return s } - // Fallback return Schema.create(Schema.Type.STRING) } - // ---------------------------------------------------------------------- // Row writing // ---------------------------------------------------------------------- - /** * Writes all Matrix rows to the Avro data file. * @@ -533,9 +524,8 @@ class MatrixAvroWriter { private static void writeRows(Matrix matrix, DataFileWriter dfw, Schema schema) { GenericData.Record rec = new GenericData.Record(schema) Conversions.DecimalConversion decConv = new Conversions.DecimalConversion() - // Unwrap nullable unions to actual field schema - Map fieldSchemas = new LinkedHashMap<>() + Map fieldSchemas = [:] for (Schema.Field f : schema.getFields()) { Schema s = f.schema() if (s.getType() == Schema.Type.UNION) { @@ -547,10 +537,8 @@ class MatrixAvroWriter { } fieldSchemas.put(f.name(), s) } - List cols = matrix.columnNames() int rows = matrix.rowCount() - for (int r = 0; r < rows; r++) { for (String col : cols) { Object v = matrix[r, col] @@ -558,10 +546,10 @@ class MatrixAvroWriter { try { if (!isCompatible(fs, v)) { throw new AvroSchemaException( - "Value does not match schema type", + 'Value does not match schema type', col, schemaTypeLabel(fs), - v?.getClass()?.simpleName ?: "null" + v?.getClass()?.simpleName ?: NULL_TYPE_NAME ) } rec.put(col, toAvroValue(fs, v, decConv)) @@ -569,10 +557,10 @@ class MatrixAvroWriter { throw e } catch (Exception e) { throw new AvroConversionException( - "Failed to convert 
value to Avro format", + 'Failed to convert value to Avro format', col, r, - v?.getClass()?.simpleName ?: "null", + v?.getClass()?.simpleName ?: NULL_TYPE_NAME, fs.getType().name(), v, e @@ -583,7 +571,6 @@ class MatrixAvroWriter { rec = new GenericData.Record(schema) // fresh record per row } } - /** * Converts a Java value to its Avro representation for writing. * @@ -603,166 +590,162 @@ class MatrixAvroWriter { * @throws UnresolvedUnionException if value cannot be matched to any union branch */ private static Object toAvroValue(Schema fieldSchema, Object v, Conversions.DecimalConversion decConv) { - if (v == null) return null - - // Handle UNIONs (including nested unions for array items and map values) + if (v == null) { + return null + } if (fieldSchema.getType() == Schema.Type.UNION) { - List types = fieldSchema.getTypes() - - // Common case: ["null", T] - if (types.size() == 2 && (types[0].getType() == Schema.Type.NULL || types[1].getType() == Schema.Type.NULL)) { - Schema nonNull = (types[0].getType() == Schema.Type.NULL) ? types[1] : types[0] - return (v == null) ? 
null : toAvroValue(nonNull, v, decConv) - } - - // More general unions: pick the first compatible branch and serialize with it - for (Schema branch : types) { - if (branch.getType() == Schema.Type.NULL && v == null) return null - if (branch.getType() == Schema.Type.NULL) continue - if (isCompatible(branch, v)) { - return toAvroValue(branch, v, decConv) - } - } - - // Could not resolve union — let Avro complain in a predictable way - throw new UnresolvedUnionException(fieldSchema, v) + return toUnionAvroValue(fieldSchema, v, decConv) } - + Object logicalValue = toLogicalAvroValue(fieldSchema, v, decConv) + if (!NO_AVRO_VALUE.is(logicalValue)) { + return logicalValue + } + toPrimitiveAvroValue(fieldSchema, v, decConv) + } + private static Object toUnionAvroValue(Schema fieldSchema, Object v, Conversions.DecimalConversion decConv) { + List types = fieldSchema.getTypes() + if (types.size() == 2 && (types[0].getType() == Schema.Type.NULL || types[1].getType() == Schema.Type.NULL)) { + Schema nonNull = (types[0].getType() == Schema.Type.NULL) ? 
types[1] : types[0] + return toAvroValue(nonNull, v, decConv) + } + Schema branch = types.find { Schema candidate -> + candidate.getType() != Schema.Type.NULL && isCompatible(candidate, v) + } + if (branch != null) { + return toAvroValue(branch, v, decConv) + } + throw new UnresolvedUnionException(fieldSchema, v) + } + private static Object toLogicalAvroValue(Schema fieldSchema, Object v, Conversions.DecimalConversion decConv) { def lt = fieldSchema.getLogicalType() - if (lt != null) { - String name = lt.getName() - switch (name) { - case "date": - if (v instanceof java.sql.Date) v = ((java.sql.Date) v).toLocalDate() - if (v instanceof LocalDate) return (int) ((LocalDate) v).toEpochDay() - break - - case "time-millis": - if (v instanceof Time) v = ((Time) v).toLocalTime() - if (v instanceof LocalTime) { - int nanosMs = ((LocalTime) v).getNano().intdiv(1_000_000) // nanos -> millis - long ms = ((LocalTime) v).toSecondOfDay() * 1000L + nanosMs - return (int) ms - } - break - - case "local-timestamp-micros": - if (v instanceof LocalDateTime) { - int nanosUs = ((LocalDateTime) v).getNano().intdiv(1_000) // nanos -> micros - long micros = ((LocalDateTime) v).toEpochSecond(ZoneOffset.UTC) * 1_000_000L + nanosUs - return micros - } - break - - case "timestamp-millis": - if (v instanceof Date) return ((Date) v).getTime() - if (v instanceof Instant) return ((Instant) v).toEpochMilli() - if (v instanceof LocalDateTime) { - long ms = ((LocalDateTime) v) - .toInstant(ZoneOffset.systemDefault().getRules().getOffset((LocalDateTime) v)) - .toEpochMilli() - return ms - } - break - - case "local-timestamp-millis": - if (v instanceof LocalDateTime) { - int nanosMs = ((LocalDateTime) v).getNano().intdiv(1_000_000) // nanos -> millis - long ms = ((LocalDateTime) v).toEpochSecond(ZoneOffset.UTC) * 1_000L + nanosMs - return ms - } - break - - case "uuid": - return v.toString() - - case "decimal": - if (v instanceof BigDecimal) { - LogicalTypes.Decimal dec = (LogicalTypes.Decimal) lt - 
return decConv.toBytes((BigDecimal) v, fieldSchema, dec) - } else if (v instanceof Double || v instanceof Float) { - LogicalTypes.Decimal dec = (LogicalTypes.Decimal) lt - BigDecimal bd = new BigDecimal(((Number) v).toString()) - .setScale(dec.getScale(), RoundingMode.HALF_UP) - return decConv.toBytes(bd, fieldSchema, dec) - } - break - } + if (lt == null) { + return NO_AVRO_VALUE } - - // Primitive fallback based on schema type + switch (lt.getName()) { + case 'date' -> toDateAvroValue(v) + case 'time-millis' -> toTimeMillisAvroValue(v) + case 'local-timestamp-micros' -> toLocalTimestampMicrosAvroValue(v) + case 'timestamp-millis' -> toTimestampMillisAvroValue(v) + case 'local-timestamp-millis' -> toLocalTimestampMillisAvroValue(v) + case 'uuid' -> v.toString() + case 'decimal' -> toDecimalAvroValue(fieldSchema, v, (LogicalTypes.Decimal) lt, decConv) + default -> NO_AVRO_VALUE + } + } + private static Object toDateAvroValue(Object v) { + Object value = java.sql.Date.isInstance(v) ? ((java.sql.Date) v).toLocalDate() : v + LocalDate.isInstance(value) ? (int) ((LocalDate) value).toEpochDay() : NO_AVRO_VALUE + } + private static Object toTimeMillisAvroValue(Object v) { + Object value = Time.isInstance(v) ? 
((Time) v).toLocalTime() : v + if (!LocalTime.isInstance(value)) { + return NO_AVRO_VALUE + } + int nanosMs = ((LocalTime) value).getNano().intdiv(NANOS_PER_MILLI) + (int) (((LocalTime) value).toSecondOfDay() * MILLIS_PER_SECOND + nanosMs) + } + private static Object toLocalTimestampMicrosAvroValue(Object v) { + if (!LocalDateTime.isInstance(v)) { + return NO_AVRO_VALUE + } + int nanosUs = ((LocalDateTime) v).getNano().intdiv(NANOS_PER_MICRO) + ((LocalDateTime) v).toEpochSecond(ZoneOffset.UTC) * MICROS_PER_SECOND + nanosUs + } + private static Object toTimestampMillisAvroValue(Object v) { + if (Date.isInstance(v)) { + return ((Date) v).getTime() + } + if (Instant.isInstance(v)) { + return ((Instant) v).toEpochMilli() + } + if (!LocalDateTime.isInstance(v)) { + return NO_AVRO_VALUE + } + ((LocalDateTime) v) + .toInstant(ZoneOffset.systemDefault().getRules().getOffset((LocalDateTime) v)) + .toEpochMilli() + } + private static Object toLocalTimestampMillisAvroValue(Object v) { + if (!LocalDateTime.isInstance(v)) { + return NO_AVRO_VALUE + } + int nanosMs = ((LocalDateTime) v).getNano().intdiv(NANOS_PER_MILLI) + ((LocalDateTime) v).toEpochSecond(ZoneOffset.UTC) * MILLIS_PER_SECOND + nanosMs + } + private static Object toDecimalAvroValue(Schema fieldSchema, Object v, LogicalTypes.Decimal dec, + Conversions.DecimalConversion decConv) { + if (BigDecimal.isInstance(v)) { + return decConv.toBytes((BigDecimal) v, fieldSchema, dec) + } + if (Double.isInstance(v) || Float.isInstance(v)) { + BigDecimal bd = new BigDecimal(((Number) v).toString()).setScale(dec.getScale(), RoundingMode.HALF_UP) + return decConv.toBytes(bd, fieldSchema, dec) + } + NO_AVRO_VALUE + } + private static Object toPrimitiveAvroValue(Schema fieldSchema, Object v, Conversions.DecimalConversion decConv) { switch (fieldSchema.getType()) { - case Schema.Type.STRING: - return v.toString() - case Schema.Type.BOOLEAN: - return (Boolean) v - case Schema.Type.INT: - if (v instanceof Number) return ((Number) 
v).intValue() - break - case Schema.Type.LONG: - if (v instanceof BigInteger) return ((BigInteger) v).longValue() - if (v instanceof Number) return ((Number) v).longValue() - if (v instanceof Date) return ((Date) v).time - if (v instanceof Instant) return ((Instant) v).toEpochMilli() - break - case Schema.Type.FLOAT: - if (v instanceof Number) return ((Number) v).floatValue() - break - case Schema.Type.DOUBLE: - if (v instanceof BigDecimal) return ((BigDecimal) v).doubleValue() - if (v instanceof Number) return ((Number) v).doubleValue() - break - case Schema.Type.BYTES: - if (v instanceof byte[]) return ByteBuffer.wrap((byte[]) v) - if (v instanceof ByteBuffer) return v - if (v instanceof BigDecimal) { - // fallback: unscaled bytes (only if schema is plain BYTES w/o decimal) - return ByteBuffer.wrap(((BigDecimal) v).unscaledValue().toByteArray()) - } - break - case Schema.Type.ARRAY: - Schema elem = fieldSchema.getElementType() - List input = (List) v - List out = new ArrayList( input == null ? 0 : input.size() ) - if ( input != null ) { - for (def e: input ) out.add(toAvroValue(elem, e, decConv)) - } - return out - - case Schema.Type.MAP: - Schema vs = fieldSchema.getValueType() - Map inMap = (Map) v - Map outMap = new LinkedHashMap<>() - if (inMap != null) { - for (def e : inMap.entrySet()) { - outMap.put(e.key?.toString(), toAvroValue(vs, e.value, decConv)) - } - } - return outMap - - case Schema.Type.RECORD: - GenericData.Record gr = new GenericData.Record(fieldSchema) - Map inRec = (Map) v - for (Schema.Field f : fieldSchema.getFields()) { - def fv = (inRec == null) ? 
null : inRec.get(f.name()) - // unwrap nullable union in field - Schema fs = f.schema() - if (fs.getType() == Schema.Type.UNION) { - for (Schema t : fs.getTypes()) { - if (t.getType() != Schema.Type.NULL) { - fs = t; break - } - } - } - gr.put(f.name(), toAvroValue(fs, fv, decConv)) - } - return gr + case Schema.Type.STRING -> v.toString() + case Schema.Type.BOOLEAN -> (Boolean) v + case Schema.Type.INT -> Number.isInstance(v) ? ((Number) v).intValue() : v.toString() + case Schema.Type.LONG -> toLongAvroValue(v) + case Schema.Type.FLOAT -> Number.isInstance(v) ? ((Number) v).floatValue() : v.toString() + case Schema.Type.DOUBLE -> Number.isInstance(v) ? ((Number) v).doubleValue() : v.toString() + case Schema.Type.BYTES -> toBytesAvroValue(v) + case Schema.Type.ARRAY -> toArrayAvroValue(fieldSchema, (List) v, decConv) + case Schema.Type.MAP -> toMapAvroValue(fieldSchema, (Map) v, decConv) + case Schema.Type.RECORD -> toRecordAvroValue(fieldSchema, (Map) v, decConv) + default -> v.toString() } - - // Last resort - return v.toString() } - + private static Object toLongAvroValue(Object v) { + if (BigInteger.isInstance(v)) { + return ((BigInteger) v).longValue() + } + if (Number.isInstance(v)) { + return ((Number) v).longValue() + } + if (Date.isInstance(v)) { + return ((Date) v).time + } + Instant.isInstance(v) ? ((Instant) v).toEpochMilli() : v.toString() + } + private static Object toBytesAvroValue(Object v) { + if (byte[].isInstance(v)) { + return ByteBuffer.wrap((byte[]) v) + } + if (ByteBuffer.isInstance(v)) { + return v + } + BigDecimal.isInstance(v) ? 
ByteBuffer.wrap(((BigDecimal) v).unscaledValue().toByteArray()) : v.toString() + } + private static List toArrayAvroValue(Schema fieldSchema, List input, Conversions.DecimalConversion decConv) { + Schema elem = fieldSchema.getElementType() + input?.collect { Object e -> toAvroValue(elem, e, decConv) } ?: [] + } + private static Map toMapAvroValue(Schema fieldSchema, Map input, Conversions.DecimalConversion decConv) { + Schema vs = fieldSchema.getValueType() + Map outMap = [:] + input?.each { key, value -> + outMap[key?.toString()] = toAvroValue(vs, value, decConv) + } + outMap + } + private static GenericData.Record toRecordAvroValue(Schema fieldSchema, Map input, + Conversions.DecimalConversion decConv) { + GenericData.Record record = new GenericData.Record(fieldSchema) + fieldSchema.getFields().each { Schema.Field field -> + def value = input == null ? null : input.get(field.name()) + record.put(field.name(), toAvroValue(nonNullSchema(field.schema()), value, decConv)) + } + record + } + private static Schema nonNullSchema(Schema schema) { + if (schema.getType() != Schema.Type.UNION) { + return schema + } + schema.getTypes().find { Schema candidate -> candidate.getType() != Schema.Type.NULL } ?: schema + } /** * Returns a cached schema for the given matrix and cache key, if available. * @@ -776,30 +759,26 @@ class MatrixAvroWriter { return perMatrix == null ? 
null : perMatrix.get(key) } } - private static void cacheSchema(Matrix matrix, SchemaCacheKey key, Schema schema) { synchronized (SCHEMA_CACHE) { Map perMatrix = SCHEMA_CACHE.get(matrix) if (perMatrix == null) { - perMatrix = new LinkedHashMap<>() + perMatrix = [:] SCHEMA_CACHE.put(matrix, perMatrix) } perMatrix.put(key, schema) } } - private static Map analyzeColumns(Matrix matrix, boolean inferPrecisionAndScale) { - Map profiles = new LinkedHashMap<>() + Map profiles = [:] for (String col : matrix.columnNames()) { profiles.put(col, analyzeColumn(matrix, col, inferPrecisionAndScale)) } return profiles } - private static ColumnProfile analyzeColumn(Matrix matrix, String col, boolean inferPrecisionAndScale) { Class declared = normalizeType(matrix.type(col)) ColumnProfile profile = new ColumnProfile(col, declared) - if (declared != Object && declared != Number) { profile.effectiveType = declared if (declared == BigDecimal && inferPrecisionAndScale) { @@ -811,49 +790,48 @@ class MatrixAvroWriter { } return profile } - boolean sawBigDecimal = false boolean sawFloat = false boolean sawIntegral = false boolean needsLong = false boolean fixedType = false - int rows = matrix.rowCount() for (int r = 0; r < rows; r++) { Object v = matrix[r, col] - if (v == null) continue - + if (v == null) { + continue + } if (!fixedType) { - if (v instanceof BigDecimal) { + if (BigDecimal.isInstance(v)) { sawBigDecimal = true if (inferPrecisionAndScale) { updateDecimalMeta((BigDecimal) v, profile) } continue } - if (v instanceof Float || v instanceof Double) { + if (Float.isInstance(v) || Double.isInstance(v)) { sawFloat = true continue } - if (v instanceof Byte || v instanceof Short || v instanceof Integer - || v instanceof Long || v instanceof BigInteger) { + if (Byte.isInstance(v) || Short.isInstance(v) || Integer.isInstance(v) + || Long.isInstance(v) || BigInteger.isInstance(v)) { sawIntegral = true - long lv = (v instanceof BigInteger) ? 
((BigInteger) v).longValue() : ((Number) v).longValue() - if (lv < Integer.MIN_VALUE || lv > Integer.MAX_VALUE || v instanceof Long || v instanceof BigInteger) { + long lv = (BigInteger.isInstance(v)) ? ((BigInteger) v).longValue() : ((Number) v).longValue() + if (lv < Integer.MIN_VALUE || lv > Integer.MAX_VALUE || Long.isInstance(v) || BigInteger.isInstance(v)) { needsLong = true } continue } - if (v instanceof String || v instanceof Boolean || v instanceof byte[] - || v instanceof java.sql.Date || v instanceof Time || v instanceof Date - || v instanceof LocalDate || v instanceof LocalTime - || v instanceof Instant || v instanceof LocalDateTime - || v instanceof UUID) { + if (String.isInstance(v) || Boolean.isInstance(v) || byte[].isInstance(v) + || java.sql.Date.isInstance(v) || Time.isInstance(v) || Date.isInstance(v) + || LocalDate.isInstance(v) || LocalTime.isInstance(v) + || Instant.isInstance(v) || LocalDateTime.isInstance(v) + || UUID.isInstance(v)) { profile.effectiveType = v.getClass() fixedType = true break } - if (v instanceof List) { + if (List.isInstance(v)) { profile.effectiveType = List fixedType = true scanListElementValue((List) v, profile) @@ -862,7 +840,7 @@ class MatrixAvroWriter { } continue } - if (v instanceof Map) { + if (Map.isInstance(v)) { profile.effectiveType = Map fixedType = true scanMapValue((Map) v, profile) @@ -872,11 +850,11 @@ class MatrixAvroWriter { fixedType = true break } else if (profile.effectiveType == Map) { - if (v instanceof Map) { + if (Map.isInstance(v)) { scanMapValue((Map) v, profile) } } else if (profile.effectiveType == List) { - if (v instanceof List && profile.listElemClass == null) { + if (List.isInstance(v) && profile.listElemClass == null) { scanListElementValue((List) v, profile) if (profile.listElemClass != null) { break @@ -884,7 +862,6 @@ class MatrixAvroWriter { } } } - if (!fixedType) { if (sawBigDecimal) { profile.effectiveType = BigDecimal @@ -896,25 +873,22 @@ class MatrixAvroWriter { 
profile.effectiveType = String } } - return profile } - private static void scanDecimalPrecision(Matrix matrix, String col, ColumnProfile profile) { int rows = matrix.rowCount() for (int r = 0; r < rows; r++) { def v = matrix[r, col] - if (v instanceof BigDecimal) { + if (BigDecimal.isInstance(v)) { updateDecimalMeta((BigDecimal) v, profile) } } } - private static void scanListElement(Matrix matrix, String col, ColumnProfile profile) { int rows = matrix.rowCount() for (int r = 0; r < rows; r++) { def v = matrix[r, col] - if (v instanceof List) { + if (List.isInstance(v)) { scanListElementValue((List) v, profile) if (profile.listElemClass != null) { return @@ -922,17 +896,15 @@ class MatrixAvroWriter { } } } - private static void scanMapDetails(Matrix matrix, String col, ColumnProfile profile) { int rows = matrix.rowCount() for (int r = 0; r < rows; r++) { def v = matrix[r, col] - if (v instanceof Map) { + if (Map.isInstance(v)) { scanMapValue((Map) v, profile) } } } - private static void scanListElementValue(List list, ColumnProfile profile) { for (def e : list) { if (e != null) { @@ -941,20 +913,18 @@ class MatrixAvroWriter { } } } - private static void scanMapValue(Map map, ColumnProfile profile) { if (!profile.recordSeen) { profile.recordSeen = true profile.recordLike = true profile.recordSample = map - profile.recordKeys = new LinkedHashSet<>(map.keySet().collect { it?.toString() }) + profile.recordKeys = new LinkedHashSet<>(map.keySet()*.toString()) } else if (profile.recordLike) { - Set keys = new LinkedHashSet<>(map.keySet().collect { it?.toString() }) - if (!profile.recordKeys.equals(keys)) { + Set keys = new LinkedHashSet<>(map.keySet()*.toString()) + if (profile.recordKeys != keys) { profile.recordLike = false } } - if (profile.mapValueClass == null) { for (def e : map.values()) { if (e != null) { @@ -964,7 +934,6 @@ class MatrixAvroWriter { } } } - private static void updateDecimalMeta(BigDecimal value, ColumnProfile profile) { profile.sawDecimal = true 
int scale = value.scale() @@ -975,11 +944,9 @@ class MatrixAvroWriter { } profile.maxIntegerDigits = Math.max(profile.maxIntegerDigits, integerDigits) } - private static Class normalizeType(Class clazz) { return clazz == BigInteger ? Long : clazz } - private static void validateDeclaredColumnSchemas(Matrix matrix, Map declaredSchemas) { Set matrixColumns = matrix.columnNames() as Set declaredSchemas.keySet().each { String columnName -> @@ -988,7 +955,6 @@ class MatrixAvroWriter { } } } - private static Map> schemaSignature(Map declaredSchemas) { Map> signature = [:] declaredSchemas.each { String columnName, AvroSchemaDecl declaration -> @@ -996,87 +962,6 @@ class MatrixAvroWriter { } signature.asImmutable() } - - private static final class SchemaCacheKey { - private final String schemaName - private final String namespace - private final boolean inferPrecisionAndScale - private final int rowCount - private final List columnNames - private final List> columnTypes - private final Map> columnSchemas - - private SchemaCacheKey(String schemaName, String namespace, boolean inferPrecisionAndScale, - int rowCount, List columnNames, List> columnTypes, - Map> columnSchemas) { - this.schemaName = schemaName - this.namespace = namespace - this.inferPrecisionAndScale = inferPrecisionAndScale - this.rowCount = rowCount - this.columnNames = Collections.unmodifiableList(new ArrayList<>(columnNames)) - this.columnTypes = Collections.unmodifiableList(new ArrayList<>(columnTypes)) - this.columnSchemas = columnSchemas - } - - @Override - boolean equals(Object other) { - if (this.is(other)) return true - if (!(other instanceof SchemaCacheKey)) return false - SchemaCacheKey that = (SchemaCacheKey) other - return inferPrecisionAndScale == that.inferPrecisionAndScale && - rowCount == that.rowCount && - schemaName == that.schemaName && - namespace == that.namespace && - columnNames == that.columnNames && - columnTypes == that.columnTypes && - columnSchemas == that.columnSchemas - } - - 
@Override - int hashCode() { - int result = schemaName.hashCode() - result = 31 * result + namespace.hashCode() - result = 31 * result + (inferPrecisionAndScale ? 1 : 0) - result = 31 * result + rowCount - result = 31 * result + columnNames.hashCode() - result = 31 * result + columnTypes.hashCode() - result = 31 * result + columnSchemas.hashCode() - return result - } - } - - private static final class ColumnProfile { - final String name - final Class declaredType - Class effectiveType - Class listElemClass - Class mapValueClass - boolean recordLike = false - boolean recordSeen = false - Map recordSample - Set recordKeys - boolean sawDecimal = false - int maxIntegerDigits = 0 - int maxScale = 0 - - private ColumnProfile(String name, Class declaredType) { - this.name = name - this.declaredType = declaredType - } - - int[] decimalMeta(boolean inferPrecisionAndScale) { - if (!inferPrecisionAndScale || effectiveType != BigDecimal) { - return null - } - if (!sawDecimal) { - return [10, 0] as int[] - } - int scale = Math.max(0, maxScale) - int precision = Math.max(1, maxIntegerDigits + scale) - return [precision, scale] as int[] - } - } - /** * Checks if a Java value is compatible with an Avro schema type. 
* @@ -1089,80 +974,80 @@ class MatrixAvroWriter { * @return true if the value can be serialized under this schema */ private static boolean isCompatible(Schema s, Object v) { - if (v == null) return true + if (v == null) { + return true + } if (s.getType() == Schema.Type.UNION) { for (Schema branch : s.getTypes()) { - if (isCompatible(branch, v)) return true + if (isCompatible(branch, v)) { + return true + } } return false } def logical = s.getLogicalType() if (logical != null) { String name = logical.getName() - switch (name) { - case "date": - return v instanceof LocalDate || v instanceof java.sql.Date || v instanceof Number - case "time-millis": - case "time-micros": - return v instanceof LocalTime || v instanceof Time || v instanceof Number - case "timestamp-millis": - case "timestamp-micros": - return v instanceof Instant || v instanceof Date || v instanceof Number - case "local-timestamp-millis": - case "local-timestamp-micros": - return v instanceof LocalDateTime || v instanceof Number - case "uuid": - return v instanceof UUID || v instanceof String - case "decimal": - return v instanceof BigDecimal || v instanceof Double || v instanceof Float || - v instanceof byte[] || v instanceof ByteBuffer + return switch (name) { + case 'date' -> LocalDate.isInstance(v) || java.sql.Date.isInstance(v) || Number.isInstance(v) + case 'time-millis', 'time-micros' -> LocalTime.isInstance(v) || Time.isInstance(v) || Number.isInstance(v) + case 'timestamp-millis', 'timestamp-micros' -> Instant.isInstance(v) || Date.isInstance(v) || Number.isInstance(v) + case 'local-timestamp-millis', 'local-timestamp-micros' -> LocalDateTime.isInstance(v) || Number.isInstance(v) + case 'uuid' -> UUID.isInstance(v) || String.isInstance(v) + case 'decimal' -> BigDecimal.isInstance(v) || Double.isInstance(v) || Float.isInstance(v) || + byte[].isInstance(v) || ByteBuffer.isInstance(v) + default -> false } } - switch (s.getType()) { - case Schema.Type.STRING: return true // we'll toString() later 
- case Schema.Type.BOOLEAN: return v instanceof Boolean - case Schema.Type.INT: return v instanceof Byte || v instanceof Short || v instanceof Integer - case Schema.Type.LONG: return v instanceof Number || v instanceof Date || v instanceof Instant - case Schema.Type.FLOAT: return v instanceof Number - case Schema.Type.DOUBLE: return v instanceof Number || v instanceof BigDecimal - case Schema.Type.BYTES: return (v instanceof byte[]) || (v instanceof ByteBuffer) || (v instanceof BigDecimal) - case Schema.Type.ARRAY: - if (!(v instanceof List)) return false - Schema elem = s.getElementType() - for (def e : (List) v) { - if (!isCompatible(elem, e)) return false - } - return true - case Schema.Type.MAP: - if (!(v instanceof Map)) return false - Schema vs = s.getValueType() - for (def e : ((Map) v).entrySet()) { - if (!isCompatible(vs, e.value)) return false - } - return true - case Schema.Type.RECORD: - if (v instanceof GenericRecord) return true - if (!(v instanceof Map)) return false - Map inRec = (Map) v - for (Schema.Field f : s.getFields()) { - if (!isCompatible(f.schema(), inRec.get(f.name()))) return false - } - return true - case Schema.Type.FIXED: return v instanceof GenericFixed - default: return false + return switch (s.getType()) { + case Schema.Type.STRING -> true // we'll toString() later + case Schema.Type.BOOLEAN -> Boolean.isInstance(v) + case Schema.Type.INT -> Byte.isInstance(v) || Short.isInstance(v) || Integer.isInstance(v) + case Schema.Type.LONG -> Number.isInstance(v) || Date.isInstance(v) || Instant.isInstance(v) + case Schema.Type.FLOAT -> Number.isInstance(v) + case Schema.Type.DOUBLE -> Number.isInstance(v) || BigDecimal.isInstance(v) + case Schema.Type.BYTES -> (byte[].isInstance(v)) || (ByteBuffer.isInstance(v)) || (BigDecimal.isInstance(v)) + case Schema.Type.ARRAY -> isArrayCompatible(s, v) + case Schema.Type.MAP -> isMapCompatible(s, v) + case Schema.Type.RECORD -> isRecordCompatible(s, v) + case Schema.Type.FIXED -> 
GenericFixed.isInstance(v) + default -> false } } - + private static boolean isArrayCompatible(Schema schema, Object value) { + if (!(List.isInstance(value))) { + return false + } + Schema elem = schema.getElementType() + ((List) value).every { Object item -> isCompatible(elem, item) } + } + private static boolean isMapCompatible(Schema schema, Object value) { + if (!(Map.isInstance(value))) { + return false + } + Schema valueSchema = schema.getValueType() + ((Map) value).entrySet().every { Map.Entry entry -> isCompatible(valueSchema, entry.value) } + } + private static boolean isRecordCompatible(Schema schema, Object value) { + if (GenericRecord.isInstance(value)) { + return true + } + if (!(Map.isInstance(value))) { + return false + } + Map input = (Map) value + schema.getFields().every { Schema.Field field -> isCompatible(field.schema(), input.get(field.name())) } + } /** * Produces a human-readable label for a schema type, preferring logical types. */ private static String schemaTypeLabel(Schema schema) { if (schema.getType() == Schema.Type.UNION) { - List parts = new ArrayList<>() + List parts = [] for (Schema branch : schema.getTypes()) { parts.add(schemaTypeLabel(branch)) } - return "UNION[" + String.join(", ", parts) + "]" + return 'UNION[' + String.join(', ', parts) + ']' } def logical = schema.getLogicalType() if (logical != null) { @@ -1170,4 +1055,5 @@ class MatrixAvroWriter { } return schema.getType().name() } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy index 93d246b0f..d0a9c9ab0 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy @@ -1,25 +1,24 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import 
groovy.transform.ToString - import org.apache.avro.Schema +/** + * Avro schema declaration for record columns. + */ @PackageScope -@CompileStatic @EqualsAndHashCode @ToString(includeNames = true) class RecordAvroSchemaDecl extends AvroSchemaDecl { + final String recordName final Map fields - RecordAvroSchemaDecl(String recordName, Map fields) { this.recordName = recordName this.fields = fields.asImmutable() } - @Override Map toMap() { Map result = [kind: 'record', fields: AvroSchemaDecl.columnSchemasToMap(fields)] @@ -28,7 +27,6 @@ class RecordAvroSchemaDecl extends AvroSchemaDecl { } result } - @Override @PackageScope Schema toAvroSchema(String defaultName, String namespace) { @@ -44,4 +42,5 @@ class RecordAvroSchemaDecl extends AvroSchemaDecl { recordSchema.setFields(recordFields) recordSchema } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy index 68fbbcc32..9c073ad42 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy @@ -1,31 +1,30 @@ package se.alipsa.matrix.avro -import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString - import org.apache.avro.Schema +/** + * Avro schema declaration for scalar column values. 
+ */ @PackageScope -@CompileStatic @EqualsAndHashCode @ToString(includeNames = true) class ScalarAvroSchemaDecl extends AvroSchemaDecl { - final AvroScalarTypeDecl scalarType + final AvroScalarTypeDecl scalarType ScalarAvroSchemaDecl(AvroScalarTypeDecl scalarType) { this.scalarType = scalarType } - @Override Map toMap() { [kind: 'scalar', scalarType: scalarType.name()] } - @Override @PackageScope Schema toAvroSchema(String defaultName, String namespace) { AvroSchemaUtil.scalarSchema(scalarType) } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/SchemaCacheKey.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/SchemaCacheKey.groovy new file mode 100644 index 000000000..fd0d45781 --- /dev/null +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/SchemaCacheKey.groovy @@ -0,0 +1,59 @@ +package se.alipsa.matrix.avro + +import groovy.transform.PackageScope + +/** + * Cache key for inferred or configured Avro schemas. + */ +@PackageScope +final class SchemaCacheKey { + + private static final int HASH_FACTOR = 31 + private final String schemaName + private final String namespace + private final boolean inferPrecisionAndScale + private final int rowCount + private final List columnNames + private final List> columnTypes + private final Map> columnSchemas + SchemaCacheKey(String schemaName, String namespace, boolean inferPrecisionAndScale, + int rowCount, List columnNames, List> columnTypes, + Map> columnSchemas) { + this.schemaName = schemaName + this.namespace = namespace + this.inferPrecisionAndScale = inferPrecisionAndScale + this.rowCount = rowCount + this.columnNames = Collections.unmodifiableList(new ArrayList<>(columnNames)) + this.columnTypes = Collections.unmodifiableList(new ArrayList<>(columnTypes)) + this.columnSchemas = columnSchemas + } + @Override + boolean equals(Object other) { + if (this.is(other)) { + return true + } + if (!SchemaCacheKey.isInstance(other)) { + return false + } + SchemaCacheKey that = (SchemaCacheKey) other + 
inferPrecisionAndScale == that.inferPrecisionAndScale && + rowCount == that.rowCount && + schemaName == that.schemaName && + namespace == that.namespace && + columnNames == that.columnNames && + columnTypes == that.columnTypes && + columnSchemas == that.columnSchemas + } + @Override + int hashCode() { + int result = schemaName.hashCode() + result = HASH_FACTOR * result + namespace.hashCode() + result = HASH_FACTOR * result + (inferPrecisionAndScale ? 1 : 0) + result = HASH_FACTOR * result + rowCount + result = HASH_FACTOR * result + columnNames.hashCode() + result = HASH_FACTOR * result + columnTypes.hashCode() + result = HASH_FACTOR * result + columnSchemas.hashCode() + result + } + +} diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy index 20e005f0e..346815eb4 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy @@ -1,7 +1,5 @@ package se.alipsa.matrix.avro.exceptions -import groovy.transform.CompileStatic - /** * Exception thrown when there is an error converting between Avro and Java types. 
* @@ -18,31 +16,26 @@ import groovy.transform.CompileStatic * try { * Matrix m = MatrixAvroReader.read(file) * } catch (AvroConversionException e) { - * System.err.println("Conversion error at row " + e.getRowNumber() + ": " + e.getMessage()) + * System.err.println('Conversion error at row ' + e.getRowNumber() + ': ' + e.getMessage()) * if (e.getColumnName() != null) { - * System.err.println(" Column: " + e.getColumnName()) + * System.err.println(' Column: ' + e.getColumnName()) * } * } * } */ -@CompileStatic class AvroConversionException extends RuntimeException { + private static final int NO_ROW = -1 /** The column name associated with this error, if applicable */ private final String columnName - - /** The row number (0-based) where the error occurred, or -1 if not applicable */ + /** The row number (0-based) where the error occurred, or NO_ROW if not applicable */ private final int rowNumber - /** The source type that could not be converted */ private final String sourceType - /** The target type for the conversion */ private final String targetType - /** The value that could not be converted */ private final Object value - /** * Creates a new AvroConversionException with a message. * @@ -51,12 +44,11 @@ class AvroConversionException extends RuntimeException { AvroConversionException(String message) { super(message) this.columnName = null - this.rowNumber = -1 + this.rowNumber = NO_ROW this.sourceType = null this.targetType = null this.value = null } - /** * Creates a new AvroConversionException with a message and cause. * @@ -66,18 +58,17 @@ class AvroConversionException extends RuntimeException { AvroConversionException(String message, Throwable cause) { super(message, cause) this.columnName = null - this.rowNumber = -1 + this.rowNumber = NO_ROW this.sourceType = null this.targetType = null this.value = null } - /** * Creates a new AvroConversionException with full contextual information. 
* * @param message the error message * @param columnName the column name where the error occurred - * @param rowNumber the row number (0-based) where the error occurred, or -1 if not applicable + * @param rowNumber the row number (0-based) where the error occurred, or NO_ROW if not applicable * @param sourceType the source type that could not be converted * @param targetType the target type for the conversion * @param value the value that could not be converted @@ -91,13 +82,12 @@ class AvroConversionException extends RuntimeException { this.targetType = targetType this.value = value } - /** * Creates a new AvroConversionException with full contextual information and a cause. * * @param message the error message * @param columnName the column name where the error occurred - * @param rowNumber the row number (0-based) where the error occurred, or -1 if not applicable + * @param rowNumber the row number (0-based) where the error occurred, or NO_ROW if not applicable * @param sourceType the source type that could not be converted * @param targetType the target type for the conversion * @param value the value that could not be converted @@ -112,67 +102,60 @@ class AvroConversionException extends RuntimeException { this.targetType = targetType this.value = value } - /** * @return the column name where the error occurred, or null if not applicable */ String getColumnName() { return columnName } - /** - * @return the row number (0-based) where the error occurred, or -1 if not applicable + * @return the row number (0-based) where the error occurred, or NO_ROW if not applicable */ int getRowNumber() { return rowNumber } - /** * @return the source type that could not be converted, or null if not applicable */ String getSourceType() { return sourceType } - /** * @return the target type for the conversion, or null if not applicable */ String getTargetType() { return targetType } - /** * @return the value that could not be converted, or null if not applicable */ Object 
getValue() { return value } - private static String buildMessage(String message, String columnName, int rowNumber, String sourceType, String targetType, Object value) { StringBuilder sb = new StringBuilder(message) - List context = new ArrayList<>() - + List context = [] if (columnName != null) { - context.add("column: " + columnName) + context.add('column: ' + columnName) } if (rowNumber >= 0) { - context.add("row: " + rowNumber) + context.add('row: ' + rowNumber) } if (sourceType != null && targetType != null) { - context.add(sourceType + " -> " + targetType) + context.add(sourceType + ' -> ' + targetType) } if (value != null) { - String valueStr = value.toString() + String valueStr = value if (valueStr.length() > 50) { - valueStr = valueStr.substring(0, 47) + "..." + valueStr = valueStr.substring(0, 47) + '...' } - context.add("value: " + valueStr) + context.add('value: ' + valueStr) } - if (!context.isEmpty()) { - sb.append(" [").append(String.join(", ", context)).append("]") + sb.append(' [').append(String.join(', ', context)).append(']') } return sb.toString() } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy index 140a79a12..b8f30dde7 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy @@ -1,7 +1,5 @@ package se.alipsa.matrix.avro.exceptions -import groovy.transform.CompileStatic - /** * Exception thrown when there is a problem with Avro schema generation or processing. 
* @@ -18,25 +16,23 @@ import groovy.transform.CompileStatic * try { * Matrix m = MatrixAvroReader.read(file) * } catch (AvroSchemaException e) { - * System.err.println("Schema error: " + e.getMessage()) + * System.err.println('Schema error: ' + e.getMessage()) * if (e.getColumnName() != null) { - * System.err.println(" Column: " + e.getColumnName()) + * System.err.println(' Column: ' + e.getColumnName()) * } * } * } */ -@CompileStatic class AvroSchemaException extends RuntimeException { + private static final String EXPECTED_PREFIX = ' (expected: ' + private static final String DETAILS_SUFFIX = ')' /** The column name associated with this error, if applicable */ private final String columnName - /** The expected type, if applicable */ private final String expectedType - /** The actual type encountered, if applicable */ private final String actualType - /** * Creates a new AvroSchemaException with a message. * @@ -48,7 +44,6 @@ class AvroSchemaException extends RuntimeException { this.expectedType = null this.actualType = null } - /** * Creates a new AvroSchemaException with a message and cause. * @@ -61,7 +56,6 @@ class AvroSchemaException extends RuntimeException { this.expectedType = null this.actualType = null } - /** * Creates a new AvroSchemaException with contextual information. * @@ -76,7 +70,6 @@ class AvroSchemaException extends RuntimeException { this.expectedType = expectedType this.actualType = actualType } - /** * Creates a new AvroSchemaException with contextual information and a cause. 
* @@ -92,40 +85,37 @@ class AvroSchemaException extends RuntimeException { this.expectedType = expectedType this.actualType = actualType } - /** * @return the column name where the error occurred, or null if not applicable */ String getColumnName() { return columnName } - /** * @return the expected type, or null if not applicable */ String getExpectedType() { return expectedType } - /** * @return the actual type encountered, or null if not applicable */ String getActualType() { return actualType } - private static String buildMessage(String message, String columnName, String expectedType, String actualType) { StringBuilder sb = new StringBuilder(message) if (columnName != null) { - sb.append(" [column: ").append(columnName).append("]") + sb.append(' [column: ').append(columnName).append(']') } if (expectedType != null && actualType != null) { - sb.append(" (expected: ").append(expectedType).append(", actual: ").append(actualType).append(")") + sb.append(EXPECTED_PREFIX).append(expectedType).append(', actual: ').append(actualType).append(DETAILS_SUFFIX) } else if (expectedType != null) { - sb.append(" (expected: ").append(expectedType).append(")") + sb.append(EXPECTED_PREFIX).append(expectedType).append(DETAILS_SUFFIX) } else if (actualType != null) { - sb.append(" (actual: ").append(actualType).append(")") + sb.append(' (actual: ').append(actualType).append(DETAILS_SUFFIX) } return sb.toString() } + } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy index 48a5352e1..b0b7029bf 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy @@ -1,7 +1,5 @@ package se.alipsa.matrix.avro.exceptions -import groovy.transform.CompileStatic - /** * Exception thrown when input validation 
fails for Avro operations. * @@ -18,25 +16,24 @@ import groovy.transform.CompileStatic * try { * MatrixAvroWriter.write(matrix, file) * } catch (AvroValidationException e) { - * System.err.println("Validation error: " + e.getMessage()) + * System.err.println('Validation error: ' + e.getMessage()) * if (e.getSuggestion() != null) { - * System.err.println(" Suggestion: " + e.getSuggestion()) + * System.err.println(' Suggestion: ' + e.getSuggestion()) * } * } * } */ -@CompileStatic class AvroValidationException extends IllegalArgumentException { + private static final int NO_ROW = -1 + private static final String MATRIX_PARAMETER = 'matrix' + private static final String FILE_PARAMETER = 'file' /** The parameter name that failed validation, if applicable */ private final String parameterName - /** A helpful suggestion for fixing the error */ private final String suggestion - - /** The row number associated with this error, or -1 if not applicable */ + /** The row number associated with this error, or NO_ROW if not applicable */ private final int rowNumber - /** * Creates a new AvroValidationException with a message. * @@ -46,9 +43,8 @@ class AvroValidationException extends IllegalArgumentException { super(message) this.parameterName = null this.suggestion = null - this.rowNumber = -1 + this.rowNumber = NO_ROW } - /** * Creates a new AvroValidationException with a message and cause. * @@ -59,9 +55,8 @@ class AvroValidationException extends IllegalArgumentException { super(message, cause) this.parameterName = null this.suggestion = null - this.rowNumber = -1 + this.rowNumber = NO_ROW } - /** * Creates a new AvroValidationException with contextual information. 
* @@ -70,12 +65,11 @@ class AvroValidationException extends IllegalArgumentException { * @param suggestion a helpful suggestion for fixing the error */ AvroValidationException(String message, String parameterName, String suggestion) { - super(buildMessage(message, parameterName, -1, suggestion)) + super(buildMessage(message, parameterName, NO_ROW, suggestion)) this.parameterName = parameterName this.suggestion = suggestion - this.rowNumber = -1 + this.rowNumber = NO_ROW } - /** * Creates a new AvroValidationException with contextual information and a cause. * @@ -85,12 +79,11 @@ class AvroValidationException extends IllegalArgumentException { * @param cause the underlying cause */ AvroValidationException(String message, String parameterName, String suggestion, Throwable cause) { - super(buildMessage(message, parameterName, -1, suggestion), cause) + super(buildMessage(message, parameterName, NO_ROW, suggestion), cause) this.parameterName = parameterName this.suggestion = suggestion - this.rowNumber = -1 + this.rowNumber = NO_ROW } - /** * Creates a new AvroValidationException with contextual information and row number. 
* @@ -105,46 +98,41 @@ class AvroValidationException extends IllegalArgumentException { this.suggestion = suggestion this.rowNumber = rowNumber } - /** * @return the parameter name that failed validation, or null if not applicable */ String getParameterName() { return parameterName } - /** * @return a helpful suggestion for fixing the error, or null if none available */ String getSuggestion() { return suggestion } - /** - * @return the row number where validation failed, or -1 if not applicable + * @return the row number where validation failed, or NO_ROW if not applicable */ int getRowNumber() { return rowNumber } - private static String buildMessage(String message, String parameterName, int rowNumber, String suggestion) { StringBuilder sb = new StringBuilder(message) - List context = new ArrayList<>() + List context = [] if (parameterName != null) { - context.add("parameter: " + parameterName) + context.add('parameter: ' + parameterName) } if (rowNumber >= 0) { - context.add("row: " + rowNumber) + context.add('row: ' + rowNumber) } if (!context.isEmpty()) { - sb.append(" [").append(String.join(", ", context)).append("]") + sb.append(' [').append(String.join(', ', context)).append(']') } if (suggestion != null) { - sb.append(". Suggestion: ").append(suggestion) + sb.append('. Suggestion: ').append(suggestion) } return sb.toString() } - /** * Creates an exception for a null parameter. * @@ -158,7 +146,6 @@ class AvroValidationException extends IllegalArgumentException { "Provide a non-null value for ${parameterName}" ) } - /** * Creates an exception for an empty Matrix. 
* @@ -166,12 +153,11 @@ class AvroValidationException extends IllegalArgumentException { */ static AvroValidationException emptyMatrix() { return new AvroValidationException( - "Matrix must have at least one column", - "matrix", - "Add at least one column to the Matrix before writing" + 'Matrix must have at least one column', + MATRIX_PARAMETER, + 'Add at least one column to the Matrix before writing' ) } - /** * Creates an exception for uneven column lengths. * @@ -184,12 +170,11 @@ class AvroValidationException extends IllegalArgumentException { static AvroValidationException columnSizeMismatch(String columnName, int rowNumber, int columnSize, int rowCount) { return new AvroValidationException( "Column '${columnName}' size (${columnSize}) does not match matrix row count (${rowCount})", - "matrix", + MATRIX_PARAMETER, rowNumber, - "Ensure all columns have the same number of rows before writing" + 'Ensure all columns have the same number of rows before writing' ) } - /** * Creates an exception for a file that doesn't exist. * @@ -199,11 +184,10 @@ class AvroValidationException extends IllegalArgumentException { static AvroValidationException fileNotFound(String path) { return new AvroValidationException( "File does not exist: ${path}", - "file", - "Check that the file path is correct and the file exists" + FILE_PARAMETER, + 'Check that the file path is correct and the file exists' ) } - /** * Creates an exception for a path that is a directory instead of a file. 
* @@ -213,8 +197,9 @@ class AvroValidationException extends IllegalArgumentException { static AvroValidationException isDirectory(String path) { return new AvroValidationException( "Expected a file but got a directory: ${path}", - "file", - "Provide a path to a file, not a directory" + FILE_PARAMETER, + 'Provide a path to a file, not a directory' ) } + } diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy index 8ef996e0a..6e8e01c62 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy @@ -1,32 +1,38 @@ package se.alipsa.matrix.avro.benchmarks -import groovy.transform.CompileStatic - -@CompileStatic +/** + * Shared helpers for Matrix Avro benchmark entry points. + */ final class BenchmarkUtils { + private static final String ZERO_MS = '0' + private static final String TWO_DECIMALS = '%.2f' + private static final double NANOS_PER_MILLI = 1_000_000.0d private BenchmarkUtils() { } - static long timeNs(Closure action) { long start = System.nanoTime() action.call() return System.nanoTime() - start } - static String avgMs(List values) { - if (values.isEmpty()) return "0" + if (values.isEmpty()) { + return ZERO_MS + } long sum = values.sum() as long - return String.format(Locale.US, "%.2f", sum / 1_000_000.0d / values.size()) + return String.format(Locale.US, TWO_DECIMALS, sum / NANOS_PER_MILLI / values.size()) } - static String minMs(List values) { - if (values.isEmpty()) return "0" - return String.format(Locale.US, "%.2f", (values.min() as long) / 1_000_000.0d) + if (values.isEmpty()) { + return ZERO_MS + } + return String.format(Locale.US, TWO_DECIMALS, (values.min() as long) / NANOS_PER_MILLI) } - static String maxMs(List values) { - if (values.isEmpty()) return "0" - return String.format(Locale.US, "%.2f", 
(values.max() as long) / 1_000_000.0d) + if (values.isEmpty()) { + return ZERO_MS + } + return String.format(Locale.US, TWO_DECIMALS, (values.max() as long) / NANOS_PER_MILLI) } + } diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy index 65e3728cc..279999710 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy @@ -1,29 +1,28 @@ package se.alipsa.matrix.avro.benchmarks -import groovy.transform.CompileStatic - import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix - import java.nio.file.Files -@CompileStatic +/** + * Runs Matrix Avro read/write throughput benchmarks. + */ class MatrixAvroBenchmark { + private static final int RUNS_ARG_INDEX = 2 + private static final int DEFAULT_RUNS = 5 + private static final int VALUE_MULTIPLIER = 2 static void main(String[] args) { int rows = args.size() > 0 ? (args[0] as int) : 100_000 int warmups = args.size() > 1 ? (args[1] as int) : 2 - int runs = args.size() > 2 ? (args[2] as int) : 5 - + int runs = args.size() > RUNS_ARG_INDEX ? 
(args[RUNS_ARG_INDEX] as int) : DEFAULT_RUNS Matrix matrix = buildMatrix(rows) - File tmp = Files.createTempFile("matrix-avro-benchmark-", ".avro").toFile() - + File tmp = Files.createTempFile('matrix-avro-benchmark-', '.avro').toFile() try { for (int i = 0; i < warmups; i++) { runOnce(matrix, tmp) } - List writeTimes = [] List readTimes = [] for (int i = 0; i < runs; i++) { @@ -31,8 +30,7 @@ class MatrixAvroBenchmark { writeTimes << results[0] readTimes << results[1] } - - println "MatrixAvroBenchmark" + println 'MatrixAvroBenchmark' println "Rows: ${rows}, Warmups: ${warmups}, Runs: ${runs}" println "Write ms avg: ${BenchmarkUtils.avgMs(writeTimes)} (min: ${BenchmarkUtils.minMs(writeTimes)}, max: ${BenchmarkUtils.maxMs(writeTimes)})" println "Read ms avg: ${BenchmarkUtils.avgMs(readTimes)} (min: ${BenchmarkUtils.minMs(readTimes)}, max: ${BenchmarkUtils.maxMs(readTimes)})" @@ -41,7 +39,6 @@ class MatrixAvroBenchmark { tmp.delete() } } - private static List runOnce(Matrix matrix, File tmp) { long writeNs = BenchmarkUtils.timeNs { MatrixAvroWriter.write(matrix, tmp) @@ -51,16 +48,15 @@ class MatrixAvroBenchmark { } return [writeNs, readNs] } - private static Matrix buildMatrix(int rows) { - def cols = new LinkedHashMap>() - cols["id"] = (1..rows).toList() - cols["value"] = (1..rows).collect { it * 2 } - cols["name"] = (1..rows).collect { "row" + it } - - return Matrix.builder("Benchmark") + Map> cols = [:] + cols['id'] = (1..rows).toList() + cols['value'] = (1..rows).collect { it * VALUE_MULTIPLIER } + cols['name'] = (1..rows).collect { 'row' + it } + return Matrix.builder('Benchmark') .columns(cols) .types(Integer, Integer, String) .build() } + } diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy index e15e21409..454a79aac 100644 --- 
a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy @@ -1,32 +1,32 @@ package se.alipsa.matrix.avro.benchmarks -import groovy.transform.CompileStatic - import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix - import java.time.Instant import java.time.LocalDate import java.time.LocalDateTime import java.time.LocalTime -import java.util.UUID -@CompileStatic +/** + * Runs Matrix Avro in-memory conversion benchmarks. + */ class MatrixAvroConversionBenchmark { + private static final int RUNS_ARG_INDEX = 2 + private static final int DEFAULT_RUNS = 5 + private static final int BENCHMARK_YEAR = 2024 + private static final int SECONDS_PER_MINUTE = 60 + private static final int TAG_MODULO = 5 static void main(String[] args) { int rows = args.size() > 0 ? (args[0] as int) : 50_000 int warmups = args.size() > 1 ? (args[1] as int) : 2 - int runs = args.size() > 2 ? (args[2] as int) : 5 - + int runs = args.size() > RUNS_ARG_INDEX ? 
(args[RUNS_ARG_INDEX] as int) : DEFAULT_RUNS Matrix matrix = buildMatrix(rows) - for (int i = 0; i < warmups; i++) { byte[] bytes = MatrixAvroWriter.writeBytes(matrix, true) - MatrixAvroReader.read(bytes, "Warmup") + MatrixAvroReader.read(bytes, 'Warmup') } - List writeTimes = [] List readTimes = [] for (int i = 0; i < runs; i++) { @@ -35,34 +35,32 @@ class MatrixAvroConversionBenchmark { bytes = MatrixAvroWriter.writeBytes(matrix, true) } long readNs = BenchmarkUtils.timeNs { - MatrixAvroReader.read(bytes, "Conversion") + MatrixAvroReader.read(bytes, 'Conversion') } writeTimes << writeNs readTimes << readNs } - - println "MatrixAvroConversionBenchmark" + println 'MatrixAvroConversionBenchmark' println "Rows: ${rows}, Warmups: ${warmups}, Runs: ${runs}" println "Write bytes avg: ${BenchmarkUtils.avgMs(writeTimes)} (min: ${BenchmarkUtils.minMs(writeTimes)}, max: ${BenchmarkUtils.maxMs(writeTimes)})" println "Read bytes avg: ${BenchmarkUtils.avgMs(readTimes)} (min: ${BenchmarkUtils.minMs(readTimes)}, max: ${BenchmarkUtils.maxMs(readTimes)})" } - private static Matrix buildMatrix(int rows) { - def cols = new LinkedHashMap>() - cols["id"] = (1..rows).toList() - cols["amount"] = (1..rows).collect { it / 7 } - cols["date"] = (1..rows).collect { LocalDate.of(2024, 1, 1).plusDays(it % 365) } - cols["time"] = (1..rows).collect { LocalTime.of(10, 30).plusSeconds(it % 60) } - cols["timestamp"] = (1..rows).collect { Instant.ofEpochMilli(1_700_000_000_000L + it) } - cols["localTs"] = (1..rows).collect { LocalDateTime.of(2024, 1, 1, 12, 0).plusMinutes(it % 60) } - cols["uuid"] = (1..rows).collect { UUID.nameUUIDFromBytes(("id-${it}").bytes) } - cols["tags"] = (1..rows).collect { ["t${it % 5}", "t${(it + 1) % 5}"] } - cols["props"] = (1..rows).collect { [a: it, b: "v${it}"] } - cols["blob"] = (1..rows).collect { ("row-${it}" as String).bytes } - - return Matrix.builder("ConversionBenchmark") + Map> cols = [:] + cols['id'] = (1..rows).toList() + cols['amount'] = (1..rows).collect 
{ it / 7 } + cols['date'] = (1..rows).collect { LocalDate.of(BENCHMARK_YEAR, 1, 1).plusDays(it % 365) } + cols['time'] = (1..rows).collect { LocalTime.of(10, 30).plusSeconds(it % SECONDS_PER_MINUTE) } + cols['timestamp'] = (1..rows).collect { Instant.ofEpochMilli(1_700_000_000_000L + it) } + cols['localTs'] = (1..rows).collect { LocalDateTime.of(BENCHMARK_YEAR, 1, 1, 12, 0).plusMinutes(it % SECONDS_PER_MINUTE) } + cols['uuid'] = (1..rows).collect { UUID.nameUUIDFromBytes(("id-${it}").bytes) } + cols['tags'] = (1..rows).collect { ["t${it % TAG_MODULO}", "t${(it + 1) % TAG_MODULO}"] } + cols['props'] = (1..rows).collect { [a: it, b: "v${it}"] } + cols['blob'] = (1..rows).collect { ("row-${it}" as String).bytes } + return Matrix.builder('ConversionBenchmark') .columns(cols) .types(Integer, BigDecimal, LocalDate, LocalTime, Instant, LocalDateTime, UUID, List, Map, byte[]) .build() } + } diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy index 633fd4aa2..ecb5e3e8f 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy @@ -1,30 +1,30 @@ package se.alipsa.matrix.avro.benchmarks -import groovy.transform.CompileStatic - import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix import se.alipsa.matrix.parquet.MatrixParquetReader import se.alipsa.matrix.parquet.MatrixParquetWriter - import java.nio.file.Files import java.time.LocalDate import java.time.LocalDateTime -@CompileStatic +/** + * Compares Matrix Avro and Parquet read/write benchmarks. 
+ */ class MatrixAvroParquetBenchmark { + private static final int RUNS_ARG_INDEX = 2 + private static final int DEFAULT_RUNS = 5 + private static final int BENCHMARK_YEAR = 2024 + private static final String TEMP_FILE_PREFIX = 'matrix-avro-parquet-benchmark-' static void main(String[] args) { int rows = args.size() > 0 ? (args[0] as int) : 100_000 int warmups = args.size() > 1 ? (args[1] as int) : 2 - int runs = args.size() > 2 ? (args[2] as int) : 5 - + int runs = args.size() > RUNS_ARG_INDEX ? (args[RUNS_ARG_INDEX] as int) : DEFAULT_RUNS Matrix matrix = buildMatrix(rows) - - File avroFile = Files.createTempFile("matrix-avro-parquet-benchmark-", ".avro").toFile() - File parquetFile = Files.createTempFile("matrix-avro-parquet-benchmark-", ".parquet").toFile() - + File avroFile = Files.createTempFile(TEMP_FILE_PREFIX, '.avro').toFile() + File parquetFile = Files.createTempFile(TEMP_FILE_PREFIX, '.parquet').toFile() try { for (int i = 0; i < warmups; i++) { MatrixAvroWriter.write(matrix, avroFile, true) @@ -32,12 +32,10 @@ class MatrixAvroParquetBenchmark { MatrixParquetWriter.write(matrix, parquetFile, true) MatrixParquetReader.read(parquetFile) } - List avroWriteTimes = [] List avroReadTimes = [] List parquetWriteTimes = [] List parquetReadTimes = [] - for (int i = 0; i < runs; i++) { avroWriteTimes << BenchmarkUtils.timeNs { MatrixAvroWriter.write(matrix, avroFile, true) @@ -45,7 +43,6 @@ class MatrixAvroParquetBenchmark { avroReadTimes << BenchmarkUtils.timeNs { MatrixAvroReader.read(avroFile) } - parquetWriteTimes << BenchmarkUtils.timeNs { MatrixParquetWriter.write(matrix, parquetFile, true) } @@ -53,8 +50,7 @@ class MatrixAvroParquetBenchmark { MatrixParquetReader.read(parquetFile) } } - - println "MatrixAvroParquetBenchmark" + println 'MatrixAvroParquetBenchmark' println "Rows: ${rows}, Warmups: ${warmups}, Runs: ${runs}" println "Avro write avg: ${BenchmarkUtils.avgMs(avroWriteTimes)} (min: ${BenchmarkUtils.minMs(avroWriteTimes)}, max: 
${BenchmarkUtils.maxMs(avroWriteTimes)})" println "Avro read avg: ${BenchmarkUtils.avgMs(avroReadTimes)} (min: ${BenchmarkUtils.minMs(avroReadTimes)}, max: ${BenchmarkUtils.maxMs(avroReadTimes)})" @@ -67,18 +63,17 @@ class MatrixAvroParquetBenchmark { parquetFile.delete() } } - private static Matrix buildMatrix(int rows) { - def cols = new LinkedHashMap>() - cols["id"] = (1..rows).toList() - cols["amount"] = (1..rows).collect { it / 10 } - cols["name"] = (1..rows).collect { "row${it}" } - cols["created"] = (1..rows).collect { LocalDate.of(2024, 1, 1).plusDays(it % 365) } - cols["event"] = (1..rows).collect { LocalDateTime.of(2024, 1, 1, 12, 0).plusMinutes(it % 60) } - - return Matrix.builder("AvroParquetBenchmark") + Map> cols = [:] + cols['id'] = (1..rows).toList() + cols['amount'] = (1..rows).collect { it / 10 } + cols['name'] = (1..rows).collect { "row${it}" } + cols['created'] = (1..rows).collect { LocalDate.of(BENCHMARK_YEAR, 1, 1).plusDays(it % 365) } + cols['event'] = (1..rows).collect { LocalDateTime.of(BENCHMARK_YEAR, 1, 1, 12, 0).plusMinutes(it % 60) } + return Matrix.builder('AvroParquetBenchmark') .columns(cols) .types(Integer, BigDecimal, String, LocalDate, LocalDateTime) .build() } + } diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy index 54328afa2..1029930ef 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy @@ -1,32 +1,31 @@ package se.alipsa.matrix.avro.benchmarks -import groovy.transform.CompileStatic - import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix - import java.time.LocalDate import java.time.LocalDateTime -@CompileStatic +/** + * Runs Matrix Avro schema inference benchmarks. 
+ */ class MatrixAvroSchemaBenchmark { + private static final int RUNS_ARG_INDEX = 2 + private static final int DEFAULT_RUNS = 5 + private static final int BENCHMARK_YEAR = 2024 + private static final int TAG_MODULO = 3 static void main(String[] args) { int rows = args.size() > 0 ? (args[0] as int) : 100_000 int warmups = args.size() > 1 ? (args[1] as int) : 2 - int runs = args.size() > 2 ? (args[2] as int) : 5 - + int runs = args.size() > RUNS_ARG_INDEX ? (args[RUNS_ARG_INDEX] as int) : DEFAULT_RUNS Matrix matrix = buildMatrix(rows) - List inferFalse = runSchemaBenchmark(matrix, warmups, runs, false) List inferTrue = runSchemaBenchmark(matrix, warmups, runs, true) - - println "MatrixAvroSchemaBenchmark" + println 'MatrixAvroSchemaBenchmark' println "Rows: ${rows}, Warmups: ${warmups}, Runs: ${runs}" println "Infer precision/scale = false avg: ${BenchmarkUtils.avgMs(inferFalse)} (min: ${BenchmarkUtils.minMs(inferFalse)}, max: ${BenchmarkUtils.maxMs(inferFalse)})" println "Infer precision/scale = true avg: ${BenchmarkUtils.avgMs(inferTrue)} (min: ${BenchmarkUtils.minMs(inferTrue)}, max: ${BenchmarkUtils.maxMs(inferTrue)})" } - private static List runSchemaBenchmark(Matrix matrix, int warmups, int runs, boolean infer) { for (int i = 0; i < warmups; i++) { MatrixAvroWriter.buildSchema(matrix, infer) @@ -39,19 +38,18 @@ class MatrixAvroSchemaBenchmark { } return times } - private static Matrix buildMatrix(int rows) { - def cols = new LinkedHashMap>() - cols["id"] = (1..rows).toList() - cols["amount"] = (1..rows).collect { it / 10 } - cols["created"] = (1..rows).collect { LocalDate.of(2024, 1, 1).plusDays(it % 365) } - cols["event"] = (1..rows).collect { LocalDateTime.of(2024, 1, 1, 12, 0).plusMinutes(it % 60) } - cols["tags"] = (1..rows).collect { ["tag${it % 3}", "tag${(it + 1) % 3}"] } - cols["attrs"] = (1..rows).collect { [a: it, b: "v${it}"] } - - return Matrix.builder("SchemaBenchmark") + Map> cols = [:] + cols['id'] = (1..rows).toList() + cols['amount'] = 
(1..rows).collect { it / 10 } + cols['created'] = (1..rows).collect { LocalDate.of(BENCHMARK_YEAR, 1, 1).plusDays(it % 365) } + cols['event'] = (1..rows).collect { LocalDateTime.of(BENCHMARK_YEAR, 1, 1, 12, 0).plusMinutes(it % 60) } + cols['tags'] = (1..rows).collect { ["tag${it % TAG_MODULO}", "tag${(it + 1) % TAG_MODULO}"] } + cols['attrs'] = (1..rows).collect { [a: it, b: "v${it}"] } + return Matrix.builder('SchemaBenchmark') .columns(cols) .types(Integer, BigDecimal, LocalDate, LocalDateTime, List, Map) .build() } + } diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy index 932178ac0..ca3c5c3a3 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy @@ -2,35 +2,35 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.assertEquals import static org.junit.jupiter.api.Assertions.assertTrue - import org.junit.jupiter.api.Test - import se.alipsa.matrix.avro.exceptions.AvroConversionException + import se.alipsa.matrix.avro.exceptions.AvroSchemaException class AvroExceptionTest { @Test void testConversionExceptionMessageIncludesContext() { - String longValue = "x" * 60 - def ex = new AvroConversionException("Failed", "colA", 3, "String", "INT", longValue) - - assertTrue(ex.message.contains("Failed")) - assertTrue(ex.message.contains("column: colA")) - assertTrue(ex.message.contains("row: 3")) - assertTrue(ex.message.contains("String -> INT")) - assertTrue(ex.message.contains("value: ")) - assertTrue(ex.message.contains("...")) + String longValue = 'x' * 60 + def ex = new AvroConversionException('Failed', 'colA', 3, 'String', 'INT', longValue) + + assertTrue(ex.message.contains('Failed')) + assertTrue(ex.message.contains('column: colA')) + assertTrue(ex.message.contains('row: 3')) + 
assertTrue(ex.message.contains('String -> INT')) + assertTrue(ex.message.contains('value: ')) + assertTrue(ex.message.contains('...')) } @Test void testSchemaExceptionMessageIncludesExpectedAndActual() { - def ex = new AvroSchemaException("Schema mismatch", "colA", "INT", "String") + def ex = new AvroSchemaException('Schema mismatch', 'colA', 'INT', 'String') - assertEquals("colA", ex.columnName) - assertEquals("INT", ex.expectedType) - assertEquals("String", ex.actualType) - assertTrue(ex.message.contains("expected: INT")) - assertTrue(ex.message.contains("actual: String")) + assertEquals('colA', ex.columnName) + assertEquals('INT', ex.expectedType) + assertEquals('String', ex.actualType) + assertTrue(ex.message.contains('expected: INT')) + assertTrue(ex.message.contains('actual: String')) } + } diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy index a47cd8d54..a15c69219 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy @@ -3,10 +3,8 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.assertEquals import static org.junit.jupiter.api.Assertions.assertFalse import static org.junit.jupiter.api.Assertions.assertTrue - import org.junit.jupiter.api.Test import org.junit.jupiter.api.io.TempDir - import se.alipsa.matrix.avro.AvroFormatProvider import se.alipsa.matrix.avro.AvroReadOptions import se.alipsa.matrix.avro.AvroSchemaDecl @@ -36,7 +34,7 @@ class AvroFormatProviderTest { Matrix source = Matrix.builder('orders') .columns( id: [1, 2], - amount: [new BigDecimal('12.34'), new BigDecimal('56.78')] + amount: [12.34, 56.78] ) .types([Integer, BigDecimal]) .build() @@ -51,7 +49,7 @@ class AvroFormatProviderTest { Matrix matrix = Matrix.read([matrixName: 'loaded-orders'], file) 
assertEquals('loaded-orders', matrix.matrixName) assertEquals(source.columnNames(), matrix.columnNames()) - assertEquals(new BigDecimal('12.34'), matrix[0, 'amount']) + assertEquals(12.34, matrix[0, 'amount']) } @Test @@ -59,7 +57,7 @@ class AvroFormatProviderTest { Matrix source = Matrix.builder('orders') .columns( id: [1, 2], - amount: [new BigDecimal('12.34'), new BigDecimal('56.78')] + amount: [12.34, 56.78] ) .types([Integer, BigDecimal]) .build() @@ -105,7 +103,7 @@ class AvroFormatProviderTest { @Test void testReadOptionsRoundTripFromMapToMap() { - String readerSchemaJson = """ + String readerSchemaJson = ''' { "type": "record", "name": "ProjectedOrders", @@ -113,7 +111,7 @@ class AvroFormatProviderTest { {"name":"id", "type":"int"} ] } - """.stripIndent() + '''.stripIndent() AvroReadOptions options = AvroReadOptions.fromMap([ matrixName : 'OrdersView', @@ -132,7 +130,7 @@ class AvroFormatProviderTest { Matrix source = Matrix.builder('orders') .columns( id: [1, 2], - amount: [new BigDecimal('12.34'), new BigDecimal('56.78')] + amount: [12.34, 56.78] ) .types([Integer, BigDecimal]) .build() @@ -156,7 +154,7 @@ class AvroFormatProviderTest { Matrix source = Matrix.builder('orders') .columns( id: [1, 2], - amount: [new BigDecimal('12.34'), new BigDecimal('56.78')] + amount: [12.34, 56.78] ) .types([Integer, BigDecimal]) .build() @@ -230,7 +228,7 @@ class AvroFormatProviderTest { void testTypedAndSpiColumnSchemasProduceSameSchema() { Matrix source = Matrix.builder('typedWrite') .columns( - amount: [new BigDecimal('12.34'), new BigDecimal('56.78')], + amount: [12.34, 56.78], props: [[x: 1, y: 2], [y: 3, z: 4]], tags: [[1L, 2L], [3L, null]] ) @@ -281,4 +279,5 @@ class AvroFormatProviderTest { } assertTrue(ex.message.contains('decimal.scale')) } + } diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy index d178456a0..dc5ffbd93 100644 --- 
a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy @@ -4,9 +4,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals import static org.junit.jupiter.api.Assertions.assertNotNull import static org.junit.jupiter.api.Assertions.assertThrows import static org.junit.jupiter.api.Assertions.assertTrue - import org.junit.jupiter.api.Test - import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.avro.exceptions.AvroSchemaException @@ -18,23 +16,23 @@ class MatrixAvroEdgeCaseTest { @Test void testEmptyMatrixZeroRowsRoundTrip() { - def cols = new LinkedHashMap>() - cols["id"] = [] - cols["name"] = [] + Map> cols = [:] + cols['id'] = [] + cols['name'] = [] - Matrix m = Matrix.builder("EmptyRows") + Matrix m = Matrix.builder('EmptyRows') .columns(cols) .types(Integer, String) .build() - File tmp = Files.createTempFile("avro-empty-rows-", ".avro").toFile() + File tmp = Files.createTempFile('avro-empty-rows-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp) Matrix result = MatrixAvroReader.read(tmp) assertEquals(0, result.rowCount()) assertEquals(2, result.columnCount()) - assertEquals(["id", "name"], result.columnNames()) + assertEquals(['id', 'name'], result.columnNames()) } finally { tmp.delete() } @@ -42,24 +40,24 @@ class MatrixAvroEdgeCaseTest { @Test void testSingleRowRoundTrip() { - def cols = new LinkedHashMap>() - cols["id"] = [1] - cols["name"] = ["Alice"] + Map> cols = [:] + cols['id'] = [1] + cols['name'] = ['Alice'] - Matrix m = Matrix.builder("SingleRow") + Matrix m = Matrix.builder('SingleRow') .columns(cols) .types(Integer, String) .build() - File tmp = Files.createTempFile("avro-single-row-", ".avro").toFile() + File tmp = Files.createTempFile('avro-single-row-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp) Matrix result = MatrixAvroReader.read(tmp) 
assertEquals(1, result.rowCount()) assertEquals(2, result.columnCount()) - assertEquals(1, result[0, "id"]) - assertEquals("Alice", result[0, "name"]) + assertEquals(1, result[0, 'id']) + assertEquals('Alice', result[0, 'name']) } finally { tmp.delete() } @@ -67,22 +65,22 @@ class MatrixAvroEdgeCaseTest { @Test void testSingleColumnRoundTrip() { - def cols = new LinkedHashMap>() - cols["value"] = [1, 2, 3] + Map> cols = [:] + cols['value'] = [1, 2, 3] - Matrix m = Matrix.builder("SingleColumn") + Matrix m = Matrix.builder('SingleColumn') .columns(cols) .types(Integer) .build() - File tmp = Files.createTempFile("avro-single-col-", ".avro").toFile() + File tmp = Files.createTempFile('avro-single-col-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp) Matrix result = MatrixAvroReader.read(tmp) assertEquals(3, result.rowCount()) assertEquals(1, result.columnCount()) - assertEquals([1, 2, 3], result["value"]) + assertEquals([1, 2, 3], result['value']) } finally { tmp.delete() } @@ -90,28 +88,28 @@ class MatrixAvroEdgeCaseTest { @Test void testSpecialCharactersInStringValuesRoundTrip() { - def cols = new LinkedHashMap>() - cols["text"] = [ - "line1\nline2", - "tab\tvalue", - "quote \"inside\"", - "backslash \\\\", - "comma, semicolon;" + Map> cols = [:] + cols['text'] = [ + 'line1\nline2', + 'tab\tvalue', + 'quote "inside"', + 'backslash \\\\', + 'comma, semicolon;' ] - Matrix m = Matrix.builder("SpecialChars") + Matrix m = Matrix.builder('SpecialChars') .columns(cols) .types(String) .build() - File tmp = Files.createTempFile("avro-special-chars-", ".avro").toFile() + File tmp = Files.createTempFile('avro-special-chars-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp) Matrix result = MatrixAvroReader.read(tmp) assertEquals(5, result.rowCount()) - assertNotNull(result["text"]) - assertEquals(cols["text"], result["text"]) + assertNotNull(result['text']) + assertEquals(cols['text'], result['text']) } finally { tmp.delete() } @@ -119,21 +117,21 @@ class 
MatrixAvroEdgeCaseTest { @Test void testUnicodeColumnNameRejected() { - def cols = new LinkedHashMap>() - cols["naïve"] = [1, 2] + Map> cols = [:] + cols['naïve'] = [1, 2] - Matrix m = Matrix.builder("UnicodeColumns") + Matrix m = Matrix.builder('UnicodeColumns') .columns(cols) .types(Integer) .build() - File tmp = Files.createTempFile("avro-unicode-col-", ".avro").toFile() + File tmp = Files.createTempFile('avro-unicode-col-', '.avro').toFile() try { def ex = assertThrows(AvroSchemaException) { MatrixAvroWriter.write(m, tmp) } - assertEquals("naïve", ex.columnName) - assertTrue(ex.message.contains("Avro field name")) + assertEquals('naïve', ex.columnName) + assertTrue(ex.message.contains('Avro field name')) } finally { tmp.delete() } @@ -142,26 +140,27 @@ class MatrixAvroEdgeCaseTest { @Test void testLargeFileRoundTrip() { int rows = 10_000 - def cols = new LinkedHashMap>() - cols["id"] = (1..rows).toList() - cols["value"] = (1..rows).collect { it * 2 } - cols["name"] = (1..rows).collect { "row${it}" } + Map> cols = [:] + cols['id'] = (1..rows).toList() + cols['value'] = (1..rows).collect { it * 2 } + cols['name'] = (1..rows).collect { "row${it}" } - Matrix m = Matrix.builder("LargeFile") + Matrix m = Matrix.builder('LargeFile') .columns(cols) .types(Integer, Integer, String) .build() - File tmp = Files.createTempFile("avro-large-", ".avro").toFile() + File tmp = Files.createTempFile('avro-large-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp) Matrix result = MatrixAvroReader.read(tmp) assertEquals(rows, result.rowCount()) assertEquals(3, result.columnCount()) - assertEquals("row${rows}", result[rows - 1, "name"]) + assertEquals("row${rows}", result[rows - 1, 'name']) } finally { tmp.delete() } } + } diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy index 4df607bd7..bd4e372a4 100644 --- 
a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy @@ -1,7 +1,6 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.* - import org.apache.avro.Conversions import org.apache.avro.LogicalTypes import org.apache.avro.Schema @@ -10,16 +9,15 @@ import org.apache.avro.generic.GenericData import org.apache.avro.generic.GenericDatumWriter import org.apache.avro.generic.GenericRecord import org.junit.jupiter.api.* - import se.alipsa.matrix.avro.AvroReadOptions import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix - import java.nio.ByteBuffer import java.nio.file.Files import java.nio.file.Path import java.time.Instant + import java.time.LocalDate @TestMethodOrder(MethodOrderer.OrderAnnotation) @@ -27,28 +25,30 @@ class MatrixAvroReaderTest { private static File avroFile private static Schema schema - private static LocalDate birth1 = LocalDate.of(1990, 1, 5) - private static LocalDate birth2 = LocalDate.of(1984, 7, 23) - private static Instant ts1 = Instant.parse("2024-03-01T12:34:56Z") - private static Instant ts2 = Instant.parse("2024-12-24T08:09:10Z") + private static final LocalDate BIRTH1 = LocalDate.of(1990, 1, 5) + private static final LocalDate BIRTH2 = LocalDate.of(1984, 7, 23) + private static final Instant TS1 = Instant.parse('2024-03-01T12:34:56Z') + private static final Instant TS2 = Instant.parse('2024-12-24T08:09:10Z') @BeforeAll static void createAvroFixture() { schema = buildSchema() - avroFile = Files.createTempFile("matrix-avro-reader-", ".avro").toFile() + avroFile = Files.createTempFile('matrix-avro-reader-', '.avro').toFile() writeSampleOcfi(avroFile, schema) } @AfterAll static void cleanup() { - if (avroFile != null) avroFile.delete() + if (avroFile != null) { + avroFile.delete() + } } @Test @Order(1) void readFile() 
{ Matrix m = MatrixAvroReader.read(avroFile) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } @Test @Order(2) @@ -56,7 +56,7 @@ class MatrixAvroReaderTest { Path p = avroFile.toPath() Matrix m = MatrixAvroReader.read(p) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } @Test @Order(3) @@ -64,7 +64,7 @@ class MatrixAvroReaderTest { URL url = avroFile.toURI().toURL() Matrix m = MatrixAvroReader.read(url) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } @Test @Order(4) @@ -73,7 +73,7 @@ class MatrixAvroReaderTest { try { Matrix m = MatrixAvroReader.read(is) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } finally { is.close() } @@ -84,15 +84,15 @@ class MatrixAvroReaderTest { byte[] content = Files.readAllBytes(avroFile.toPath()) Matrix m = MatrixAvroReader.read(content) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } @Test @Order(6) void readFromByteArrayWithExplicitName() { byte[] content = Files.readAllBytes(avroFile.toPath()) - Matrix m = MatrixAvroReader.read(content, "FromBytes") + Matrix m = MatrixAvroReader.read(content, 'FromBytes') assertBasicShapeAndValues(m) - assertEquals("FromBytes", m.matrixName) + assertEquals('FromBytes', m.matrixName) } // ---------- convenience method tests ---------- @@ -101,14 +101,14 @@ class MatrixAvroReaderTest { void testReadFile() { Matrix m = MatrixAvroReader.readFile(avroFile.absolutePath) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } @Test @Order(8) void testReadUrl() { Matrix m = MatrixAvroReader.readUrl(avroFile.toURI().toString()) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } // ---------- validation 
tests ---------- @@ -118,7 +118,7 @@ class MatrixAvroReaderTest { def ex = assertThrows(AvroValidationException) { MatrixAvroReader.read((File) null) } - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(11) @@ -126,7 +126,7 @@ class MatrixAvroReaderTest { def ex = assertThrows(IllegalArgumentException) { MatrixAvroReader.read((Path) null) } - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(12) @@ -134,7 +134,7 @@ class MatrixAvroReaderTest { def ex = assertThrows(IllegalArgumentException) { MatrixAvroReader.read((URL) null) } - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(13) @@ -142,7 +142,7 @@ class MatrixAvroReaderTest { def ex = assertThrows(IllegalArgumentException) { MatrixAvroReader.read((InputStream) null) } - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(14) @@ -150,7 +150,7 @@ class MatrixAvroReaderTest { def ex = assertThrows(IllegalArgumentException) { MatrixAvroReader.read((byte[]) null) } - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(15) @@ -158,7 +158,7 @@ class MatrixAvroReaderTest { def ex = assertThrows(IllegalArgumentException) { MatrixAvroReader.readFile(null) } - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(16) @@ -166,25 +166,25 @@ class MatrixAvroReaderTest { def ex = assertThrows(IllegalArgumentException) { MatrixAvroReader.readUrl(null) } - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(17) void testValidationFileDoesNotExist() { def ex = assertThrows(AvroValidationException) { - MatrixAvroReader.read(new File("/non/existent/path/to/file.avro")) + 
MatrixAvroReader.read(new File('/non/existent/path/to/file.avro')) } - assertTrue(ex.message.contains("does not exist")) + assertTrue(ex.message.contains('does not exist')) } @Test @Order(18) void testValidationFileIsDirectory() { - File tempDir = Files.createTempDirectory("avro-test").toFile() + File tempDir = Files.createTempDirectory('avro-test').toFile() try { def ex = assertThrows(AvroValidationException) { MatrixAvroReader.read(tempDir) } - assertTrue(ex.message.contains("directory")) + assertTrue(ex.message.contains('directory')) } finally { tempDir.delete() } @@ -193,9 +193,9 @@ class MatrixAvroReaderTest { @Test @Order(19) void testValidationInvalidUrlString() { def ex = assertThrows(IllegalArgumentException) { - MatrixAvroReader.readUrl("not a valid url") + MatrixAvroReader.readUrl('not a valid url') } - assertTrue(ex.message.contains("Invalid URL string")) + assertTrue(ex.message.contains('Invalid URL string')) } // ---------- AvroReadOptions tests ---------- @@ -203,54 +203,54 @@ class MatrixAvroReaderTest { @Test @Order(20) void testReadWithOptionsCustomName() { def options = new AvroReadOptions() - .matrixName("CustomName") + .matrixName('CustomName') Matrix m = MatrixAvroReader.read(avroFile, options) assertBasicShapeAndValues(m) - assertEquals("CustomName", m.matrixName) + assertEquals('CustomName', m.matrixName) } @Test @Order(21) void testReadFromPathWithOptions() { def options = new AvroReadOptions() - .matrixName("PathOptions") + .matrixName('PathOptions') Matrix m = MatrixAvroReader.read(avroFile.toPath(), options) assertBasicShapeAndValues(m) - assertEquals("PathOptions", m.matrixName) + assertEquals('PathOptions', m.matrixName) } @Test @Order(22) void testReadFromUrlWithOptions() { def options = new AvroReadOptions() - .matrixName("UrlOptions") + .matrixName('UrlOptions') Matrix m = MatrixAvroReader.read(avroFile.toURI().toURL(), options) assertBasicShapeAndValues(m) - assertEquals("UrlOptions", m.matrixName) + assertEquals('UrlOptions', 
m.matrixName) } @Test @Order(23) void testReadFromByteArrayWithOptions() { byte[] content = Files.readAllBytes(avroFile.toPath()) def options = new AvroReadOptions() - .matrixName("ByteOptions") + .matrixName('ByteOptions') Matrix m = MatrixAvroReader.read(content, options) assertBasicShapeAndValues(m) - assertEquals("ByteOptions", m.matrixName) + assertEquals('ByteOptions', m.matrixName) } @Test @Order(24) void testReadFromInputStreamWithOptions() { def options = new AvroReadOptions() - .matrixName("StreamOptions") + .matrixName('StreamOptions') InputStream is = new FileInputStream(avroFile) try { Matrix m = MatrixAvroReader.read(is, options) assertBasicShapeAndValues(m) - assertEquals("StreamOptions", m.matrixName) + assertEquals('StreamOptions', m.matrixName) } finally { is.close() } @@ -261,7 +261,7 @@ class MatrixAvroReaderTest { def ex = assertThrows(IllegalArgumentException) { MatrixAvroReader.read(avroFile, (AvroReadOptions) null) } - assertEquals("Options cannot be null", ex.message) + assertEquals('Options cannot be null', ex.message) } @Test @Order(26) @@ -271,12 +271,12 @@ class MatrixAvroReaderTest { Matrix m = MatrixAvroReader.read(avroFile, options) assertBasicShapeAndValues(m) - assertEquals("Person", m.matrixName) + assertEquals('Person', m.matrixName) } @Test @Order(27) void testReadWithReaderSchemaProjection() { - String readerSchemaJson = """ + String readerSchemaJson = ''' { "type": "record", "name": "Person", @@ -286,20 +286,20 @@ class MatrixAvroReaderTest { {"name":"country", "type":["null","string"], "default": null} ] } - """.stripIndent() + '''.stripIndent() def options = new AvroReadOptions() .readerSchema(new Schema.Parser().parse(readerSchemaJson)) Matrix m = MatrixAvroReader.read(avroFile, options) - assertEquals("Person", m.matrixName) - assertEquals(["name", "age", "country"], m.columnNames()) + assertEquals('Person', m.matrixName) + assertEquals(['name', 'age', 'country'], m.columnNames()) assertEquals(3, m.columnCount()) 
assertEquals(2, m.rowCount()) - assertEquals("Alice", m[0, "name"]) - assertEquals(30L, m[0, "age"]) - assertEquals(null, m[0, "country"]) - assertTrue(m["age"][0] instanceof Long) + assertEquals('Alice', m[0, 'name']) + assertEquals(30L, m[0, 'age']) + assertEquals(null, m[0, 'country']) + assertTrue(m['age'][0] instanceof Long) } // ---------- custom exception tests ---------- @@ -309,31 +309,31 @@ class MatrixAvroReaderTest { def ex = assertThrows(AvroValidationException) { MatrixAvroReader.read((File) null) } - assertEquals("file", ex.parameterName) + assertEquals('file', ex.parameterName) assertNotNull(ex.suggestion) - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test @Order(31) void testValidationExceptionForNonExistentFile() { def ex = assertThrows(AvroValidationException) { - MatrixAvroReader.read(new File("/non/existent/path.avro")) + MatrixAvroReader.read(new File('/non/existent/path.avro')) } - assertEquals("file", ex.parameterName) + assertEquals('file', ex.parameterName) assertNotNull(ex.suggestion) - assertTrue(ex.message.contains("does not exist")) + assertTrue(ex.message.contains('does not exist')) } @Test @Order(32) void testValidationExceptionForDirectory() { - File tempDir = Files.createTempDirectory("avro-test").toFile() + File tempDir = Files.createTempDirectory('avro-test').toFile() try { def ex = assertThrows(AvroValidationException) { MatrixAvroReader.read(tempDir) } - assertEquals("file", ex.parameterName) + assertEquals('file', ex.parameterName) assertNotNull(ex.suggestion) - assertTrue(ex.message.contains("directory")) + assertTrue(ex.message.contains('directory')) } finally { tempDir.delete() } @@ -341,13 +341,13 @@ class MatrixAvroReaderTest { @Test @Order(33) void testValidationEmptyFileHandling() { - File emptyFile = Files.createTempFile("avro-empty-", ".avro").toFile() + File emptyFile = Files.createTempFile('avro-empty-', '.avro').toFile() try { def ex = 
assertThrows(AvroValidationException) { MatrixAvroReader.read(emptyFile) } - assertEquals("file", ex.parameterName) - assertTrue(ex.message.contains("empty")) + assertEquals('file', ex.parameterName) + assertTrue(ex.message.contains('empty')) } finally { emptyFile.delete() } @@ -355,15 +355,15 @@ class MatrixAvroReaderTest { @Test @Order(34) void testValidationCorruptFileHandling() { - File corruptFile = Files.createTempFile("avro-corrupt-", ".avro").toFile() - Files.write(corruptFile.toPath(), "not avro data".bytes) + File corruptFile = Files.createTempFile('avro-corrupt-', '.avro').toFile() + Files.write(corruptFile.toPath(), 'not avro data'.bytes) try { def ex = assertThrows(AvroValidationException) { MatrixAvroReader.read(corruptFile) } - assertEquals("file", ex.parameterName) + assertEquals('file', ex.parameterName) assertNotNull(ex.cause) - assertTrue(ex.message.toLowerCase().contains("corrupt") || ex.message.toLowerCase().contains("invalid")) + assertTrue(ex.message.toLowerCase().contains('corrupt') || ex.message.toLowerCase().contains('invalid')) } finally { corruptFile.delete() } @@ -373,7 +373,7 @@ class MatrixAvroReaderTest { private static Schema buildSchema() { // language=JSON - def schemaJson = """ + def schemaJson = ''' { "type": "record", "name": "Person", @@ -385,7 +385,7 @@ class MatrixAvroReaderTest { {"name":"price", "type":{"type":"bytes","logicalType":"decimal","precision":10,"scale":2}} ] } - """.stripIndent() + '''.stripIndent() new Schema.Parser().parse(schemaJson) } @@ -394,9 +394,9 @@ class MatrixAvroReaderTest { writer.create(schema, outFile) try { writer.append(makeRecord(schema, - "Alice", 30, birth1, ts1, new BigDecimal("12.34"))) + 'Alice', 30, BIRTH1, TS1, 12.34)) writer.append(makeRecord(schema, - "Bob", 43, birth2, ts2, new BigDecimal("56.78"))) + 'Bob', 43, BIRTH2, TS2, 56.78)) } finally { writer.close() } @@ -407,16 +407,16 @@ class MatrixAvroReaderTest { LocalDate birthday, Instant ts, BigDecimal price) { def rec = new 
GenericData.Record(schema) - rec.put("name", name) - rec.put("age", age) - rec.put("birthday", (int) birthday.toEpochDay()) // date: days since epoch - rec.put("ts", ts.toEpochMilli()) // timestamp-millis + rec.put('name', name) + rec.put('age', age) + rec.put('birthday', (int) birthday.toEpochDay()) // date: days since epoch + rec.put('ts', ts.toEpochMilli()) // timestamp-millis // decimal to bytes - Schema priceSchema = schema.getField("price").schema() + Schema priceSchema = schema.getField('price').schema() LogicalTypes.Decimal dec = (LogicalTypes.Decimal) priceSchema.getLogicalType() ByteBuffer bb = new Conversions.DecimalConversion().toBytes(price, priceSchema, dec) - rec.put("price", bb) + rec.put('price', bb) return rec } @@ -427,21 +427,22 @@ class MatrixAvroReaderTest { assertEquals(5, m.columnCount()) // values (Groovy indexing works even in JUnit tests written in Groovy) - assertEquals("Alice", m[0, "name"]) - assertEquals(30, m[0, "age"]) - assertEquals(birth1, m[0, "birthday"]) - assertEquals(ts1, m[0, "ts"]) - assertEquals(new BigDecimal("12.34"), m[0, "price"]) - - assertEquals("Bob", m[1, "name"]) - assertEquals(43, m[1, "age"]) - assertEquals(birth2, m[1, "birthday"]) - assertEquals(ts2, m[1, "ts"]) - assertEquals(new BigDecimal("56.78"), m[1, "price"]) + assertEquals('Alice', m[0, 'name']) + assertEquals(30, m[0, 'age']) + assertEquals(BIRTH1, m[0, 'birthday']) + assertEquals(TS1, m[0, 'ts']) + assertEquals(12.34, m[0, 'price']) + + assertEquals('Bob', m[1, 'name']) + assertEquals(43, m[1, 'age']) + assertEquals(BIRTH2, m[1, 'birthday']) + assertEquals(TS2, m[1, 'ts']) + assertEquals(56.78, m[1, 'price']) // Basic type checks to ensure logical conversions landed correctly - assertTrue(m["birthday"][0] instanceof LocalDate) - assertTrue(m["ts"][0] instanceof Instant) - assertTrue(m["price"][0] instanceof BigDecimal) + assertTrue(m['birthday'][0] instanceof LocalDate) + assertTrue(m['ts'][0] instanceof Instant) + assertTrue(m['price'][0] instanceof 
BigDecimal) } + } diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy index 0b49d1f0a..16eb3e7dd 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy @@ -1,70 +1,67 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.* - import org.apache.avro.Schema import org.apache.avro.file.DataFileReader import org.apache.avro.generic.GenericDatumReader import org.apache.avro.generic.GenericRecord import org.junit.jupiter.api.* - import se.alipsa.matrix.avro.AvroSchemaDecl import se.alipsa.matrix.avro.AvroWriteOptions import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix - import java.nio.file.Files + import java.time.* class MatrixAvroRoundTripTest { - @Test void roundTrip_withDecimalInference_preservesTypes() { - File tmp = Files.createTempFile("matrix-avro-rt-withDecimalInference", ".avro").toFile() + File tmp = Files.createTempFile('matrix-avro-rt-withDecimalInference', '.avro').toFile() // --- Build a source Matrix with a variety of types (and some nulls) --- - def uuid1 = UUID.fromString("123e4567-e89b-12d3-a456-426614174000") - def uuid2 = UUID.fromString("aaaaaaaa-bbbb-cccc-dddd-eeeeffffffff") + def uuid1 = UUID.fromString('123e4567-e89b-12d3-a456-426614174000') + def uuid2 = UUID.fromString('aaaaaaaa-bbbb-cccc-dddd-eeeeffffffff') // Use millis-precision for time-like values to avoid rounding surprises LocalTime t1 = LocalTime.of(9, 10, 11, 345_000_000) LocalTime t2 = LocalTime.of(23, 59, 0, 1_000_000) // will round down to 00:00:00.001 - Instant i1 = Instant.parse("2024-03-01T12:34:56Z") - Instant i2 = Instant.parse("2024-12-24T08:09:10Z") + Instant i1 = Instant.parse('2024-03-01T12:34:56Z') + Instant i2 = 
Instant.parse('2024-12-24T08:09:10Z') LocalDateTime ldt1 = LocalDateTime.of(2024, 7, 1, 10, 20, 30, 999_000_000) LocalDateTime ldt2 = LocalDateTime.of(2025, 1, 2, 3, 4, 5, 123_000_000) - def cols = new LinkedHashMap>() as LinkedHashMap> - cols["name"] = ["Alice", "Bob", null] - cols["age"] = [30, null, 41] - cols["birthday"] = [LocalDate.of(1990,1,5), LocalDate.of(1984,7,23), null] - cols["time"] = [t1, t2, null] - cols["ts"] = [i1, i2, null] // Instant -> timestamp-millis - cols["ldt"] = [ldt1, ldt2, null] // LocalDateTime -> local-timestamp-millis - cols["price"] = [new BigDecimal("12.34"), null, new BigDecimal("1000.50")] // BigDecimal - cols["flag"] = [true, false, null] - cols["bytes"] = [([1,2,3] as byte[]), null, (byte[])[42]] - cols["uuid"] = [uuid1, null, uuid2] - cols["note"] = ["hi", null, ""] - - Matrix src = Matrix.builder("RoundTripDecimal") + Map> cols = [:] + cols['name'] = ['Alice', 'Bob', null] + cols['age'] = [30, null, 41] + cols['birthday'] = [LocalDate.of(1990, 1, 5), LocalDate.of(1984, 7, 23), null] + cols['time'] = [t1, t2, null] + cols['ts'] = [i1, i2, null] // Instant -> timestamp-millis + cols['ldt'] = [ldt1, ldt2, null] // LocalDateTime -> local-timestamp-millis + cols['price'] = [12.34, null, 1000.50] // BigDecimal + cols['flag'] = [true, false, null] + cols['bytes'] = [([1, 2, 3] as byte[]), null, (byte[])[42]] + cols['uuid'] = [uuid1, null, uuid2] + cols['note'] = ['hi', null, ''] + + Matrix src = Matrix.builder('RoundTripDecimal') .columns(cols) .types(String, Integer, LocalDate, LocalTime, Instant, LocalDateTime, BigDecimal, Boolean, byte[], UUID, String) .build() - assert src.type("ldt") == LocalDateTime : "Matrix reported type for 'ldt' is ${src.type("ldt")}" + assert src.type('ldt') == LocalDateTime : "Matrix reported type for 'ldt' is ${src.type('ldt')}" // --- Write with inference === true (decimal logical type) --- MatrixAvroWriter.write(src, tmp, true) - def ltName = avroLogicalTypeName(tmp, "ldt") + def ltName = 
avroLogicalTypeName(tmp, 'ldt') assertNotNull(ltName, "No logicalType on 'ldt' field") - assertTrue(ltName == "local-timestamp-micros" || ltName == "local-timestamp-millis", + assertTrue(ltName == 'local-timestamp-micros' || ltName == 'local-timestamp-millis', "Unexpected logicalType for 'ldt': $ltName") - long raw = rawLongFor(tmp, "ldt") - if (ltName == "local-timestamp-micros") { + long raw = rawLongFor(tmp, 'ldt') + if (ltName == 'local-timestamp-micros') { assertEquals(999_000L, raw % 1_000_000L, "Expected micros remainder 999000, got ${raw % 1_000_000L}") } else { // millis assertEquals(999L, raw % 1_000L, "Expected millis remainder 999, got ${raw % 1_000L}") @@ -72,81 +69,74 @@ class MatrixAvroRoundTripTest { // --- Read back --- Matrix back = MatrixAvroReader.read(tmp) - // --- Basic shape --- + assertBasicShape(src, back) + assertStringAndNumericValues(back) + assertDateTimeValues(back, t1, t2, i1, i2, ldt1, ldt2) + assertDecimalAndBinaryValues(back, uuid1, uuid2) + tmp.delete() + } + + private static void assertBasicShape(Matrix src, Matrix back) { assertEquals(src.rowCount(), back.rowCount()) assertEquals(src.columnCount(), back.columnCount()) assertEquals(src.columnNames(), back.columnNames()) + } - // --- Per-column checks --- - - // name (String) - assertEquals("Alice", back[0, "name"]) - assertEquals("Alice", back[0, "name"]) - assertEquals("Bob", back[1, "name"]) - assertNull(back[2, "name"]) - assertEquals("", back[2, "note"]) - - // age (Integer with null) - assertEquals(30, back[0, "age"]) - assertNull(back[1, "age"]) - assertEquals(41, back[2, "age"]) - - // birthday (LocalDate) - assertEquals(LocalDate.of(1990,1,5), back[0, "birthday"]) - assertEquals(LocalDate.of(1984,7,23), back[1, "birthday"]) - assertNull(back[2, "birthday"]) - assertTrue(back["birthday"][0] instanceof LocalDate) - - // time (LocalTime, millis precision) - assertEquals(truncMillis(t1), back[0, "time"]) - assertEquals(truncMillis(t2), back[1, "time"]) - assertNull(back[2, 
"time"]) - assertTrue(back["time"][0] instanceof LocalTime) - - // ts (Instant, millis precision) - assertEquals(i1, back[0, "ts"]) - assertEquals(i2, back[1, "ts"]) - assertNull(back[2, "ts"]) - assertTrue(back["ts"][0] instanceof Instant) - - // ldt (LocalDateTime, millis precision) - assertEquals(truncMillis(ldt1), back[0, "ldt"]) - assertEquals(truncMillis(ldt2), back[1, "ldt"]) - assertNull(back[2, "ldt"]) - assertTrue(back["ldt"][0] instanceof LocalDateTime) - - // price (BigDecimal due to inference) - assertEquals(new BigDecimal("12.34"), back[0, "price"]) - assertNull(back[1, "price"]) - assertEquals(new BigDecimal("1000.50"), back[2, "price"]) - assertTrue(back["price"][0] instanceof BigDecimal) - - // flag (Boolean) - assertEquals(true, back[0, "flag"]) - assertEquals(false, back[1, "flag"]) - assertNull(back[2, "flag"]) - - // bytes (byte[]) - assertArrayEquals([1,2,3] as byte[], (byte[]) back[0, "bytes"]) - assertNull(back[1, "bytes"]) - assertArrayEquals([42] as byte[], (byte[]) back[2, "bytes"]) - - // uuid (stored as logical uuid(string) → reader returns String) - assertEquals(uuid1.toString(), back[0, "uuid"]) - assertNull(back[1, "uuid"]) - assertEquals(uuid2.toString(), back[2, "uuid"]) - assertTrue(back["uuid"][0] instanceof String) - tmp.delete() + private static void assertStringAndNumericValues(Matrix back) { + assertEquals('Alice', back[0, 'name']) + assertEquals('Bob', back[1, 'name']) + assertNull(back[2, 'name']) + assertEquals('', back[2, 'note']) + assertEquals(30, back[0, 'age']) + assertNull(back[1, 'age']) + assertEquals(41, back[2, 'age']) + assertEquals(true, back[0, 'flag']) + assertEquals(false, back[1, 'flag']) + assertNull(back[2, 'flag']) + } + + private static void assertDateTimeValues(Matrix back, LocalTime t1, LocalTime t2, Instant i1, Instant i2, + LocalDateTime ldt1, LocalDateTime ldt2) { + assertEquals(LocalDate.of(1990, 1, 5), back[0, 'birthday']) + assertEquals(LocalDate.of(1984, 7, 23), back[1, 'birthday']) + 
assertNull(back[2, 'birthday']) + assertTrue(back['birthday'][0] instanceof LocalDate) + assertEquals(truncMillis(t1), back[0, 'time']) + assertEquals(truncMillis(t2), back[1, 'time']) + assertNull(back[2, 'time']) + assertTrue(back['time'][0] instanceof LocalTime) + assertEquals(i1, back[0, 'ts']) + assertEquals(i2, back[1, 'ts']) + assertNull(back[2, 'ts']) + assertTrue(back['ts'][0] instanceof Instant) + assertEquals(truncMillis(ldt1), back[0, 'ldt']) + assertEquals(truncMillis(ldt2), back[1, 'ldt']) + assertNull(back[2, 'ldt']) + assertTrue(back['ldt'][0] instanceof LocalDateTime) + } + + private static void assertDecimalAndBinaryValues(Matrix back, UUID uuid1, UUID uuid2) { + assertEquals(12.34, back[0, 'price']) + assertNull(back[1, 'price']) + assertEquals(1000.50, back[2, 'price']) + assertTrue(back['price'][0] instanceof BigDecimal) + assertArrayEquals([1, 2, 3] as byte[], (byte[]) back[0, 'bytes']) + assertNull(back[1, 'bytes']) + assertArrayEquals([42] as byte[], (byte[]) back[2, 'bytes']) + assertEquals(uuid1.toString(), back[0, 'uuid']) + assertNull(back[1, 'uuid']) + assertEquals(uuid2.toString(), back[2, 'uuid']) + assertTrue(back['uuid'][0] instanceof String) } @Test void roundTrip_withoutDecimalInference_writesBigDecimalAsDouble() { - File tmp = Files.createTempFile("matrix-avro-rt-withoutDecimalInference", ".avro").toFile() - def cols = new LinkedHashMap>() as LinkedHashMap> - cols["price"] = [new BigDecimal("12.34"), null, new BigDecimal("1000.5")] - cols["name"] = ["A", "B", "C"] + File tmp = Files.createTempFile('matrix-avro-rt-withoutDecimalInference', '.avro').toFile() + Map> cols = [:] + cols['price'] = [12.34, null, 1000.5] + cols['name'] = ['A', 'B', 'C'] - Matrix src = Matrix.builder("RoundTripNoDecimal").columns(cols).build() + Matrix src = Matrix.builder('RoundTripNoDecimal').columns(cols).build() // Write with inference === false → BigDecimal stored as DOUBLE MatrixAvroWriter.write(src, tmp, false) @@ -157,18 +147,18 @@ class 
MatrixAvroRoundTripTest { assertEquals(2, back.columnCount()) // price comes back as Double (per writer policy) - assertTrue(back["price"][0] instanceof Double) - assertEquals(12.34d, (Double) back[0, "price"], 1e-9) - assertNull(back[1, "price"]) - assertEquals(1000.5d, (Double) back[2, "price"], 1e-9) + assertTrue(back['price'][0] instanceof Double) + assertEquals(12.34d, (Double) back[0, 'price'], 1e-9) + assertNull(back[1, 'price']) + assertEquals(1000.5d, (Double) back[2, 'price'], 1e-9) - assertEquals(["A","B","C"], (0..<3).collect { back[it, "name"] }) + assertEquals(['A', 'B', 'C'], (0..<3).collect { back[it, 'name'] }) tmp.delete() } @Test void roundTripArrayMapRecordColumns() { - File tmp = Files.createTempFile("matrix-avro-rt-collections-", ".avro").toFile() + File tmp = Files.createTempFile('matrix-avro-rt-collections-', '.avro').toFile() // ----- build a Matrix with ARRAY, MAP, and RECORD-like columns ----- List a1 = [1, 2, 3] @@ -180,16 +170,16 @@ class MatrixAvroRoundTripTest { Map m3 = null // RECORD-like: fixed field set across rows => will serialize as Avro RECORD - Map r1 = [name:"Alice", age:30, birthday: LocalDate.of(1990,1,5)] - Map r2 = [name:"Bob", age:41, birthday: LocalDate.of(1984,7,23)] - Map r3 = [name:null, age:null, birthday:null] + Map r1 = [name: 'Alice', age: 30, birthday: LocalDate.of(1990, 1, 5)] + Map r2 = [name: 'Bob', age:41, birthday: LocalDate.of(1984, 7, 23)] + Map r3 = [name: null, age:null, birthday: null] - def cols = new LinkedHashMap>() as LinkedHashMap> - cols["arr"] = [a1, a2, a3] // ARRAY - cols["props"] = [m1, m2, m3] // MAP (keys vary across rows) - cols["person"] = [r1, r2, r3] // RECORD (same fields across rows) + Map> cols = [:] + cols['arr'] = [a1, a2, a3] // ARRAY + cols['props'] = [m1, m2, m3] // MAP (keys vary across rows) + cols['person'] = [r1, r2, r3] // RECORD (same fields across rows) - Matrix src = Matrix.builder("ArrMapRec") + Matrix src = Matrix.builder('ArrMapRec') .columns(cols) .types(List, Map, 
Map) // record-like column is a Map but will be written as RECORD by heuristic .build() @@ -200,52 +190,52 @@ class MatrixAvroRoundTripTest { // shape & columns assertEquals(3, back.rowCount()) - assertEquals(["arr","props","person"], back.columnNames()) + assertEquals(['arr', 'props', 'person'], back.columnNames()) // ----- ARRAY assertions ----- - assertEquals([1,2,3], back[0, "arr"]) - assertEquals([], back[1, "arr"]) - assertEquals([10, null, 30], back[2, "arr"]) + assertEquals([1, 2, 3], back[0, 'arr']) + assertEquals([], back[1, 'arr']) + assertEquals([10, null, 30], back[2, 'arr']) // ----- MAP assertions ----- - assertTrue(back[0, "props"] instanceof Map) - assertTrue(back[1, "props"] instanceof Map) - assertNull(back[2, "props"]) - assertEquals(1, (back[0, "props"] as Map).get("x")) - assertEquals(2, (back[0, "props"] as Map).get("y")) - assertEquals(5, (back[1, "props"] as Map).get("y")) - assertEquals(9, (back[1, "props"] as Map).get("z")) + assertTrue(back[0, 'props'] instanceof Map) + assertTrue(back[1, 'props'] instanceof Map) + assertNull(back[2, 'props']) + assertEquals(1, (back[0, 'props'] as Map).get('x')) + assertEquals(2, (back[0, 'props'] as Map).get('y')) + assertEquals(5, (back[1, 'props'] as Map).get('y')) + assertEquals(9, (back[1, 'props'] as Map).get('z')) // ----- RECORD-like assertions (nested Map from reader) ----- - def p0 = back[0, "person"] as Map - def p1 = back[1, "person"] as Map - def p2 = back[2, "person"] as Map + def p0 = back[0, 'person'] as Map + def p1 = back[1, 'person'] as Map + def p2 = back[2, 'person'] as Map - assertEquals("Alice", p0["name"]) - assertEquals(30, p0["age"]) - assertEquals(LocalDate.of(1990,1,5), p0["birthday"]) + assertEquals('Alice', p0['name']) + assertEquals(30, p0['age']) + assertEquals(LocalDate.of(1990, 1, 5), p0['birthday']) - assertEquals("Bob", p1["name"]) - assertEquals(41, p1["age"]) - assertEquals(LocalDate.of(1984,7,23), p1["birthday"]) + assertEquals('Bob', p1['name']) + assertEquals(41, 
p1['age']) + assertEquals(LocalDate.of(1984, 7, 23), p1['birthday']) - assertNull(p2["name"]) - assertNull(p2["age"]) - assertNull(p2["birthday"]) + assertNull(p2['name']) + assertNull(p2['age']) + assertNull(p2['birthday']) tmp.delete() } @Test void roundTripWithExplicitSchemaOverrides() { - File tmp = Files.createTempFile("matrix-avro-rt-explicit-schema-", ".avro").toFile() + File tmp = Files.createTempFile('matrix-avro-rt-explicit-schema-', '.avro').toFile() - Matrix src = Matrix.builder("ExplicitSchema") + Matrix src = Matrix.builder('ExplicitSchema') .columns( - amount: [new BigDecimal('12.340'), new BigDecimal('56.780')], + amount: [12.340, 56.780], tags: [[1, 2], [3L, null]], props: [[x: 1], [y: 2]], - person: [[name: 'Alice'], [age: 41]] + person: [[name: 'Alice'], [age: 41]] ) .types(BigDecimal, List, Map, Map) .build() @@ -263,8 +253,8 @@ class MatrixAvroRoundTripTest { MatrixAvroWriter.write(src, tmp, options) Matrix back = MatrixAvroReader.read(tmp) - assertEquals(new BigDecimal('12.340'), back[0, 'amount']) - assertEquals(new BigDecimal('56.780'), back[1, 'amount']) + assertEquals(12.340, back[0, 'amount']) + assertEquals(56.780, back[1, 'amount']) assertTrue(back['amount'][0] instanceof BigDecimal) assertEquals([1L, 2L], back[0, 'tags']) @@ -273,8 +263,8 @@ class MatrixAvroRoundTripTest { assertEquals([x: 1], back[0, 'props']) assertEquals([y: 2], back[1, 'props']) - assertEquals([name: 'Alice', age: null], back[0, 'person']) - assertEquals([name: null, age: 41], back[1, 'person']) + assertEquals([name: 'Alice', age: null], back[0, 'person']) + assertEquals([name: null, age: 41], back[1, 'person']) def reader = new DataFileReader(tmp, new GenericDatumReader<>()) try { @@ -290,14 +280,18 @@ class MatrixAvroRoundTripTest { // --- helpers --- private static LocalTime truncMillis(LocalTime t) { - if (t == null) return null + if (t == null) { + return null + } // intdiv => integer division; result is int int msPart = t.nano.intdiv(1_000_000) * 1_000_000 
return t.withNano(msPart) } private static LocalDateTime truncMillis(LocalDateTime dt) { - if (dt == null) return null + if (dt == null) { + return null + } int msPart = dt.nano.intdiv(1_000_000) * 1_000_000 return dt.withNano(msPart) } @@ -317,11 +311,14 @@ class MatrixAvroRoundTripTest { private static long rawLongFor(File avro, String field) { def rdr = new DataFileReader(avro, new GenericDatumReader<>()) try { - if (!rdr.hasNext()) throw new IllegalStateException("no records written") + if (!rdr.hasNext()) { + throw new IllegalStateException('no records written') + } def rec = rdr.next() return (rec.get(field) as Long) } finally { rdr.close() } } + } diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy index f790037e6..91d05577f 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy @@ -1,24 +1,22 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.* - import org.apache.avro.LogicalTypes import org.apache.avro.Schema import org.apache.avro.file.DataFileReader import org.apache.avro.generic.GenericDatumReader import org.apache.avro.generic.GenericRecord import org.junit.jupiter.api.Test - import se.alipsa.matrix.avro.AvroSchemaDecl import se.alipsa.matrix.avro.AvroWriteOptions import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.avro.exceptions.AvroSchemaException import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix - import java.nio.file.Files import java.nio.file.Path import java.time.LocalDateTime + import java.time.LocalTime class MatrixAvroWriterTest { @@ -26,18 +24,18 @@ class MatrixAvroWriterTest { @Test void schema_has_expected_logical_types() { // Build a tiny Matrix with the three columns of interest - def cols = new 
LinkedHashMap>() as LinkedHashMap> - cols["ldt"] = [LocalDateTime.of(2024, 7, 1, 10, 20, 30, 999_000_000)] // nanos = .999 - cols["time"] = [LocalTime.of(9, 10, 11, 345_000_000)] // 09:10:11.345 - cols["price"] = [new BigDecimal("456.78")] // precision=5, scale=2 + Map> cols = [:] + cols['ldt'] = [LocalDateTime.of(2024, 7, 1, 10, 20, 30, 999_000_000)] // nanos = .999 + cols['time'] = [LocalTime.of(9, 10, 11, 345_000_000)] // 09:10:11.345 + cols['price'] = [456.78] // precision=5, scale=2 - Matrix m = Matrix.builder("WriterSanity") + Matrix m = Matrix.builder('WriterSanity') .columns(cols) .types(LocalDateTime, LocalTime, BigDecimal) .build() // Write with decimal inference enabled - File tmp = Files.createTempFile("matrix-avro-writer-schema-", ".avro").toFile() + File tmp = Files.createTempFile('matrix-avro-writer-schema-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, true) @@ -47,26 +45,26 @@ class MatrixAvroWriterTest { Schema fileSchema = reader.schema // ---- ldt: local-timestamp-micros on LONG ---- - Schema ldtSchema = nonNullFieldSchema(fileSchema, "ldt") - assertEquals(Schema.Type.LONG, ldtSchema.getType(), "ldt should be LONG") - assertNotNull(ldtSchema.getLogicalType(), "ldt must have a logical type") - assertEquals("local-timestamp-micros", ldtSchema.getLogicalType().name, "ldt logicalType") + Schema ldtSchema = nonNullFieldSchema(fileSchema, 'ldt') + assertEquals(Schema.Type.LONG, ldtSchema.getType(), 'ldt should be LONG') + assertNotNull(ldtSchema.getLogicalType(), 'ldt must have a logical type') + assertEquals('local-timestamp-micros', ldtSchema.getLogicalType().name, 'ldt logicalType') // ---- time: time-millis on INT ---- - Schema timeSchema = nonNullFieldSchema(fileSchema, "time") - assertEquals(Schema.Type.INT, timeSchema.getType(), "time should be INT") - assertNotNull(timeSchema.getLogicalType(), "time must have a logical type") - assertEquals("time-millis", timeSchema.getLogicalType().name, "time logicalType") + Schema timeSchema = 
nonNullFieldSchema(fileSchema, 'time') + assertEquals(Schema.Type.INT, timeSchema.getType(), 'time should be INT') + assertNotNull(timeSchema.getLogicalType(), 'time must have a logical type') + assertEquals('time-millis', timeSchema.getLogicalType().name, 'time logicalType') // ---- price: decimal(bytes) with inferred precision/scale ---- - Schema priceSchema = nonNullFieldSchema(fileSchema, "price") - assertEquals(Schema.Type.BYTES, priceSchema.getType(), "price should be BYTES when decimal") - assertNotNull(priceSchema.getLogicalType(), "price must have a logical type") - assertEquals("decimal", priceSchema.getLogicalType().name, "price logicalType") + Schema priceSchema = nonNullFieldSchema(fileSchema, 'price') + assertEquals(Schema.Type.BYTES, priceSchema.getType(), 'price should be BYTES when decimal') + assertNotNull(priceSchema.getLogicalType(), 'price must have a logical type') + assertEquals('decimal', priceSchema.getLogicalType().name, 'price logicalType') def dec = (LogicalTypes.Decimal) priceSchema.getLogicalType() - assertEquals(5, dec.getPrecision(), "price decimal precision should be inferred as 5 (456.78)") - assertEquals(2, dec.getScale(), "price decimal scale should be inferred as 2") + assertEquals(5, dec.getPrecision(), 'price decimal precision should be inferred as 5 (456.78)') + assertEquals(2, dec.getScale(), 'price decimal scale should be inferred as 2') } finally { reader.close() } @@ -77,28 +75,28 @@ class MatrixAvroWriterTest { @Test void writeBytesBasic() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2, 3] - cols["name"] = ["Alice", "Bob", "Charlie"] - cols["value"] = [10.5, 20.5, 30.5] + Map> cols = [:] + cols['id'] = [1, 2, 3] + cols['name'] = ['Alice', 'Bob', 'Charlie'] + cols['value'] = [10.5, 20.5, 30.5] - Matrix m = Matrix.builder("ByteTest") + Matrix m = Matrix.builder('ByteTest') .columns(cols) .types(Integer, String, BigDecimal) .build() byte[] avroBytes = MatrixAvroWriter.writeBytes(m, true) assertNotNull(avroBytes) - 
assertTrue(avroBytes.length > 0, "Byte array should not be empty") + assertTrue(avroBytes.length > 0, 'Byte array should not be empty') } @Test void writeBytesRoundTrip() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2, 3] - cols["score"] = [95.5, 87.3, 92.1] + Map> cols = [:] + cols['id'] = [1, 2, 3] + cols['score'] = [95.5, 87.3, 92.1] - Matrix original = Matrix.builder("RoundTrip") + Matrix original = Matrix.builder('RoundTrip') .columns(cols) .types(Integer, BigDecimal) .build() @@ -107,7 +105,7 @@ class MatrixAvroWriterTest { byte[] avroBytes = MatrixAvroWriter.writeBytes(original, true) // Read it back - Matrix result = se.alipsa.matrix.avro.MatrixAvroReader.read(avroBytes, "RoundTrip") + Matrix result = se.alipsa.matrix.avro.MatrixAvroReader.read(avroBytes, 'RoundTrip') assertEquals(original.rowCount(), result.rowCount()) assertEquals(original.columnCount(), result.columnCount()) @@ -116,10 +114,10 @@ class MatrixAvroWriterTest { @Test void writeBytesWithoutInference() { - def cols = new LinkedHashMap>() - cols["price"] = [new BigDecimal("123.45"), new BigDecimal("678.90")] + Map> cols = [:] + cols['price'] = [123.45, 678.90] - Matrix m = Matrix.builder("NoInference") + Matrix m = Matrix.builder('NoInference') .columns(cols) .types(BigDecimal) .build() @@ -133,24 +131,24 @@ class MatrixAvroWriterTest { @Test void schema_infers_decimal_for_object_column() { - def cols = new LinkedHashMap>() - cols["price"] = [new BigDecimal("12.30"), new BigDecimal("456.789")] + Map> cols = [:] + cols['price'] = [12.30, 456.789] - Matrix m = Matrix.builder("ObjectDecimal") + Matrix m = Matrix.builder('ObjectDecimal') .columns(cols) .types(Object) .build() - File tmp = Files.createTempFile("matrix-avro-writer-object-decimal-", ".avro").toFile() + File tmp = Files.createTempFile('matrix-avro-writer-object-decimal-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, true) def reader = new DataFileReader(tmp, new GenericDatumReader<>()) try { Schema schema = 
reader.schema - Schema priceSchema = nonNullFieldSchema(schema, "price") + Schema priceSchema = nonNullFieldSchema(schema, 'price') assertEquals(Schema.Type.BYTES, priceSchema.getType()) assertNotNull(priceSchema.getLogicalType()) - assertEquals("decimal", priceSchema.getLogicalType().name) + assertEquals('decimal', priceSchema.getLogicalType().name) def dec = (LogicalTypes.Decimal) priceSchema.getLogicalType() assertEquals(6, dec.getPrecision()) assertEquals(3, dec.getScale()) @@ -166,26 +164,26 @@ class MatrixAvroWriterTest { @Test void testValidationNullPath() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2] - Matrix m = Matrix.builder("Test").columns(cols).types(Integer).build() + Map> cols = [:] + cols['id'] = [1, 2] + Matrix m = Matrix.builder('Test').columns(cols).types(Integer).build() def ex = assertThrows(IllegalArgumentException) { MatrixAvroWriter.write(m, (Path) null) } - assertEquals("Path cannot be null", ex.message) + assertEquals('Path cannot be null', ex.message) } @Test void testValidationNullOutputStream() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2] - Matrix m = Matrix.builder("Test").columns(cols).types(Integer).build() + Map> cols = [:] + cols['id'] = [1, 2] + Matrix m = Matrix.builder('Test').columns(cols).types(Integer).build() def ex = assertThrows(IllegalArgumentException) { MatrixAvroWriter.write(m, (OutputStream) null) } - assertEquals("OutputStream cannot be null", ex.message) + assertEquals('OutputStream cannot be null', ex.message) } @Test @@ -193,27 +191,27 @@ class MatrixAvroWriterTest { def ex = assertThrows(AvroValidationException) { MatrixAvroWriter.writeBytes(null) } - assertEquals("matrix", ex.parameterName) - assertTrue(ex.message.contains("cannot be null")) + assertEquals('matrix', ex.parameterName) + assertTrue(ex.message.contains('cannot be null')) } @Test void testValidationWriteBytesEmptyMatrix() { - Matrix m = Matrix.builder("Empty").build() + Matrix m = Matrix.builder('Empty').build() def ex = 
assertThrows(AvroValidationException) { MatrixAvroWriter.writeBytes(m) } - assertEquals("matrix", ex.parameterName) - assertTrue(ex.message.contains("at least one column")) + assertEquals('matrix', ex.parameterName) + assertTrue(ex.message.contains('at least one column')) } @Test void testWriteToOutputStream() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2, 3] - cols["name"] = ["Alice", "Bob", "Charlie"] + Map> cols = [:] + cols['id'] = [1, 2, 3] + cols['name'] = ['Alice', 'Bob', 'Charlie'] - Matrix m = Matrix.builder("StreamTest") + Matrix m = Matrix.builder('StreamTest') .columns(cols) .types(Integer, String) .build() @@ -223,36 +221,38 @@ class MatrixAvroWriterTest { byte[] bytes = baos.toByteArray() assertNotNull(bytes) - assertTrue(bytes.length > 0, "Output stream should contain data") + assertTrue(bytes.length > 0, 'Output stream should contain data') // Verify it can be read back - Matrix result = se.alipsa.matrix.avro.MatrixAvroReader.read(bytes, "StreamTest") + Matrix result = se.alipsa.matrix.avro.MatrixAvroReader.read(bytes, 'StreamTest') assertEquals(3, result.rowCount()) assertEquals(2, result.columnCount()) } @Test void testWriteCreatesParentDirectory() { - def cols = new LinkedHashMap>() - cols["id"] = [1] - Matrix m = Matrix.builder("Test").columns(cols).types(Integer).build() + Map> cols = [:] + cols['id'] = [1] + Matrix m = Matrix.builder('Test').columns(cols).types(Integer).build() // Create a temp directory and then a nested path that doesn't exist yet - File tempDir = Files.createTempDirectory("avro-parent-test").toFile() - File nestedFile = new File(tempDir, "nested/subdir/test.avro") + File tempDir = Files.createTempDirectory('avro-parent-test').toFile() + File nestedFile = new File(tempDir, 'nested/subdir/test.avro') try { - assertFalse(nestedFile.parentFile.exists(), "Parent directory should not exist yet") + assertFalse(nestedFile.parentFile.exists(), 'Parent directory should not exist yet') MatrixAvroWriter.write(m, nestedFile) 
- assertTrue(nestedFile.exists(), "File should be created") - assertTrue(nestedFile.parentFile.exists(), "Parent directory should be created") + assertTrue(nestedFile.exists(), 'File should be created') + assertTrue(nestedFile.parentFile.exists(), 'Parent directory should be created') } finally { // Clean up - if (nestedFile.exists()) nestedFile.delete() - new File(tempDir, "nested/subdir").delete() - new File(tempDir, "nested").delete() + if (nestedFile.exists()) { + nestedFile.delete() + } + new File(tempDir, 'nested/subdir').delete() + new File(tempDir, 'nested').delete() tempDir.delete() } } @@ -261,20 +261,20 @@ class MatrixAvroWriterTest { @Test void testWriteWithOptions() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2, 3] - cols["name"] = ["Alice", "Bob", "Charlie"] + Map> cols = [:] + cols['id'] = [1, 2, 3] + cols['name'] = ['Alice', 'Bob', 'Charlie'] - Matrix m = Matrix.builder("OptionsTest") + Matrix m = Matrix.builder('OptionsTest') .columns(cols) .types(Integer, String) .build() def options = new AvroWriteOptions() - .namespace("com.example.test") - .schemaName("TestData") + .namespace('com.example.test') + .schemaName('TestData') - File tmp = Files.createTempFile("avro-options-", ".avro").toFile() + File tmp = Files.createTempFile('avro-options-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, options) @@ -282,8 +282,8 @@ class MatrixAvroWriterTest { def reader = new DataFileReader(tmp, new GenericDatumReader<>()) try { Schema schema = reader.schema - assertEquals("TestData", schema.name) - assertEquals("com.example.test", schema.namespace) + assertEquals('TestData', schema.name) + assertEquals('com.example.test', schema.namespace) } finally { reader.close() } @@ -294,18 +294,18 @@ class MatrixAvroWriterTest { @Test void testWriteDefaultsSchemaNameFromMatrixName() { - Matrix m = Matrix.builder("Orders") - .columns(id: [1, 2], amount: [new BigDecimal("12.34"), new BigDecimal("56.78")]) + Matrix m = Matrix.builder('Orders') + 
.columns(id: [1, 2], amount: [12.34, 56.78]) .types(Integer, BigDecimal) .build() - File tmp = Files.createTempFile("avro-default-schema-name-", ".avro").toFile() + File tmp = Files.createTempFile('avro-default-schema-name-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, false) def reader = new DataFileReader(tmp, new GenericDatumReader<>()) try { - assertEquals("Orders", reader.schema.name) + assertEquals('Orders', reader.schema.name) } finally { reader.close() } @@ -316,18 +316,18 @@ class MatrixAvroWriterTest { @Test void testWriteFallsBackToMatrixSchemaWhenMatrixNameBlank() { - Matrix m = Matrix.builder("") + Matrix m = Matrix.builder('') .columns(id: [1, 2]) .types(Integer) .build() - File tmp = Files.createTempFile("avro-fallback-schema-name-", ".avro").toFile() + File tmp = Files.createTempFile('avro-fallback-schema-name-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, false) def reader = new DataFileReader(tmp, new GenericDatumReader<>()) try { - assertEquals("MatrixSchema", reader.schema.name) + assertEquals('MatrixSchema', reader.schema.name) } finally { reader.close() } @@ -338,18 +338,18 @@ class MatrixAvroWriterTest { @Test void testWriteOptionsDefaultSchemaNameFromMatrixName() { - Matrix m = Matrix.builder("Invoices") + Matrix m = Matrix.builder('Invoices') .columns(id: [1, 2], total: [10, 20]) .types(Integer, Integer) .build() - File tmp = Files.createTempFile("avro-options-default-schema-name-", ".avro").toFile() + File tmp = Files.createTempFile('avro-options-default-schema-name-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, new AvroWriteOptions()) def reader = new DataFileReader(tmp, new GenericDatumReader<>()) try { - assertEquals("Invoices", reader.schema.name) + assertEquals('Invoices', reader.schema.name) } finally { reader.close() } @@ -360,17 +360,17 @@ class MatrixAvroWriterTest { @Test void testWriteWithDeflateCompression() { - def cols = new LinkedHashMap>() - cols["id"] = (1..100).toList() - cols["data"] = 
(1..100).collect { "This is test data row $it with some repeated content" } + Map> cols = [:] + cols['id'] = (1..100).toList() + cols['data'] = (1..100).collect { "This is test data row $it with some repeated content" } - Matrix m = Matrix.builder("CompressionTest") + Matrix m = Matrix.builder('CompressionTest') .columns(cols) .types(Integer, String) .build() - File uncompressed = Files.createTempFile("avro-uncompressed-", ".avro").toFile() - File compressed = Files.createTempFile("avro-compressed-", ".avro").toFile() + File uncompressed = Files.createTempFile('avro-uncompressed-', '.avro').toFile() + File compressed = Files.createTempFile('avro-compressed-', '.avro').toFile() try { // Write without compression @@ -403,7 +403,7 @@ class MatrixAvroWriterTest { .compression(AvroWriteOptions.Compression.SNAPPY) .compressionLevel(6) } - assertEquals("SNAPPY compression does not support compressionLevel; use -1", ex.message) + assertEquals('SNAPPY compression does not support compressionLevel; use -1', ex.message) } @Test @@ -414,7 +414,7 @@ class MatrixAvroWriterTest { compressionLevel: 6 ]) } - assertEquals("SNAPPY compression does not support compressionLevel; use -1", ex.message) + assertEquals('SNAPPY compression does not support compressionLevel; use -1', ex.message) } @Test @@ -422,7 +422,7 @@ class MatrixAvroWriterTest { def ex = assertThrows(IllegalArgumentException) { new AvroWriteOptions().syncInterval(16) } - assertTrue(ex.message.contains("syncInterval must be 0")) + assertTrue(ex.message.contains('syncInterval must be 0')) } @Test @@ -430,22 +430,22 @@ class MatrixAvroWriterTest { def ex = assertThrows(IllegalArgumentException) { AvroWriteOptions.fromMap([syncInterval: 16]) } - assertTrue(ex.message.contains("syncInterval must be 0")) + assertTrue(ex.message.contains('syncInterval must be 0')) } @Test void testWriteBytesWithOptions() { - def cols = new LinkedHashMap>() - cols["value"] = [new BigDecimal("123.45"), new BigDecimal("678.90")] + Map> cols = [:] 
+ cols['value'] = [123.45, 678.90] - Matrix m = Matrix.builder("BytesOptions") + Matrix m = Matrix.builder('BytesOptions') .columns(cols) .types(BigDecimal) .build() def options = new AvroWriteOptions() .inferPrecisionAndScale(true) - .schemaName("DecimalData") + .schemaName('DecimalData') byte[] bytes = MatrixAvroWriter.writeBytes(m, options) assertNotNull(bytes) @@ -454,17 +454,17 @@ class MatrixAvroWriterTest { // Verify round-trip Matrix result = se.alipsa.matrix.avro.MatrixAvroReader.read(bytes) assertEquals(2, result.rowCount()) - assertEquals(new BigDecimal("123.45"), result[0, "value"]) + assertEquals(123.45, result[0, 'value']) } @Test void testExplicitDecimalColumnSchemaOverridesInferenceDefaults() { - Matrix m = Matrix.builder("ExplicitDecimal") - .columns(amount: [new BigDecimal("12.340"), new BigDecimal("56.780")]) + Matrix m = Matrix.builder('ExplicitDecimal') + .columns(amount: [12.340, 56.780]) .types(BigDecimal) .build() - File tmp = Files.createTempFile("avro-explicit-decimal-", ".avro").toFile() + File tmp = Files.createTempFile('avro-explicit-decimal-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, new AvroWriteOptions() .inferPrecisionAndScale(false) @@ -488,12 +488,12 @@ class MatrixAvroWriterTest { @Test void testColumnSchemaCanForceMapEncoding() { - Matrix m = Matrix.builder("ForceMap") + Matrix m = Matrix.builder('ForceMap') .columns(props: [[x: 1, y: 2], [x: 3, y: 4]]) .types(Map) .build() - File tmp = Files.createTempFile("avro-force-map-", ".avro").toFile() + File tmp = Files.createTempFile('avro-force-map-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, new AvroWriteOptions() .columnSchema('props', AvroSchemaDecl.map(AvroSchemaDecl.type(Integer)))) @@ -514,12 +514,12 @@ class MatrixAvroWriterTest { @Test void testColumnSchemaCanForceRecordEncoding() { - Matrix m = Matrix.builder("ForceRecord") + Matrix m = Matrix.builder('ForceRecord') .columns(props: [[x: 1], [y: 2], null]) .types(Map) .build() - File tmp = 
Files.createTempFile("avro-force-record-", ".avro").toFile() + File tmp = Files.createTempFile('avro-force-record-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, new AvroWriteOptions() .columnSchema('props', AvroSchemaDecl.record('PropsRecord', [ @@ -543,12 +543,12 @@ class MatrixAvroWriterTest { @Test void testColumnSchemaCanForceArrayElementType() { - Matrix m = Matrix.builder("ForceArray") + Matrix m = Matrix.builder('ForceArray') .columns(tags: [[1, 2], [3L, null]]) .types(List) .build() - File tmp = Files.createTempFile("avro-force-array-", ".avro").toFile() + File tmp = Files.createTempFile('avro-force-array-', '.avro').toFile() try { MatrixAvroWriter.write(m, tmp, new AvroWriteOptions() .columnSchema('tags', AvroSchemaDecl.array(AvroSchemaDecl.type(Long)))) @@ -571,7 +571,7 @@ class MatrixAvroWriterTest { void testWriteOptionsRoundTripToMap() { AvroWriteOptions options = new AvroWriteOptions() .inferPrecisionAndScale(true) - .namespace("se.alipsa.matrix.roundtrip") + .namespace('se.alipsa.matrix.roundtrip') .compression(AvroWriteOptions.Compression.DEFLATE) .compressionLevel(9) .syncInterval(64000) @@ -580,26 +580,26 @@ class MatrixAvroWriterTest { Map roundTrip = options.toMap() assertEquals(true, roundTrip.inferPrecisionAndScale) - assertEquals("se.alipsa.matrix.roundtrip", roundTrip.namespace) - assertEquals("DEFLATE", roundTrip.compression) + assertEquals('se.alipsa.matrix.roundtrip', roundTrip.namespace) + assertEquals('DEFLATE', roundTrip.compression) assertEquals(9, roundTrip.compressionLevel) assertEquals(64000, roundTrip.syncInterval) - assertFalse(roundTrip.containsKey("schemaName")) + assertFalse(roundTrip.containsKey('schemaName')) assertEquals('decimal', (((roundTrip.columnSchemas as Map).amount) as Map).kind) } @Test void testWriteWithOptionsNullValidation() { - def cols = new LinkedHashMap>() - cols["id"] = [1] - Matrix m = Matrix.builder("Test").columns(cols).types(Integer).build() + Map> cols = [:] + cols['id'] = [1] + Matrix m = 
Matrix.builder('Test').columns(cols).types(Integer).build() - File tmp = Files.createTempFile("avro-test-", ".avro").toFile() + File tmp = Files.createTempFile('avro-test-', '.avro').toFile() try { def ex = assertThrows(IllegalArgumentException) { MatrixAvroWriter.write(m, tmp, (AvroWriteOptions) null) } - assertEquals("Options cannot be null", ex.message) + assertEquals('Options cannot be null', ex.message) } finally { tmp.delete() } @@ -607,12 +607,12 @@ class MatrixAvroWriterTest { @Test void testUnknownColumnSchemaFailsFast() { - Matrix m = Matrix.builder("UnknownColumn") + Matrix m = Matrix.builder('UnknownColumn') .columns(id: [1, 2]) .types(Integer) .build() - File tmp = Files.createTempFile("avro-unknown-column-schema-", ".avro").toFile() + File tmp = Files.createTempFile('avro-unknown-column-schema-', '.avro').toFile() try { IllegalArgumentException ex = assertThrows(IllegalArgumentException) { MatrixAvroWriter.write(m, tmp, new AvroWriteOptions() @@ -626,18 +626,18 @@ class MatrixAvroWriterTest { @Test void testWriteToOutputStreamWithOptions() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2] - cols["name"] = ["Test1", "Test2"] + Map> cols = [:] + cols['id'] = [1, 2] + cols['name'] = ['Test1', 'Test2'] - Matrix m = Matrix.builder("StreamOptions") + Matrix m = Matrix.builder('StreamOptions') .columns(cols) .types(Integer, String) .build() def options = new AvroWriteOptions() - .namespace("stream.test") - .schemaName("StreamData") + .namespace('stream.test') + .schemaName('StreamData') ByteArrayOutputStream baos = new ByteArrayOutputStream() MatrixAvroWriter.write(m, baos, options) @@ -653,14 +653,14 @@ class MatrixAvroWriterTest { @Test void testValidationExceptionForNullMatrix() { - File tmp = Files.createTempFile("avro-test-", ".avro").toFile() + File tmp = Files.createTempFile('avro-test-', '.avro').toFile() try { def ex = assertThrows(AvroValidationException) { MatrixAvroWriter.write(null, tmp) } - assertEquals("matrix", ex.parameterName) + 
assertEquals('matrix', ex.parameterName) assertNotNull(ex.suggestion) - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } finally { tmp.delete() } @@ -668,15 +668,15 @@ class MatrixAvroWriterTest { @Test void testValidationExceptionForEmptyMatrix() { - Matrix m = Matrix.builder("Empty").build() - File tmp = Files.createTempFile("avro-test-", ".avro").toFile() + Matrix m = Matrix.builder('Empty').build() + File tmp = Files.createTempFile('avro-test-', '.avro').toFile() try { def ex = assertThrows(AvroValidationException) { MatrixAvroWriter.write(m, tmp) } - assertEquals("matrix", ex.parameterName) + assertEquals('matrix', ex.parameterName) assertNotNull(ex.suggestion) - assertTrue(ex.message.contains("at least one column")) + assertTrue(ex.message.contains('at least one column')) } finally { tmp.delete() } @@ -684,38 +684,38 @@ class MatrixAvroWriterTest { @Test void testValidationExceptionForNullFileParam() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2] - Matrix m = Matrix.builder("Test").columns(cols).types(Integer).build() + Map> cols = [:] + cols['id'] = [1, 2] + Matrix m = Matrix.builder('Test').columns(cols).types(Integer).build() def ex = assertThrows(AvroValidationException) { MatrixAvroWriter.write(m, (File) null) } - assertEquals("file", ex.parameterName) + assertEquals('file', ex.parameterName) assertNotNull(ex.suggestion) - assertTrue(ex.message.contains("cannot be null")) + assertTrue(ex.message.contains('cannot be null')) } @Test void testValidationExceptionForColumnSizeMismatch() { - def cols = new LinkedHashMap>() - cols["id"] = [1, 2, 3] - cols["name"] = ["Alice", "Bob"] + Map> cols = [:] + cols['id'] = [1, 2, 3] + cols['name'] = ['Alice', 'Bob'] - Matrix m = Matrix.builder("Mismatch") + Matrix m = Matrix.builder('Mismatch') .columns(cols) .types(Integer, String) .build() - File tmp = Files.createTempFile("avro-test-", ".avro").toFile() + File tmp = Files.createTempFile('avro-test-', 
'.avro').toFile() try { def ex = assertThrows(AvroValidationException) { MatrixAvroWriter.write(m, tmp) } - assertEquals("matrix", ex.parameterName) + assertEquals('matrix', ex.parameterName) assertEquals(2, ex.rowNumber) assertNotNull(ex.suggestion) - assertTrue(ex.message.contains("row: 2")) + assertTrue(ex.message.contains('row: 2')) } finally { tmp.delete() } @@ -723,37 +723,40 @@ class MatrixAvroWriterTest { @Test void testSchemaExceptionForTypeMismatch() { - def cols = new LinkedHashMap>() - cols["props"] = [[a: 1], [1, 2]] + Map> cols = [:] + cols['props'] = [[a: 1], [1, 2]] - Matrix m = Matrix.builder("TypeMismatch") + Matrix m = Matrix.builder('TypeMismatch') .columns(cols) .types(Object) .build() - File tmp = Files.createTempFile("avro-test-", ".avro").toFile() + File tmp = Files.createTempFile('avro-test-', '.avro').toFile() try { def ex = assertThrows(AvroSchemaException) { MatrixAvroWriter.write(m, tmp) } - assertEquals("props", ex.columnName) - assertEquals("RECORD", ex.expectedType) - assertEquals("ArrayList", ex.actualType) - assertTrue(ex.message.contains("expected")) + assertEquals('props', ex.columnName) + assertEquals('RECORD', ex.expectedType) + assertEquals('ArrayList', ex.actualType) + assertTrue(ex.message.contains('expected')) } finally { tmp.delete() } } - // Helper: unwrap ["null", T] to T + // Helper: unwrap ['null', T] to T private static Schema nonNullFieldSchema(Schema record, String fieldName) { Schema s = record.getField(fieldName).schema() if (s.getType() == Schema.Type.UNION) { for (Schema t : s.getTypes()) { - if (t.getType() != Schema.Type.NULL) return t + if (t.getType() != Schema.Type.NULL) { + return t + } } fail("Union for field '$fieldName' had no non-null type") } return s } + } From 535b21aba52b36e98afd7d6dae7f5e990db56e9b Mon Sep 17 00:00:00 2001 From: per Date: Fri, 1 May 2026 12:29:42 +0200 Subject: [PATCH 2/3] Spotless formatting --- matrix-avro/build.gradle | 2 +- 
.../groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy | 1 + .../groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy | 1 - .../main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy | 2 +- .../main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy | 4 +++- .../main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy | 1 + .../main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy | 1 + .../groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy | 1 + .../groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy | 1 + .../main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy | 2 ++ .../main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy | 2 ++ .../groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy | 1 + .../groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy | 1 + .../alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy | 1 + .../avro/benchmarks/MatrixAvroConversionBenchmark.groovy | 1 + .../matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy | 1 + .../matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy | 1 + .../groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy | 3 ++- .../test/alipsa/matrix/avro/AvroFormatProviderTest.groovy | 2 ++ .../test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy | 2 ++ .../test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy | 4 +++- .../test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy | 4 +++- .../test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy | 4 +++- matrix-bom/bom.xml | 2 +- 24 files changed, 36 insertions(+), 9 deletions(-) diff --git a/matrix-avro/build.gradle b/matrix-avro/build.gradle index 3e74a8b5d..8f92f487c 100644 --- a/matrix-avro/build.gradle +++ b/matrix-avro/build.gradle @@ -10,7 +10,7 @@ plugins { } group = 'se.alipsa.matrix' -version = '0.2.1-SNAPSHOT' +version = '0.2.1' description = 'Matrix Avro import/export with schema evolution and logical type support' JavaCompile javaCompile = compileJava { diff --git 
a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy index 40620b74d..1fdc8169e 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ArrayAvroSchemaDecl.groovy @@ -3,6 +3,7 @@ package se.alipsa.matrix.avro import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString + import org.apache.avro.Schema /** diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy index 9dace4d28..0f491eb80 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroFormatProvider.groovy @@ -2,7 +2,6 @@ package se.alipsa.matrix.avro import se.alipsa.matrix.core.Matrix import se.alipsa.matrix.core.spi.AbstractFormatProvider - import se.alipsa.matrix.core.spi.OptionDescriptor /** diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy index 5b5f2c3c6..72ad75b52 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroReadOptions.groovy @@ -1,8 +1,8 @@ package se.alipsa.matrix.avro import org.apache.avro.Schema -import se.alipsa.matrix.core.spi.OptionDescriptor +import se.alipsa.matrix.core.spi.OptionDescriptor import se.alipsa.matrix.core.spi.OptionMaps /** diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy index 57cb6ad3f..b40ae5d08 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy +++ 
b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaDecl.groovy @@ -1,13 +1,15 @@ package se.alipsa.matrix.avro import groovy.transform.PackageScope + import org.apache.avro.Schema + import se.alipsa.matrix.core.spi.OptionMaps + import java.time.Instant import java.time.LocalDate import java.time.LocalDateTime import java.time.LocalTime - import java.util.Locale /** diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy index 2d741d2ff..831c77795 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy @@ -2,6 +2,7 @@ package se.alipsa.matrix.avro import org.apache.avro.LogicalTypes import org.apache.avro.Schema + import se.alipsa.matrix.avro.exceptions.AvroSchemaException /** diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy index 2a94898c4..1cdd313bf 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroWriteOptions.groovy @@ -2,6 +2,7 @@ package se.alipsa.matrix.avro import org.apache.avro.file.CodecFactory import org.apache.avro.file.DataFileConstants + import se.alipsa.matrix.core.spi.OptionDescriptor import se.alipsa.matrix.core.spi.OptionMaps diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy index d14a16148..34cb64324 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/DecimalAvroSchemaDecl.groovy @@ -3,6 +3,7 @@ package se.alipsa.matrix.avro import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope 
import groovy.transform.ToString + import org.apache.avro.LogicalTypes import org.apache.avro.Schema diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy index 592bafcdc..0652e77b9 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MapAvroSchemaDecl.groovy @@ -3,6 +3,7 @@ package se.alipsa.matrix.avro import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString + import org.apache.avro.Schema /** diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy index 4882f0e9a..afe0738ea 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy @@ -8,9 +8,11 @@ import org.apache.avro.generic.GenericDatumReader import org.apache.avro.generic.GenericFixed import org.apache.avro.generic.GenericRecord import org.apache.avro.util.Utf8 + import se.alipsa.matrix.avro.exceptions.AvroConversionException import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix + import java.nio.ByteBuffer import java.nio.file.Path import java.time.* diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy index a60061945..5cc12a180 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy @@ -9,10 +9,12 @@ import org.apache.avro.generic.GenericData import org.apache.avro.generic.GenericDatumWriter import org.apache.avro.generic.GenericFixed import org.apache.avro.generic.GenericRecord 
+ import se.alipsa.matrix.avro.exceptions.AvroConversionException import se.alipsa.matrix.avro.exceptions.AvroSchemaException import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix + import java.math.RoundingMode import java.nio.ByteBuffer import java.nio.file.Path diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy index d0a9c9ab0..acd98bede 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/RecordAvroSchemaDecl.groovy @@ -3,6 +3,7 @@ package se.alipsa.matrix.avro import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString + import org.apache.avro.Schema /** diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy index 9c073ad42..20931e395 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/ScalarAvroSchemaDecl.groovy @@ -3,6 +3,7 @@ package se.alipsa.matrix.avro import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope import groovy.transform.ToString + import org.apache.avro.Schema /** diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy index 279999710..10b1c73fa 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroBenchmark.groovy @@ -3,6 +3,7 @@ package se.alipsa.matrix.avro.benchmarks import se.alipsa.matrix.avro.MatrixAvroReader import 
se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix + import java.nio.file.Files /** diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy index 454a79aac..59b8656a7 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroConversionBenchmark.groovy @@ -3,6 +3,7 @@ package se.alipsa.matrix.avro.benchmarks import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix + import java.time.Instant import java.time.LocalDate import java.time.LocalDateTime diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy index ecb5e3e8f..73ee2fb7d 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroParquetBenchmark.groovy @@ -5,6 +5,7 @@ import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix import se.alipsa.matrix.parquet.MatrixParquetReader import se.alipsa.matrix.parquet.MatrixParquetWriter + import java.nio.file.Files import java.time.LocalDate import java.time.LocalDateTime diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy index 1029930ef..73aef5f37 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/MatrixAvroSchemaBenchmark.groovy 
@@ -2,6 +2,7 @@ package se.alipsa.matrix.avro.benchmarks import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix + import java.time.LocalDate import java.time.LocalDateTime diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy index ca3c5c3a3..d19725d0f 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroExceptionTest.groovy @@ -2,9 +2,10 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.assertEquals import static org.junit.jupiter.api.Assertions.assertTrue + import org.junit.jupiter.api.Test -import se.alipsa.matrix.avro.exceptions.AvroConversionException +import se.alipsa.matrix.avro.exceptions.AvroConversionException import se.alipsa.matrix.avro.exceptions.AvroSchemaException class AvroExceptionTest { diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy index a15c69219..04834837b 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/AvroFormatProviderTest.groovy @@ -3,8 +3,10 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.assertEquals import static org.junit.jupiter.api.Assertions.assertFalse import static org.junit.jupiter.api.Assertions.assertTrue + import org.junit.jupiter.api.Test import org.junit.jupiter.api.io.TempDir + import se.alipsa.matrix.avro.AvroFormatProvider import se.alipsa.matrix.avro.AvroReadOptions import se.alipsa.matrix.avro.AvroSchemaDecl diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy 
index dc5ffbd93..bda71fe3a 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroEdgeCaseTest.groovy @@ -4,7 +4,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals import static org.junit.jupiter.api.Assertions.assertNotNull import static org.junit.jupiter.api.Assertions.assertThrows import static org.junit.jupiter.api.Assertions.assertTrue + import org.junit.jupiter.api.Test + import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.avro.exceptions.AvroSchemaException diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy index bd4e372a4..9d7a8e6bd 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy @@ -1,6 +1,7 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.* + import org.apache.avro.Conversions import org.apache.avro.LogicalTypes import org.apache.avro.Schema @@ -9,15 +10,16 @@ import org.apache.avro.generic.GenericData import org.apache.avro.generic.GenericDatumWriter import org.apache.avro.generic.GenericRecord import org.junit.jupiter.api.* + import se.alipsa.matrix.avro.AvroReadOptions import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix + import java.nio.ByteBuffer import java.nio.file.Files import java.nio.file.Path import java.time.Instant - import java.time.LocalDate @TestMethodOrder(MethodOrderer.OrderAnnotation) diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy index 
16eb3e7dd..af75be8f6 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroRoundTripTest.groovy @@ -1,18 +1,20 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.* + import org.apache.avro.Schema import org.apache.avro.file.DataFileReader import org.apache.avro.generic.GenericDatumReader import org.apache.avro.generic.GenericRecord import org.junit.jupiter.api.* + import se.alipsa.matrix.avro.AvroSchemaDecl import se.alipsa.matrix.avro.AvroWriteOptions import se.alipsa.matrix.avro.MatrixAvroReader import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.core.Matrix -import java.nio.file.Files +import java.nio.file.Files import java.time.* class MatrixAvroRoundTripTest { diff --git a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy index 91d05577f..21396d146 100644 --- a/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy +++ b/matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroWriterTest.groovy @@ -1,22 +1,24 @@ package test.alipsa.matrix.avro import static org.junit.jupiter.api.Assertions.* + import org.apache.avro.LogicalTypes import org.apache.avro.Schema import org.apache.avro.file.DataFileReader import org.apache.avro.generic.GenericDatumReader import org.apache.avro.generic.GenericRecord import org.junit.jupiter.api.Test + import se.alipsa.matrix.avro.AvroSchemaDecl import se.alipsa.matrix.avro.AvroWriteOptions import se.alipsa.matrix.avro.MatrixAvroWriter import se.alipsa.matrix.avro.exceptions.AvroSchemaException import se.alipsa.matrix.avro.exceptions.AvroValidationException import se.alipsa.matrix.core.Matrix + import java.nio.file.Files import java.nio.file.Path import java.time.LocalDateTime - import java.time.LocalTime class MatrixAvroWriterTest { diff 
--git a/matrix-bom/bom.xml b/matrix-bom/bom.xml index 00dfde7f6..b55d79962 100644 --- a/matrix-bom/bom.xml +++ b/matrix-bom/bom.xml @@ -33,7 +33,7 @@ 21 0.2.1 - 0.2.1-SNAPSHOT + 0.2.1 0.6.1 0.5.0-SNAPSHOT 3.7.1 From 02903044ba7c3f510d1783869d6a95e8f481faac Mon Sep 17 00:00:00 2001 From: per Date: Fri, 1 May 2026 14:16:14 +0200 Subject: [PATCH 3/3] Address matrix-avro review feedback --- matrix-avro/req/v0.3.0.md | 123 ++++++++++++++++++ .../alipsa/matrix/avro/AvroSchemaUtil.groovy | 6 + .../matrix/avro/MatrixAvroReader.groovy | 7 +- .../matrix/avro/MatrixAvroWriter.groovy | 10 +- .../exceptions/AvroConversionException.groovy | 8 +- .../exceptions/AvroSchemaException.groovy | 17 +-- .../exceptions/AvroValidationException.groovy | 4 +- .../avro/benchmarks/BenchmarkUtils.groovy | 8 +- 8 files changed, 150 insertions(+), 33 deletions(-) create mode 100644 matrix-avro/req/v0.3.0.md diff --git a/matrix-avro/req/v0.3.0.md b/matrix-avro/req/v0.3.0.md new file mode 100644 index 000000000..84dccc642 --- /dev/null +++ b/matrix-avro/req/v0.3.0.md @@ -0,0 +1,123 @@ +# Matrix-AVRO v0.3.0 Plan + +## Scope + +This plan addresses the module review findings and adds usability improvements for schema inspection, option ergonomics, decimal-safe writing, explicit schema declarations, and documentation. Each numbered section is intended to be implementable and reviewable as a separate pull request. + +A task is only complete when its tests have passed and the exact test command has been recorded in the section or PR description. + +## 1. Correct Stream Ownership Semantics + +1.1 [ ] Decide and document the stream ownership contract for `MatrixAvroReader.read(InputStream, ...)` and `MatrixAvroWriter.write(Matrix, OutputStream, ...)`: either caller-owned streams remain open, or the API explicitly states that Avro closes them. 
+ +1.2 [ ] If caller-owned streams should remain open, add close-shield wrappers in `MatrixAvroReader` and `MatrixAvroWriter` so closing `DataFileStream` or `DataFileWriter` does not close the caller's stream. + +1.3 [ ] If streams are intentionally closed, update GroovyDoc, README, tutorial, and cookbook examples to state this clearly and show caller code that does not reuse the stream afterward. + +1.4 [ ] Add tests in `matrix-avro/src/test/groovy/test/alipsa/matrix/avro/MatrixAvroReaderTest.groovy` and `MatrixAvroWriterTest.groovy` proving the final stream ownership behavior for both direct and options-based overloads. + +1.5 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 2. Remove or Harden Schema Caching + +2.1 [ ] Reevaluate the schema cache in `MatrixAvroWriter`: remove it unless benchmarks show it is needed, because schema inference depends on mutable Matrix data values beyond the current cache key. + +2.2 [ ] If the cache is retained, extend `SchemaCacheKey` to include the inferred data profile used by schema generation: decimal precision/scale, list element class, map value class, record-vs-map classification, and record field metadata. + +2.3 [ ] Add tests proving repeated writes of the same mutated `Matrix` instance produce fresh schemas when decimal precision/scale, list element type, map value type, or record-like keys change. + +2.4 [ ] Add or update benchmarks only if caching is retained, so the performance reason for the additional complexity is documented and reproducible. + +2.5 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 3. Align Public API Validation + +3.1 [ ] Update `MatrixAvroWriter.buildSchema(Matrix, boolean)` to call the same Matrix validation used by write paths, including null, empty matrix, and column-size checks. 
+ +3.2 [ ] Update `MatrixAvroWriter.buildSchema(Matrix, AvroWriteOptions)` to fail with `IllegalArgumentException('Options cannot be null')` or the module's established validation style when options are null. + +3.3 [ ] Audit public reader and writer methods for inconsistent null handling or NPE-prone paths, and align them with the module's explicit validation exceptions. + +3.4 [ ] Add tests covering public `buildSchema` validation and any newly aligned validation paths. + +3.5 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 4. Fix Negative Local Timestamp Reads + +4.1 [ ] Update `MatrixAvroReader.toLocalDateTimeMillis` to use `Math.floorMod(ms, MILLIS_PER_SECOND)` when calculating nanoseconds, matching the micros path and supporting values before the Unix epoch. + +4.2 [ ] Add tests with `local-timestamp-millis` and `local-timestamp-micros` values before `1970-01-01T00:00:00` to verify both paths decode correctly. + +4.3 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 5. Add Schema Inspection APIs + +5.1 [ ] Add `MatrixAvroReader.schema(File)`, `schema(Path)`, `schema(URL)`, `schema(byte[])`, and `schema(InputStream)` methods that return the Avro writer schema without reading all rows. + +5.2 [ ] Add options-aware schema inspection overloads only where they are meaningful, for example to expose the effective reader schema or projected schema when `AvroReadOptions.readerSchema(...)` is supplied. + +5.3 [ ] Ensure schema inspection methods follow the same validation, naming, and stream ownership contract chosen in section 1. + +5.4 [ ] Add tests for schema inspection from file, path, byte array, and stream sources, including invalid or empty Avro content. + +5.5 [ ] Document schema inspection in `matrix-avro/README.md`, the Avro tutorial, and the Avro cookbook. 
+ +5.6 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 6. Improve Options Ergonomics + +6.1 [ ] Add static factory methods to `AvroReadOptions` and `AvroWriteOptions` for common starting points, such as `AvroWriteOptions.defaults()`, `AvroWriteOptions.exactDecimals()`, and `AvroReadOptions.named(String)`. + +6.2 [ ] Evaluate a closure-based Groovy configuration API for options, for example `AvroWriteOptions.configure { inferPrecisionAndScale true; schemaName 'Orders' }`, while keeping the typed fluent API as the primary Java-friendly surface. + +6.3 [ ] Add convenience methods for common schema declarations, such as `AvroSchemaDecl.arrayOf(Class)`, `mapOf(Class)`, `decimalColumn(int, int)` naming alternatives, or similar helpers that reduce nested calls without introducing `Object`-typed APIs. + +6.4 [ ] Add tests proving the new factories and convenience helpers produce the same option maps and Avro schemas as the existing explicit fluent API. + +6.5 [ ] Document the new ergonomic API in README, tutorial, and cookbook examples without removing the existing direct and SPI examples. + +6.6 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 7. Make Decimal-Safe Writes Easier + +7.1 [ ] Add a clearly named shortcut for decimal-safe writes, such as `MatrixAvroWriter.writeExactDecimals(...)`, `MatrixAvroWriter.writeDecimalSafe(...)`, or an `AvroWriteOptions.exactDecimals()` factory, while preserving existing overloads for compatibility. + +7.2 [ ] Decide whether v0.3.0 should change the default BigDecimal behavior from double fallback to decimal inference. If changing the default is too breaking, document the compatibility reason and keep the shortcut prominent. 
+ +7.3 [ ] Add tests showing BigDecimal columns round-trip as `BigDecimal` through the new decimal-safe shortcut and still follow the documented compatibility path through existing defaults. + +7.4 [ ] Update docs to explain the precision tradeoff in one place, with examples for default, decimal-safe, and explicit precision/scale writes. + +7.5 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 8. Strengthen Explicit Schema Declaration Usability + +8.1 [ ] Add ergonomic aliases to `AvroSchemaDecl` for common nested declarations while keeping the existing typed declaration classes package-scoped. + +8.2 [ ] Expand explicit schema declaration tests to cover nested arrays, nested maps, mixed record fields, invalid nested Avro field names, and SPI map parsing of the new aliases if aliases are serializable. + +8.3 [ ] Review error messages from `AvroSchemaDecl.fromMap(...)` and `AvroWriteOptions.columnSchema(...)` so common mistakes explain the expected typed alternative. + +8.4 [ ] Document map-vs-record inference prominently, including how to force map encoding and record encoding with `columnSchema`. + +8.5 [ ] Run tests and record the commands: `./gradlew :matrix-avro:test` and `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest`. + +## 9. Documentation and Release Notes + +9.1 [ ] Update `matrix-avro/README.md` with a v0.3.0 section covering stream ownership, schema inspection, decimal-safe writes, schema cache behavior, and explicit schema declaration shortcuts. + +9.2 [ ] Update the root Avro tutorial page with end-to-end examples for schema inspection, decimal-safe writes, schema evolution, and forcing map-vs-record encoding. + +9.3 [ ] Update the root Avro cookbook page with focused recipes for each usability enhancement. + +9.4 [ ] Update `matrix-avro/release.md` with user-visible v0.3.0 changes and migration notes for any behavior changes. 
+ +9.5 [ ] Run documentation-related checks if available, and record the commands. At minimum, run `./gradlew :matrix-avro:test` after documentation examples are updated. + +## 10. Final Verification + +10.1 [ ] Run module verification and record the commands: `./gradlew :matrix-avro:compileGroovy :matrix-avro:compileTestGroovy`, `./gradlew :matrix-avro:codenarcMain :matrix-avro:codenarcTest --rerun-tasks`, and `./gradlew :matrix-avro:test`. + +10.2 [ ] Run full repository verification and record the command: `./gradlew test`. + +10.3 [ ] Confirm `matrix-avro` CodeNarc remains fail-on-warning and the final PR description lists the exact verification commands and outcomes. diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy index 831c77795..66109f06f 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/AvroSchemaUtil.groovy @@ -15,6 +15,12 @@ final class AvroSchemaUtil { static Schema nullableSchema(Schema schema) { Schema.createUnion([Schema.create(Schema.Type.NULL), schema]) } + static Schema nonNullSchema(Schema schema) { + if (schema.getType() != Schema.Type.UNION) { + return schema + } + schema.getTypes().find { Schema candidate -> candidate.getType() != Schema.Type.NULL } ?: schema + } static Schema scalarSchema(AvroScalarTypeDecl scalarType) { switch (scalarType) { case AvroScalarTypeDecl.STRING -> Schema.create(Schema.Type.STRING) diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy index afe0738ea..cef63c070 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroReader.groovy @@ -394,7 +394,7 @@ class MatrixAvroReader { return null } if (schema.getType() == 
Schema.Type.UNION) { - return convertValue(nonNullSchema(schema), v) + return convertValue(AvroSchemaUtil.nonNullSchema(schema), v) } LogicalType lt = schema.getLogicalType() if (lt != null) { @@ -403,11 +403,6 @@ class MatrixAvroReader { } convertSchemaValue(schema, v) } - private static Schema nonNullSchema(Schema schema) { - schema.getTypes().stream() - .filter(s -> s.getType() != Schema.Type.NULL) - .findFirst().orElse(schema) - } private static Object convertLogicalValue(LogicalType lt, Schema schema, Object v) { if (LogicalTypes.Decimal.isInstance(lt)) { return toBigDecimal((LogicalTypes.Decimal) lt, schema, v) diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy index 5cc12a180..64ba9f605 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/MatrixAvroWriter.groovy @@ -442,7 +442,6 @@ class MatrixAvroWriter { * @return the corresponding Avro schema */ private static Schema toFieldSchema(Class clazz, int[] decimalMeta) { - if (clazz == BigDecimal) { if (decimalMeta != null) { int precision = decimalMeta[0] > 0 ? decimalMeta[0] : 10 @@ -464,7 +463,6 @@ class MatrixAvroWriter { return Schema.create(Schema.Type.INT) } if (clazz == Long || clazz == long.class || clazz == BigInteger) { - return Schema.create(Schema.Type.LONG) } if (clazz == Float || clazz == float.class) { @@ -738,16 +736,10 @@ class MatrixAvroWriter { GenericData.Record record = new GenericData.Record(fieldSchema) fieldSchema.getFields().each { Schema.Field field -> def value = input == null ? 
null : input.get(field.name()) - record.put(field.name(), toAvroValue(nonNullSchema(field.schema()), value, decConv)) + record.put(field.name(), toAvroValue(AvroSchemaUtil.nonNullSchema(field.schema()), value, decConv)) } record } - private static Schema nonNullSchema(Schema schema) { - if (schema.getType() != Schema.Type.UNION) { - return schema - } - schema.getTypes().find { Schema candidate -> candidate.getType() != Schema.Type.NULL } ?: schema - } /** * Returns a cached schema for the given matrix and cache key, if available. * diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy index 346815eb4..a95701876 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroConversionException.groovy @@ -28,7 +28,7 @@ class AvroConversionException extends RuntimeException { private static final int NO_ROW = -1 /** The column name associated with this error, if applicable */ private final String columnName - /** The row number (0-based) where the error occurred, or NO_ROW if not applicable */ + /** The row number (0-based) where the error occurred, or -1 if not applicable */ private final int rowNumber /** The source type that could not be converted */ private final String sourceType @@ -68,7 +68,7 @@ class AvroConversionException extends RuntimeException { * * @param message the error message * @param columnName the column name where the error occurred - * @param rowNumber the row number (0-based) where the error occurred, or NO_ROW if not applicable + * @param rowNumber the row number (0-based) where the error occurred, or -1 if not applicable * @param sourceType the source type that could not be converted * @param targetType the target type for the conversion * @param value the value that could not be converted 
@@ -87,7 +87,7 @@ class AvroConversionException extends RuntimeException { * * @param message the error message * @param columnName the column name where the error occurred - * @param rowNumber the row number (0-based) where the error occurred, or NO_ROW if not applicable + * @param rowNumber the row number (0-based) where the error occurred, or -1 if not applicable * @param sourceType the source type that could not be converted * @param targetType the target type for the conversion * @param value the value that could not be converted @@ -109,7 +109,7 @@ class AvroConversionException extends RuntimeException { return columnName } /** - * @return the row number (0-based) where the error occurred, or NO_ROW if not applicable + * @return the row number (0-based) where the error occurred, or -1 if not applicable */ int getRowNumber() { return rowNumber diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy index b8f30dde7..7b3fc4220 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroSchemaException.groovy @@ -25,8 +25,6 @@ package se.alipsa.matrix.avro.exceptions */ class AvroSchemaException extends RuntimeException { - private static final String EXPECTED_PREFIX = ' (expected: ' - private static final String DETAILS_SUFFIX = ')' /** The column name associated with this error, if applicable */ private final String columnName /** The expected type, if applicable */ @@ -108,12 +106,15 @@ class AvroSchemaException extends RuntimeException { if (columnName != null) { sb.append(' [column: ').append(columnName).append(']') } - if (expectedType != null && actualType != null) { - sb.append(EXPECTED_PREFIX).append(expectedType).append(', actual: ').append(actualType).append(DETAILS_SUFFIX) - } else if (expectedType != null) { 
- sb.append(EXPECTED_PREFIX).append(expectedType).append(DETAILS_SUFFIX) - } else if (actualType != null) { - sb.append(' (actual: ').append(actualType).append(DETAILS_SUFFIX) + List details = [] + if (expectedType != null) { + details << 'expected: ' + expectedType + } + if (actualType != null) { + details << 'actual: ' + actualType + } + if (!details.isEmpty()) { + sb.append(' (').append(details.join(', ')).append(')') } return sb.toString() } diff --git a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy index b0b7029bf..20eb967e2 100644 --- a/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy +++ b/matrix-avro/src/main/groovy/se/alipsa/matrix/avro/exceptions/AvroValidationException.groovy @@ -32,7 +32,7 @@ class AvroValidationException extends IllegalArgumentException { private final String parameterName /** A helpful suggestion for fixing the error */ private final String suggestion - /** The row number associated with this error, or NO_ROW if not applicable */ + /** The row number associated with this error, or -1 if not applicable */ private final int rowNumber /** * Creates a new AvroValidationException with a message. 
@@ -111,7 +111,7 @@ class AvroValidationException extends IllegalArgumentException { return suggestion } /** - * @return the row number where validation failed, or NO_ROW if not applicable + * @return the row number where validation failed, or -1 if not applicable */ int getRowNumber() { return rowNumber diff --git a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy index 6e8e01c62..61ceb9234 100644 --- a/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy +++ b/matrix-avro/src/test/groovy/se/alipsa/matrix/avro/benchmarks/BenchmarkUtils.groovy @@ -7,7 +7,7 @@ final class BenchmarkUtils { private static final String ZERO_MS = '0' private static final String TWO_DECIMALS = '%.2f' - private static final double NANOS_PER_MILLI = 1_000_000.0d + private static final BigDecimal NANOSECONDS_PER_MILLISECOND = 1_000_000.0 private BenchmarkUtils() { } static long timeNs(Closure action) { @@ -20,19 +20,19 @@ final class BenchmarkUtils { return ZERO_MS } long sum = values.sum() as long - return String.format(Locale.US, TWO_DECIMALS, sum / NANOS_PER_MILLI / values.size()) + return String.format(Locale.US, TWO_DECIMALS, sum / NANOSECONDS_PER_MILLISECOND / values.size()) } static String minMs(List values) { if (values.isEmpty()) { return ZERO_MS } - return String.format(Locale.US, TWO_DECIMALS, (values.min() as long) / NANOS_PER_MILLI) + return String.format(Locale.US, TWO_DECIMALS, (values.min() as long) / NANOSECONDS_PER_MILLISECOND) } static String maxMs(List values) { if (values.isEmpty()) { return ZERO_MS } - return String.format(Locale.US, TWO_DECIMALS, (values.max() as long) / NANOS_PER_MILLI) + return String.format(Locale.US, TWO_DECIMALS, (values.max() as long) / NANOSECONDS_PER_MILLISECOND) } }