From 90f0e64c53882440cd073a161306c0438bff0e19 Mon Sep 17 00:00:00 2001 From: SAY-5 Date: Sun, 10 May 2026 16:26:18 -0700 Subject: [PATCH] fix(avro): error on complex (non-nullable) unions instead of silently dropping (#777) Signed-off-by: SAY-5 --- arrow/avro/schema.go | 11 ++++++++--- arrow/avro/schema_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/arrow/avro/schema.go b/arrow/avro/schema.go index 4d9e76707..8eb4b91a3 100644 --- a/arrow/avro/schema.go +++ b/arrow/avro/schema.go @@ -139,13 +139,16 @@ func arrowSchemafromAvro(n *schemaNode) { arrowSchemafromAvro(c) n.arrowField = buildArrowField(n, arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata) case "union": - if n.schema.(*avro.UnionSchema).Nullable() { - if len(n.schema.(*avro.UnionSchema).Types()) > 1 { - n.schema = n.schema.(*avro.UnionSchema).Types()[1] + us := n.schema.(*avro.UnionSchema) + if us.Nullable() { + if len(us.Types()) > 1 { + n.schema = us.Types()[1] n.union = true n.nullable = true arrowSchemafromAvro(n) } + } else { + panic(fmt.Errorf("complex (non-nullable) avro union at '%v' is not supported", n.schemaPath())) } // Avro "fixed" field type = Arrow FixedSize Primitive BinaryType case "fixed": @@ -245,6 +248,8 @@ func iterateFields(n *schemaNode) { c.nullable = true arrowSchemafromAvro(c) } + } else { + panic(fmt.Errorf("complex (non-nullable) avro union in field '%v' is not supported", f.Name())) } default: n.schemaCache.Add(f.Name(), f.Type()) diff --git a/arrow/avro/schema_test.go b/arrow/avro/schema_test.go index 33b6d2a05..b4b91b85a 100644 --- a/arrow/avro/schema_test.go +++ b/arrow/avro/schema_test.go @@ -18,10 +18,12 @@ package avro import ( "fmt" + "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/avro/testdata" + avropkg "github.com/hamba/avro/v2" ) func TestSchemaStringEqual(t *testing.T) { @@ -174,3 +176,26 @@ func TestSchemaStringEqual(t *testing.T) { 
}) } }
+
+// TestComplexUnionReportsError parses a record whose single field is the
+// non-nullable union ["int", "string"] and requires ArrowSchemaFromAvro to
+// return an error mentioning "union" rather than silently dropping the field.
+func TestComplexUnionReportsError(t *testing.T) {
+	const avroSchemaJSON = `{
+		"type": "record",
+		"name": "WithComplexUnion",
+		"fields": [
+			{"name": "value", "type": ["int", "string"]}
+		]
+	}`
+	parsed, parseErr := avropkg.Parse(avroSchemaJSON)
+	if parseErr != nil {
+		t.Fatalf("avro parse: %v", parseErr)
+	}
+	switch converted, convErr := ArrowSchemaFromAvro(parsed); {
+	case convErr == nil:
+		t.Fatalf("expected error for complex union, got schema=%v", converted)
+	case !strings.Contains(convErr.Error(), "union"):
+		t.Fatalf("expected error to mention union, got: %v", convErr)
+	}
+}