From a12e53f32d38d31379cef6beba4f3a7b3de3d7cf Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:25:55 +0200 Subject: [PATCH 01/26] refactor(search): add json tags to Document and Resource fields Use the exact current Go field names as json tags so the bleve index schema and OpenSearch JSON-unmarshal behavior stay identical. No reindex needed. Prepares the ground for a reflection-based mapping builder that uses json tags as the single source of truth for field names. --- services/search/pkg/content/content.go | 16 ++++++++-------- services/search/pkg/search/search.go | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/services/search/pkg/content/content.go b/services/search/pkg/content/content.go index e185351678..b162ccdcfc 100644 --- a/services/search/pkg/content/content.go +++ b/services/search/pkg/content/content.go @@ -14,14 +14,14 @@ func init() { // Document wraps all resource meta fields, // it is used as a content extraction result. type Document struct { - Title string - Name string - Content string - Size uint64 - Mtime string - MimeType string - Tags []string - Favorites []string + Title string `json:"Title"` + Name string `json:"Name"` + Content string `json:"Content"` + Size uint64 `json:"Size"` + Mtime string `json:"Mtime"` + MimeType string `json:"MimeType"` + Tags []string `json:"Tags"` + Favorites []string `json:"Favorites"` Audio *libregraph.Audio `json:"audio,omitempty"` Image *libregraph.Image `json:"image,omitempty"` Location *libregraph.GeoCoordinates `json:"location,omitempty"` diff --git a/services/search/pkg/search/search.go b/services/search/pkg/search/search.go index be0e922268..d5c6a3653f 100644 --- a/services/search/pkg/search/search.go +++ b/services/search/pkg/search/search.go @@ -51,13 +51,13 @@ type BatchOperator interface { type Resource struct { content.Document - ID string - RootID string - Path string - ParentID string - Type uint64 - Deleted bool - Hidden bool + ID string `json:"ID"` + RootID string `json:"RootID"` + Path string `json:"Path"` + ParentID string `json:"ParentID"` + Type uint64 `json:"Type"` + Deleted bool `json:"Deleted"` + Hidden bool `json:"Hidden"` } // ResolveReference makes sure the path is relative to the space root From f4b7a35c91eb89e9fd14f52b0a9083ef555b775e Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:33:37 +0200 Subject: [PATCH 02/26] refactor(search): add mapping package core (opts, infer, validate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce services/search/pkg/mapping as the foundation for a reflection-based index mapping builder. This commit contains only the building blocks: - opts.go: FieldOpts struct and Type* constants - infer.go: Go type → mapping type inference, json-tag field resolution, embedded-struct flattening walker - validate.go: checks that override keys reference real json-tag paths No production code uses these yet; followups will wire up BleveBuild- Mapping, OpenSearchBuildMapping, PrepareForIndex, and Deserialize on top. --- services/search/pkg/mapping/infer.go | 112 ++++++++++++++++++ services/search/pkg/mapping/infer_test.go | 116 +++++++++++++++++++ services/search/pkg/mapping/opts.go | 30 +++++ services/search/pkg/mapping/validate.go | 47 ++++++++ services/search/pkg/mapping/validate_test.go | 51 ++++++++ 5 files changed, 356 insertions(+) create mode 100644 services/search/pkg/mapping/infer.go create mode 100644 services/search/pkg/mapping/infer_test.go create mode 100644 services/search/pkg/mapping/opts.go create mode 100644 services/search/pkg/mapping/validate.go create mode 100644 services/search/pkg/mapping/validate_test.go diff --git a/services/search/pkg/mapping/infer.go b/services/search/pkg/mapping/infer.go new file mode 100644 index 0000000000..869ba76f02 --- /dev/null +++ b/services/search/pkg/mapping/infer.go @@ -0,0 +1,112 @@ +package mapping + +import ( + "reflect" + "strings" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +var ( + timeType = reflect.TypeFor[time.Time]() + timestampType = reflect.TypeFor[timestamppb.Timestamp]() +) + +// inferType returns the mapping type for a Go type. Pointers and slices are +// unwrapped to their element type. time.Time and timestamppb.Timestamp become +// datetime; other structs become object. +func inferType(t reflect.Type) string { + for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { + t = t.Elem() + } + switch t.Kind() { + case reflect.String: + return TypeKeyword + case reflect.Bool: + return TypeBool + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64: + return TypeNumeric + case reflect.Struct: + if t == timeType || t == timestampType { + return TypeDatetime + } + return TypeObject + } + return "" +} + +// fieldInfo is the resolved metadata for one struct field. +type fieldInfo struct { + Name string + GoField reflect.StructField + Skip bool + Embedded bool +} + +// resolveField resolves a struct field's json-tag name and skip/embed state. +func resolveField(sf reflect.StructField) fieldInfo { + if !sf.IsExported() { + return fieldInfo{Skip: true} + } + name := sf.Name + tag := sf.Tag.Get("json") + if tag != "" { + first, _, _ := strings.Cut(tag, ",") + if first == "-" { + return fieldInfo{Skip: true} + } + if first != "" { + name = first + } + } + return fieldInfo{ + Name: name, + GoField: sf, + Embedded: sf.Anonymous, + } +} + +// walkFields visits exported leaf fields of t, flattening embedded structs +// onto the enclosing level. It returns the first error returned by fn. +func walkFields(t reflect.Type, fn func(fi fieldInfo) error) error { + for t.Kind() == reflect.Ptr { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return nil + } + for i := 0; i < t.NumField(); i++ { + fi := resolveField(t.Field(i)) + if fi.Skip { + continue + } + if fi.Embedded { + if err := walkFields(fi.GoField.Type, fn); err != nil { + return err + } + continue + } + if err := fn(fi); err != nil { + return err + } + } + return nil +} + +// structType returns the underlying struct type, unwrapping pointers and +// slices. Returns nil when t is not a walkable struct (e.g. time.Time). +func structType(t reflect.Type) reflect.Type { + for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return nil + } + if t == timeType || t == timestampType { + return nil + } + return t +} diff --git a/services/search/pkg/mapping/infer_test.go b/services/search/pkg/mapping/infer_test.go new file mode 100644 index 0000000000..080b116e47 --- /dev/null +++ b/services/search/pkg/mapping/infer_test.go @@ -0,0 +1,116 @@ +package mapping + +import ( + "reflect" + "testing" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +func TestInferType(t *testing.T) { + cases := []struct { + name string + in any + want string + }{ + {"string", "", TypeKeyword}, + {"*string", (*string)(nil), TypeKeyword}, + {"[]string", []string(nil), TypeKeyword}, + {"bool", false, TypeBool}, + {"int", int(0), TypeNumeric}, + {"int64", int64(0), TypeNumeric}, + {"uint64", uint64(0), TypeNumeric}, + {"float64", float64(0), TypeNumeric}, + {"time.Time", time.Time{}, TypeDatetime}, + {"*time.Time", (*time.Time)(nil), TypeDatetime}, + {"*timestamppb.Timestamp", (*timestamppb.Timestamp)(nil), TypeDatetime}, + {"struct", struct{ X int }{}, TypeObject}, + {"*struct", (*struct{ X int })(nil), TypeObject}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := inferType(reflect.TypeOf(c.in)) + if got != c.want { + t.Fatalf("inferType(%s): got %q, want %q", c.name, got, c.want) + } + }) + } +} + +func TestResolveField(t *testing.T) { + type S struct { + Exported string `json:"exp"` + Renamed string `json:"renamed,omitempty"` + NoTag string + OmitOnly string `json:",omitempty"` + Skipped string `json:"-"` + unexported string //nolint:unused + } + st := reflect.TypeFor[S]() + cases := []struct { + fieldIdx int + wantName string + wantSkip bool + }{ + {0, "exp", false}, + {1, "renamed", false}, + {2, "NoTag", false}, + {3, "OmitOnly", false}, + {4, "", true}, + {5, "", true}, + } + for _, c := range cases { + fi := resolveField(st.Field(c.fieldIdx)) + if fi.Skip != c.wantSkip { + t.Errorf("field %d: skip=%v, want %v", c.fieldIdx, fi.Skip, c.wantSkip) + } + if !c.wantSkip && fi.Name != c.wantName { + t.Errorf("field %d: name=%q, want %q", c.fieldIdx, fi.Name, c.wantName) + } + } +} + +func TestWalkFieldsFlattensEmbedded(t *testing.T) { + type Inner struct { + A string `json:"a"` + B int `json:"b"` + } + type Outer struct { + Inner + C bool `json:"c"` + } + var names []string + err := walkFields(reflect.TypeFor[Outer](), func(fi fieldInfo) error { + names = append(names, fi.Name) + return nil + }) + if err != nil { + t.Fatalf("walkFields: %v", err) + } + want := []string{"a", "b", "c"} + if !reflect.DeepEqual(names, want) { + t.Fatalf("got %v, want %v", names, want) + } +} + +func TestStructType(t *testing.T) { + type S struct{ X int } + cases := []struct { + name string + in reflect.Type + wantNil bool + }{ + {"struct", reflect.TypeFor[S](), false}, + {"*struct", reflect.TypeFor[*S](), false}, + {"[]struct", reflect.TypeFor[[]S](), false}, + {"time.Time", reflect.TypeFor[time.Time](), true}, + {"string", reflect.TypeFor[string](), true}, + } + for _, c := range cases { + got := structType(c.in) + if (got == nil) != c.wantNil { + t.Errorf("%s: got %v, wantNil %v", c.name, got, c.wantNil) + } + } +} diff --git a/services/search/pkg/mapping/opts.go b/services/search/pkg/mapping/opts.go new file mode 100644 index 0000000000..68edc98c49 --- /dev/null +++ b/services/search/pkg/mapping/opts.go @@ -0,0 +1,30 @@ +// Package mapping builds search index mappings for bleve and OpenSearch from +// a Go struct via reflection. Field names come from json tags; the caller +// provides overrides for fields that need a specific type or analyzer. +package mapping + +// Field type constants used in FieldOpts.Type. An empty Type means the type +// is inferred from the Go field via reflection. +const ( + TypeKeyword = "keyword" + TypeFulltext = "fulltext" + TypePath = "path" + TypeNumeric = "numeric" + TypeDatetime = "datetime" + TypeBool = "bool" + TypeObject = "object" + TypeGeopoint = "geopoint" +) + +// FieldOpts overrides the default type inference for a struct field. Keys in +// the override map are json-tag names (e.g. "Name", "location", "audio.artist"), +// not Go field names. +type FieldOpts struct { + // Type is one of the Type* constants. Empty means "infer from Go type". + Type string + + // Analyzer is the name of a custom analyzer registered on the bleve + // IndexMapping (e.g. "lowercaseKeyword", "fulltext"). For OpenSearch it + // becomes the analyzer attribute on the field. + Analyzer string +} diff --git a/services/search/pkg/mapping/validate.go b/services/search/pkg/mapping/validate.go new file mode 100644 index 0000000000..6a85c718d4 --- /dev/null +++ b/services/search/pkg/mapping/validate.go @@ -0,0 +1,47 @@ +package mapping + +import ( + "fmt" + "reflect" + "sort" + "strings" +) + +// Validate returns an error if any override key does not match a known field +// name in t. Top-level fields are identified by their json-tag name; nested +// struct fields are reachable as "parent.child". +func Validate(t reflect.Type, overrides map[string]FieldOpts) error { + if len(overrides) == 0 { + return nil + } + names := collectNames(t, "") + var unknown []string + for k := range overrides { + if _, ok := names[k]; !ok { + unknown = append(unknown, k) + } + } + if len(unknown) == 0 { + return nil + } + sort.Strings(unknown) + return fmt.Errorf("mapping: unknown override keys: %s", strings.Join(unknown, ", ")) +} + +func collectNames(t reflect.Type, prefix string) map[string]struct{} { + out := map[string]struct{}{} + _ = walkFields(t, func(fi fieldInfo) error { + key := fi.Name + if prefix != "" { + key = prefix + "." + fi.Name + } + out[key] = struct{}{} + if sub := structType(fi.GoField.Type); sub != nil { + for k := range collectNames(sub, key) { + out[k] = struct{}{} + } + } + return nil + }) + return out +} diff --git a/services/search/pkg/mapping/validate_test.go b/services/search/pkg/mapping/validate_test.go new file mode 100644 index 0000000000..2d8500c2b8 --- /dev/null +++ b/services/search/pkg/mapping/validate_test.go @@ -0,0 +1,51 @@ +package mapping + +import ( + "reflect" + "strings" + "testing" +) + +type inner struct { + Artist string `json:"artist"` +} + +type sample struct { + Name string `json:"Name"` + Audio *inner `json:"audio,omitempty"` + Location *struct { //nolint:unused + Lon float64 `json:"longitude"` + Lat float64 `json:"latitude"` + } `json:"location,omitempty"` +} + +func TestValidateAccepts(t *testing.T) { + err := Validate(reflect.TypeFor[sample](), map[string]FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "audio": {Type: TypeObject}, + "audio.artist": {Analyzer: "lowercaseKeyword"}, + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidateRejectsUnknown(t *testing.T) { + err := Validate(reflect.TypeFor[sample](), map[string]FieldOpts{ + "nope": {}, + "audio.zzz": {}, + }) + if err == nil { + t.Fatalf("expected error") + } + if !strings.Contains(err.Error(), "nope") || !strings.Contains(err.Error(), "audio.zzz") { + t.Fatalf("error missing keys: %v", err) + } +} + +func TestValidateEmpty(t *testing.T) { + if err := Validate(reflect.TypeFor[sample](), nil); err != nil { + t.Fatalf("empty overrides should pass: %v", err) + } +} From 4d1ebb55c0fbff7122a70bb4c54f2333c6b0d3ec Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:38:37 +0200 Subject: [PATCH 03/26] refactor(search): add BleveBuildMapping to the mapping package Build a bleve DocumentMapping from a Go struct via reflection, using json tags for field names and FieldOpts overrides for analyzer / type tweaks. Nested structs recurse into sub-document mappings. Also extend FieldOpts with IncludeInAll so call sites can reproduce the existing bleve mapping exactly (Name stays in _all, Tags / Favorites / Content opt out). Not yet wired up; Step 7 will replace bleve/index.go NewMapping. --- services/search/pkg/mapping/bleve.go | 90 ++++++++++++++++ services/search/pkg/mapping/bleve_test.go | 123 ++++++++++++++++++++++ services/search/pkg/mapping/opts.go | 4 + 3 files changed, 217 insertions(+) create mode 100644 services/search/pkg/mapping/bleve.go create mode 100644 services/search/pkg/mapping/bleve_test.go diff --git a/services/search/pkg/mapping/bleve.go b/services/search/pkg/mapping/bleve.go new file mode 100644 index 0000000000..dcf8bd785d --- /dev/null +++ b/services/search/pkg/mapping/bleve.go @@ -0,0 +1,90 @@ +package mapping + +import ( + "fmt" + "reflect" + + "github.com/blevesearch/bleve/v2" + bleveMapping "github.com/blevesearch/bleve/v2/mapping" +) + +// BleveBuildMapping builds a bleve DocumentMapping for t by walking the +// struct via reflection. Field names come from json tags; overrides are +// keyed by those names (or dotted paths for nested fields). +// +// The returned mapping references analyzer names (Analyzer field on the +// FieldOpts, plus "fulltext" / "path_hierarchy" for the corresponding Types); +// the caller is responsible for registering those analyzers on the enclosing +// IndexMapping. +func BleveBuildMapping(t reflect.Type, overrides map[string]FieldOpts) (*bleveMapping.DocumentMapping, error) { + return buildBleveDocMapping(t, overrides, "") +} + +func buildBleveDocMapping(t reflect.Type, overrides map[string]FieldOpts, prefix string) (*bleveMapping.DocumentMapping, error) { + doc := bleve.NewDocumentMapping() + err := walkFields(t, func(fi fieldInfo) error { + key := fi.Name + if prefix != "" { + key = prefix + "." + fi.Name + } + opts := overrides[key] + fieldType := opts.Type + if fieldType == "" { + fieldType = inferType(fi.GoField.Type) + } + + if fieldType == TypeObject { + sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: object type on non-struct field %q", key) + } + subDoc, err := buildBleveDocMapping(sub, overrides, key) + if err != nil { + return err + } + doc.AddSubDocumentMapping(fi.Name, subDoc) + return nil + } + + fm, err := bleveFieldMapping(fieldType, opts) + if err != nil { + return fmt.Errorf("mapping: field %q: %w", key, err) + } + doc.AddFieldMappingsAt(fi.Name, fm) + return nil + }) + return doc, err +} + +func bleveFieldMapping(fieldType string, opts FieldOpts) (*bleveMapping.FieldMapping, error) { + switch fieldType { + case TypeKeyword, TypeFulltext, TypePath: + fm := bleve.NewTextFieldMapping() + switch { + case opts.Analyzer != "": + fm.Analyzer = opts.Analyzer + case fieldType == TypeFulltext: + fm.Analyzer = "fulltext" + case fieldType == TypePath: + fm.Analyzer = "path_hierarchy" + } + switch { + case opts.IncludeInAll != nil: + fm.IncludeInAll = *opts.IncludeInAll + case fieldType == TypeFulltext, fieldType == TypePath: + fm.IncludeInAll = false + } + return fm, nil + case TypeNumeric: + return bleve.NewNumericFieldMapping(), nil + case TypeBool: + return bleve.NewBooleanFieldMapping(), nil + case TypeDatetime: + return bleve.NewDateTimeFieldMapping(), nil + case TypeGeopoint: + return bleve.NewGeoPointFieldMapping(), nil + case "": + return nil, fmt.Errorf("no type inferred and no override") + } + return nil, fmt.Errorf("unsupported type %q", fieldType) +} diff --git a/services/search/pkg/mapping/bleve_test.go b/services/search/pkg/mapping/bleve_test.go new file mode 100644 index 0000000000..cd140eb796 --- /dev/null +++ b/services/search/pkg/mapping/bleve_test.go @@ -0,0 +1,123 @@ +package mapping + +import ( + "reflect" + "testing" + "time" +) + +type bleveDoc struct { + Name string `json:"Name"` + Content string `json:"Content"` + Tags []string `json:"Tags"` + Size uint64 `json:"Size"` + Deleted bool `json:"Deleted"` + CreatedAt time.Time `json:"CreatedAt"` + Nested *nested `json:"nested,omitempty"` +} + +type nested struct { + Artist string `json:"artist"` + Year int `json:"year"` +} + +func TestBleveBuildMappingInferredTypes(t *testing.T) { + dm, err := BleveBuildMapping(reflect.TypeFor[bleveDoc](), nil) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + cases := map[string]string{ + "Name": "text", + "Content": "text", + "Tags": "text", + "Size": "number", + "Deleted": "boolean", + "CreatedAt": "datetime", + } + for field, wantType := range cases { + prop := dm.Properties[field] + if prop == nil { + t.Errorf("missing property %q", field) + continue + } + if len(prop.Fields) == 0 { + t.Errorf("%q: no field mappings", field) + continue + } + if got := prop.Fields[0].Type; got != wantType { + t.Errorf("%q: got type %q, want %q", field, got, wantType) + } + } +} + +func TestBleveBuildMappingNestedIsSubDocument(t *testing.T) { + dm, err := BleveBuildMapping(reflect.TypeFor[bleveDoc](), nil) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + sub := dm.Properties["nested"] + if sub == nil { + t.Fatal("missing nested sub-document") + } + if sub.Properties["artist"] == nil || sub.Properties["year"] == nil { + t.Fatalf("nested fields missing: %#v", sub.Properties) + } + if got := sub.Properties["artist"].Fields[0].Type; got != "text" { + t.Errorf("nested.artist: type %q, want text", got) + } + if got := sub.Properties["year"].Fields[0].Type; got != "number" { + t.Errorf("nested.year: type %q, want number", got) + } +} + +func TestBleveBuildMappingOverrides(t *testing.T) { + includeInAllFalse := false + dm, err := BleveBuildMapping(reflect.TypeFor[bleveDoc](), map[string]FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "Content": {Type: TypeFulltext}, + "Tags": {Analyzer: "lowercaseKeyword", IncludeInAll: &includeInAllFalse}, + }) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + nameField := dm.Properties["Name"].Fields[0] + if nameField.Analyzer != "lowercaseKeyword" { + t.Errorf("Name analyzer: %q, want lowercaseKeyword", nameField.Analyzer) + } + if !nameField.IncludeInAll { + t.Errorf("Name IncludeInAll should stay default-true when not overridden") + } + contentField := dm.Properties["Content"].Fields[0] + if contentField.Analyzer != "fulltext" { + t.Errorf("Content analyzer: %q, want fulltext", contentField.Analyzer) + } + if contentField.IncludeInAll { + t.Errorf("Content IncludeInAll should default to false for fulltext type") + } + tagsField := dm.Properties["Tags"].Fields[0] + if tagsField.IncludeInAll { + t.Errorf("Tags IncludeInAll should honor the explicit false override") + } +} + +func TestBleveBuildMappingGeopoint(t *testing.T) { + type geoDoc struct { + Location *struct { + Lon float64 `json:"longitude"` + Lat float64 `json:"latitude"` + } `json:"location,omitempty"` + } + dm, err := BleveBuildMapping(reflect.TypeFor[geoDoc](), map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + loc := dm.Properties["location"] + if loc == nil || len(loc.Fields) == 0 { + t.Fatalf("location field missing: %#v", dm.Properties) + } + if got := loc.Fields[0].Type; got != "geopoint" { + t.Errorf("location type: %q, want geopoint", got) + } +} diff --git a/services/search/pkg/mapping/opts.go b/services/search/pkg/mapping/opts.go index 68edc98c49..d30b1e68d8 100644 --- a/services/search/pkg/mapping/opts.go +++ b/services/search/pkg/mapping/opts.go @@ -27,4 +27,8 @@ type FieldOpts struct { // IndexMapping (e.g. "lowercaseKeyword", "fulltext"). For OpenSearch it // becomes the analyzer attribute on the field. Analyzer string + + // IncludeInAll controls bleve's _all field inclusion. Nil means "use the + // bleve default for this field type". Has no effect on OpenSearch. + IncludeInAll *bool } From 4053011d9e5ee117620f4d9fe104b1a869e23a83 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:40:59 +0200 Subject: [PATCH 04/26] refactor(search): add OpenSearchBuildMapping to the mapping package Build the OpenSearch mappings.properties map from a Go struct via reflection, using the same FieldOpts overrides as the bleve builder. Type mapping is tailored to OpenSearch primitives (keyword, text, long/integer/double, date, boolean, wildcard, geo_point), with nested structs emitted as {"properties": {...}}. Also add TypeWildcard to the shared type constants so MimeType can be reproduced exactly in Step 11. Bleve has no wildcard type, so it falls back to keyword-ish text. Not yet wired up; Step 11 will replace the static resource_v2.json. --- services/search/pkg/mapping/bleve.go | 4 + services/search/pkg/mapping/opensearch.go | 115 ++++++++++++++++ .../search/pkg/mapping/opensearch_test.go | 128 ++++++++++++++++++ services/search/pkg/mapping/opts.go | 1 + 4 files changed, 248 insertions(+) create mode 100644 services/search/pkg/mapping/opensearch.go create mode 100644 services/search/pkg/mapping/opensearch_test.go diff --git a/services/search/pkg/mapping/bleve.go b/services/search/pkg/mapping/bleve.go index dcf8bd785d..7d5027dcaf 100644 --- a/services/search/pkg/mapping/bleve.go +++ b/services/search/pkg/mapping/bleve.go @@ -58,6 +58,10 @@ func buildBleveDocMapping(t reflect.Type, overrides map[string]FieldOpts, prefix func bleveFieldMapping(fieldType string, opts FieldOpts) (*bleveMapping.FieldMapping, error) { switch fieldType { + case TypeWildcard: + // bleve has no wildcard type; fall back to keyword-ish text. + fieldType = TypeKeyword + fallthrough case TypeKeyword, TypeFulltext, TypePath: fm := bleve.NewTextFieldMapping() switch { diff --git a/services/search/pkg/mapping/opensearch.go b/services/search/pkg/mapping/opensearch.go new file mode 100644 index 0000000000..cae94d616b --- /dev/null +++ b/services/search/pkg/mapping/opensearch.go @@ -0,0 +1,115 @@ +package mapping + +import ( + "fmt" + "reflect" +) + +// OpenSearchBuildMapping builds the OpenSearch "properties" map (the value +// of mappings.properties) for type t by walking the struct via reflection. +// Field names come from json tags; overrides are keyed by those names. +// +// The returned map contains plain JSON-friendly values (strings, bools, +// nested maps) and can be marshalled directly. +func OpenSearchBuildMapping(t reflect.Type, overrides map[string]FieldOpts) (map[string]any, error) { + return buildOpenSearchProperties(t, overrides, "") +} + +func buildOpenSearchProperties(t reflect.Type, overrides map[string]FieldOpts, prefix string) (map[string]any, error) { + props := map[string]any{} + err := walkFields(t, func(fi fieldInfo) error { + key := fi.Name + if prefix != "" { + key = prefix + "." + fi.Name + } + opts := overrides[key] + fieldType := opts.Type + if fieldType == "" { + fieldType = inferType(fi.GoField.Type) + } + + if fieldType == TypeObject { + sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: object type on non-struct field %q", key) + } + subProps, err := buildOpenSearchProperties(sub, overrides, key) + if err != nil { + return err + } + props[fi.Name] = map[string]any{"properties": subProps} + return nil + } + + fm, err := openSearchFieldMapping(fieldType, opts, fi.GoField.Type) + if err != nil { + return fmt.Errorf("mapping: field %q: %w", key, err) + } + props[fi.Name] = fm + return nil + }) + return props, err +} + +func openSearchFieldMapping(fieldType string, opts FieldOpts, goType reflect.Type) (map[string]any, error) { + switch fieldType { + case TypeKeyword: + m := map[string]any{"type": "keyword"} + if opts.Analyzer != "" { + m["type"] = "text" + m["analyzer"] = opts.Analyzer + } + return m, nil + case TypeFulltext: + m := map[string]any{ + "type": "text", + "term_vector": "with_positions_offsets", + } + if opts.Analyzer != "" { + m["analyzer"] = opts.Analyzer + } else { + m["analyzer"] = "fulltext" + } + return m, nil + case TypePath: + m := map[string]any{"type": "text"} + if opts.Analyzer != "" { + m["analyzer"] = opts.Analyzer + } else { + m["analyzer"] = "path_hierarchy" + } + return m, nil + case TypeWildcard: + return map[string]any{"type": "wildcard"}, nil + case TypeNumeric: + return map[string]any{"type": openSearchNumericType(goType)}, nil + case TypeBool: + return map[string]any{"type": "boolean"}, nil + case TypeDatetime: + return map[string]any{"type": "date"}, nil + case TypeGeopoint: + return map[string]any{"type": "geo_point"}, nil + case "": + return nil, fmt.Errorf("no type inferred and no override") + } + return nil, fmt.Errorf("unsupported type %q", fieldType) +} + +// openSearchNumericType maps a Go numeric type to an OpenSearch numeric +// field type. +func openSearchNumericType(t reflect.Type) string { + for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { + t = t.Elem() + } + switch t.Kind() { + case reflect.Float32: + return "float" + case reflect.Float64: + return "double" + case reflect.Int8, reflect.Uint8, reflect.Int16, reflect.Uint16: + return "short" + case reflect.Int32, reflect.Uint32: + return "integer" + } + return "long" +} diff --git a/services/search/pkg/mapping/opensearch_test.go b/services/search/pkg/mapping/opensearch_test.go new file mode 100644 index 0000000000..537ed4584c --- /dev/null +++ b/services/search/pkg/mapping/opensearch_test.go @@ -0,0 +1,128 @@ +package mapping + +import ( + "reflect" + "testing" + "time" +) + +type osDoc struct { + ID string `json:"ID"` + Size uint64 `json:"Size"` + Deleted bool `json:"Deleted"` + CreatedAt time.Time `json:"CreatedAt"` + Rating float64 `json:"Rating"` + Nested *struct { + Artist string `json:"artist"` + Year int32 `json:"year"` + } `json:"nested,omitempty"` +} + +func TestOpenSearchBuildMappingInferred(t *testing.T) { + props, err := OpenSearchBuildMapping(reflect.TypeFor[osDoc](), nil) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + want := map[string]string{ + "ID": "keyword", + "Size": "long", + "Deleted": "boolean", + "CreatedAt": "date", + "Rating": "double", + } + for k, v := range want { + m, ok := props[k].(map[string]any) + if !ok { + t.Errorf("%s: missing or not a map: %#v", k, props[k]) + continue + } + if got := m["type"]; got != v { + t.Errorf("%s: type %v, want %v", k, got, v) + } + } +} + +func TestOpenSearchBuildMappingNested(t *testing.T) { + props, err := OpenSearchBuildMapping(reflect.TypeFor[osDoc](), nil) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + nested, ok := props["nested"].(map[string]any) + if !ok { + t.Fatalf("nested: not a map: %#v", props["nested"]) + } + sub, ok := nested["properties"].(map[string]any) + if !ok { + t.Fatalf("nested.properties: missing: %#v", nested) + } + artist, ok := sub["artist"].(map[string]any) + if !ok { + t.Fatalf("nested.artist: %#v", sub) + } + if artist["type"] != "keyword" { + t.Errorf("nested.artist.type: %v", artist["type"]) + } + year, ok := sub["year"].(map[string]any) + if !ok { + t.Fatalf("nested.year: %#v", sub) + } + if year["type"] != "integer" { + t.Errorf("nested.year.type: %v (int32 → integer expected)", year["type"]) + } +} + +func TestOpenSearchBuildMappingOverrides(t *testing.T) { + type doc struct { + Name string `json:"Name"` + Content string `json:"Content"` + Path string `json:"Path"` + MimeType string `json:"MimeType"` + } + props, err := OpenSearchBuildMapping(reflect.TypeFor[doc](), map[string]FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "Content": {Type: TypeFulltext}, + "Path": {Type: TypePath}, + "MimeType": {Type: TypeWildcard}, + }) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + name := props["Name"].(map[string]any) + if name["type"] != "text" || name["analyzer"] != "lowercaseKeyword" { + t.Errorf("Name: %#v", name) + } + content := props["Content"].(map[string]any) + if content["type"] != "text" || content["analyzer"] != "fulltext" || content["term_vector"] != "with_positions_offsets" { + t.Errorf("Content: %#v", content) + } + path := props["Path"].(map[string]any) + if path["type"] != "text" || path["analyzer"] != "path_hierarchy" { + t.Errorf("Path: %#v", path) + } + mime := props["MimeType"].(map[string]any) + if mime["type"] != "wildcard" { + t.Errorf("MimeType: %#v", mime) + } +} + +func TestOpenSearchBuildMappingGeopoint(t *testing.T) { + type doc struct { + Location *struct { + Lon float64 `json:"longitude"` + Lat float64 `json:"latitude"` + } `json:"location,omitempty"` + } + props, err := OpenSearchBuildMapping(reflect.TypeFor[doc](), map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + loc, ok := props["location"].(map[string]any) + if !ok { + t.Fatalf("location: %#v", props["location"]) + } + if loc["type"] != "geo_point" { + t.Errorf("location.type: %v", loc["type"]) + } +} diff --git a/services/search/pkg/mapping/opts.go b/services/search/pkg/mapping/opts.go index d30b1e68d8..3daa258a62 100644 --- a/services/search/pkg/mapping/opts.go +++ b/services/search/pkg/mapping/opts.go @@ -9,6 +9,7 @@ const ( TypeKeyword = "keyword" TypeFulltext = "fulltext" TypePath = "path" + TypeWildcard = "wildcard" TypeNumeric = "numeric" TypeDatetime = "datetime" TypeBool = "bool" From 664127d8c8789e4fee3c8f0d852a057592dc60eb Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:42:14 +0200 Subject: [PATCH 05/26] refactor(search): add PrepareForIndex to the mapping package Convert a struct to the flat map[string]any form bleve expects, via a json marshal/unmarshal round-trip. This is equivalent to what bleve does internally when given the struct directly, but it gives the mapping package a hook point for splicing in type-specific adaptations (geopoint being the first planned consumer; the overrides parameter is already part of the signature for that reason). Not yet wired up; Step 8 will route bleve/batch.go Upsert through here. --- services/search/pkg/mapping/serialize.go | 29 +++++++++ services/search/pkg/mapping/serialize_test.go | 65 +++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 services/search/pkg/mapping/serialize.go create mode 100644 services/search/pkg/mapping/serialize_test.go diff --git a/services/search/pkg/mapping/serialize.go b/services/search/pkg/mapping/serialize.go new file mode 100644 index 0000000000..b145ef26d8 --- /dev/null +++ b/services/search/pkg/mapping/serialize.go @@ -0,0 +1,29 @@ +package mapping + +import ( + "encoding/json" + "fmt" +) + +// PrepareForIndex converts v to a flat map[string]any suitable for passing +// to bleve.Batch.Index. It honors json tags, omitempty semantics, and +// embedded-struct flattening via a json marshal/unmarshal round-trip — the +// same field resolution bleve would do internally when given the struct +// directly, but in a form that lets the mapping package splice in extra +// representations (e.g. geopoint adapter) without mutating the caller's +// value. +// +// The overrides parameter is reserved for type-specific adaptations +// (geopoint is the first user); callers should pass the same map used for +// BleveBuildMapping so the two stay in sync. +func PrepareForIndex(v any, _ map[string]FieldOpts) (map[string]any, error) { + b, err := json.Marshal(v) + if err != nil { + return nil, fmt.Errorf("mapping: marshal %T: %w", v, err) + } + var out map[string]any + if err := json.Unmarshal(b, &out); err != nil { + return nil, fmt.Errorf("mapping: unmarshal %T: %w", v, err) + } + return out, nil +} diff --git a/services/search/pkg/mapping/serialize_test.go b/services/search/pkg/mapping/serialize_test.go new file mode 100644 index 0000000000..0ce7f84f58 --- /dev/null +++ b/services/search/pkg/mapping/serialize_test.go @@ -0,0 +1,65 @@ +package mapping + +import ( + "reflect" + "testing" +) + +func TestPrepareForIndexFlattensEmbedded(t *testing.T) { + type inner struct { + Name string `json:"Name"` + Size uint64 `json:"Size"` + } + type outer struct { + inner + ID string `json:"ID"` + } + m, err := PrepareForIndex(outer{inner: inner{Name: "a", Size: 7}, ID: "x"}, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + want := map[string]any{"Name": "a", "Size": float64(7), "ID": "x"} + if !reflect.DeepEqual(m, want) { + t.Fatalf("got %#v, want %#v", m, want) + } +} + +func TestPrepareForIndexOmitsNilWithOmitempty(t *testing.T) { + type facet struct { + Artist string `json:"artist"` + } + type doc struct { + Name string `json:"Name"` + Audio *facet `json:"audio,omitempty"` + } + m, err := PrepareForIndex(doc{Name: "n"}, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + if _, ok := m["audio"]; ok { + t.Errorf("audio should be omitted when nil: %#v", m) + } + if m["Name"] != "n" { + t.Errorf("Name: %v", m["Name"]) + } +} + +func TestPrepareForIndexIncludesNestedWhenSet(t *testing.T) { + type facet struct { + Artist string `json:"artist"` + } + type doc struct { + Audio *facet `json:"audio,omitempty"` + } + m, err := PrepareForIndex(doc{Audio: &facet{Artist: "A"}}, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + nested, ok := m["audio"].(map[string]any) + if !ok { + t.Fatalf("audio should be a nested map: %#v", m["audio"]) + } + if nested["artist"] != "A" { + t.Errorf("audio.artist: %v", nested["artist"]) + } +} From 26b652e0265ffa7552615da598f5db2a83fd581f Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:45:01 +0200 Subject: [PATCH 06/26] refactor(search): add Deserialize[T] to the mapping package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reconstruct a typed value from bleve's flat hit.Fields map: - Nested struct pointers recurse under a "parent.child" prefix and stay nil when none of their sub-fields were present. - Slice fields accept bleve's scalar-on-single-element quirk and wrap scalars into a single-element slice. - time.Time and timestamppb.Timestamp are parsed from RFC3339 strings. - Numeric types are converted via reflect (float64 → uint64, etc). Replaces the hand-rolled getAudioValue / getImageValue / getLocation- Value / getPhotoValue + unmarshalInterfaceMap pair that Step 9 will remove in favor of this generic walker. Not yet wired up. --- services/search/pkg/mapping/deserialize.go | 163 ++++++++++++++++++ .../search/pkg/mapping/deserialize_test.go | 129 ++++++++++++++ 2 files changed, 292 insertions(+) create mode 100644 services/search/pkg/mapping/deserialize.go create mode 100644 services/search/pkg/mapping/deserialize_test.go diff --git a/services/search/pkg/mapping/deserialize.go b/services/search/pkg/mapping/deserialize.go new file mode 100644 index 0000000000..79f5a04ce0 --- /dev/null +++ b/services/search/pkg/mapping/deserialize.go @@ -0,0 +1,163 @@ +package mapping + +import ( + "fmt" + "reflect" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +// Deserialize builds a *T from bleve's flat hit.Fields map, using json-tag +// names for lookup and "parent.child" for nested struct pointers. Scalar +// values stored as slices (bleve unwraps single-element slices) are +// re-wrapped when the target field is a slice. Time-valued fields accept +// RFC3339 strings and are decoded into time.Time or timestamppb.Timestamp. +// +// The overrides parameter is reserved for type-specific adaptations +// (geopoint is the first planned consumer). +func Deserialize[T any](fields map[string]any, _ map[string]FieldOpts) (*T, error) { + t := reflect.TypeFor[T]() + if t.Kind() != reflect.Struct { + return nil, fmt.Errorf("mapping: Deserialize requires a struct type, got %v", t) + } + out := reflect.New(t) + if _, err := fillStruct(out.Elem(), fields, ""); err != nil { + return nil, err + } + return out.Interface().(*T), nil +} + +// fillStruct walks v's fields, copying values from fields at the dotted +// prefix. Returns true if any leaf was populated (used by caller to decide +// whether to keep a newly-allocated pointer to v). +func fillStruct(v reflect.Value, fields map[string]any, prefix string) (bool, error) { + t := v.Type() + touched := false + for i := 0; i < t.NumField(); i++ { + fi := resolveField(t.Field(i)) + if fi.Skip { + continue + } + fv := v.Field(i) + + if fi.Embedded { + sub, err := fillStruct(fv, fields, prefix) + if err != nil { + return touched, err + } + if sub { + touched = true + } + continue + } + + key := fi.Name + if prefix != "" { + key = prefix + "." + fi.Name + } + ok, err := fillField(fv, fields, key) + if err != nil { + return touched, err + } + if ok { + touched = true + } + } + return touched, nil +} + +// fillField populates a single struct field. For pointers to nested structs +// (non-time), it recurses with the field key as prefix and keeps the pointer +// only if something was set. +func fillField(v reflect.Value, fields map[string]any, key string) (bool, error) { + ft := v.Type() + if ft.Kind() == reflect.Ptr { + elem := ft.Elem() + if elem.Kind() == reflect.Struct && elem != timeType && elem != timestampType { + alloc := reflect.New(elem) + touched, err := fillStruct(alloc.Elem(), fields, key) + if err != nil { + return false, err + } + if touched { + v.Set(alloc) + return true, nil + } + return false, nil + } + } + raw, ok := fields[key] + if !ok { + return false, nil + } + return true, setValue(v, raw) +} + +func setValue(v reflect.Value, raw any) error { + if v.Kind() == reflect.Ptr { + elem := v.Type().Elem() + alloc := reflect.New(elem) + if err := setValue(alloc.Elem(), raw); err != nil { + return err + } + v.Set(alloc) + return nil + } + if v.Type() == timeType { + t, err := parseTime(raw) + if err != nil { + return err + } + v.Set(reflect.ValueOf(t)) + return nil + } + if v.Type() == timestampType { + t, err := parseTime(raw) + if err != nil { + return err + } + v.Set(reflect.ValueOf(*timestamppb.New(t))) + return nil + } + if v.Kind() == reflect.Slice { + return setSlice(v, raw) + } + rv := reflect.ValueOf(raw) + if !rv.IsValid() { + return nil + } + if rv.Type().ConvertibleTo(v.Type()) { + v.Set(rv.Convert(v.Type())) + return nil + } + return fmt.Errorf("mapping: cannot set %v from %T", v.Type(), raw) +} + +func setSlice(v reflect.Value, raw any) error { + items, ok := raw.([]any) + if !ok { + // bleve unwraps single-element slices; re-wrap here. + items = []any{raw} + } + out := reflect.MakeSlice(v.Type(), len(items), len(items)) + for i, item := range items { + if err := setValue(out.Index(i), item); err != nil { + return fmt.Errorf("mapping: slice[%d]: %w", i, err) + } + } + v.Set(out) + return nil +} + +func parseTime(raw any) (time.Time, error) { + s, ok := raw.(string) + if !ok { + return time.Time{}, fmt.Errorf("mapping: expected time string, got %T", raw) + } + t, err := time.Parse(time.RFC3339, s) + if err != nil { + return time.Time{}, fmt.Errorf("mapping: parse time %q: %w", s, err) + } + return t, nil +} diff --git a/services/search/pkg/mapping/deserialize_test.go b/services/search/pkg/mapping/deserialize_test.go new file mode 100644 index 0000000000..d9efc8ac76 --- /dev/null +++ b/services/search/pkg/mapping/deserialize_test.go @@ -0,0 +1,129 @@ +package mapping + +import ( + "testing" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +type Leaf struct { + Name string `json:"Name"` + Size uint64 `json:"Size"` + Deleted bool `json:"Deleted"` + Tags []string `json:"Tags"` + Favorites []string `json:"Favorites"` +} + +type audio struct { + Artist *string `json:"artist,omitempty"` + Year *int32 `json:"year,omitempty"` +} + +type photo struct { + Taken *timestamppb.Timestamp `json:"takenDateTime,omitempty"` + Mtime *time.Time `json:"mtime,omitempty"` +} + +type embedded struct { + Leaf + Audio *audio `json:"audio,omitempty"` + Photo *photo `json:"photo,omitempty"` +} + +func TestDeserializeLeafFields(t *testing.T) { + r, err := Deserialize[Leaf](map[string]any{ + "Name": "n", + "Size": float64(42), + "Deleted": true, + }, nil) + if err != nil { + t.Fatalf("Deserialize: %v", err) + } + if r.Name != "n" || r.Size != 42 || !r.Deleted { + t.Fatalf("got %#v", r) + } +} + +func TestDeserializeScalarToSlice(t *testing.T) { + r, err := Deserialize[Leaf](map[string]any{ + "Tags": "single", + "Favorites": []any{"a", "b"}, + }, nil) + if err != nil { + t.Fatalf("Deserialize: %v", err) + } + if len(r.Tags) != 1 || r.Tags[0] != "single" { + t.Errorf("Tags: %#v", r.Tags) + } + if len(r.Favorites) != 2 || r.Favorites[0] != "a" || r.Favorites[1] != "b" { + t.Errorf("Favorites: %#v", r.Favorites) + } +} + +func TestDeserializeNestedPointer(t *testing.T) { + r, err := Deserialize[embedded](map[string]any{ + "audio.artist": "A", + "audio.year": float64(2024), + }, nil) + if err != nil { + t.Fatalf("Deserialize: %v", err) + } + if r.Audio == nil { + t.Fatal("Audio is nil") + } + if r.Audio.Artist == nil || *r.Audio.Artist != "A" { + t.Errorf("Artist: %#v", r.Audio.Artist) + } + if r.Audio.Year == nil || *r.Audio.Year != 2024 { + t.Errorf("Year: %#v", r.Audio.Year) + } +} + +func TestDeserializeEmptyNestedStaysNil(t *testing.T) { + r, err := Deserialize[embedded](map[string]any{ + "Name": "n", + }, nil) + if err != nil { + t.Fatalf("Deserialize: %v", err) + } + if r.Audio != nil || r.Photo != nil { + t.Fatalf("nested pointers should stay nil: %#v", r) + } + if r.Name != "n" { + t.Errorf("Name: %q", r.Name) + } +} + +func TestDeserializeTimestamp(t *testing.T) { + r, err := Deserialize[embedded](map[string]any{ + "photo.takenDateTime": "2024-01-02T03:04:05Z", + "photo.mtime": "2024-05-06T07:08:09Z", + }, nil) + if err != nil { + t.Fatalf("Deserialize: %v", err) + } + if r.Photo == nil { + t.Fatal("Photo is nil") + } + if r.Photo.Taken == nil { + t.Fatal("Taken is nil") + } + expected := time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC) + if !r.Photo.Taken.AsTime().Equal(expected) { + t.Errorf("Taken: got %v, want %v", r.Photo.Taken.AsTime(), expected) + } + if r.Photo.Mtime == nil { + t.Fatal("Mtime is nil") + } + if !r.Photo.Mtime.Equal(time.Date(2024, 5, 6, 7, 8, 9, 0, time.UTC)) { + t.Errorf("Mtime: %v", r.Photo.Mtime) + } +} + +func TestDeserializeRejectsNonStruct(t *testing.T) { + _, err := Deserialize[int](nil, nil) + if err == nil { + t.Fatal("expected error for non-struct T") + } +} From da18ce18511913f5440aa45d31ccf63cf8c1fb27 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:46:45 +0200 Subject: [PATCH 07/26] refactor(search): build the bleve document mapping via reflection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand-rolled field mappings in bleve/index.go NewMapping with a call through the new mapping package: - search.Resource.SearchFieldOverrides() is the single source of truth for the per-backend field options (Name, Content, Tags, Favorites). - mapping.BleveBuildMapping walks the Resource struct and emits the DocumentMapping from json tags + overrides. - mapping.Validate runs at startup so a typo in an override key fails loudly instead of silently being ignored. The custom analyzers (lowercaseKeyword, fulltext) remain registered on the IndexMapping here — the mapping package only references them by name. Behavior is preserved: the existing ginkgo suite exercises Upsert + Search roundtrips and still passes. --- services/search/pkg/bleve/index.go | 29 ++++++++++++---------------- services/search/pkg/search/search.go | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/services/search/pkg/bleve/index.go b/services/search/pkg/bleve/index.go index f11bd61402..910d585f36 100644 --- a/services/search/pkg/bleve/index.go +++ b/services/search/pkg/bleve/index.go @@ -4,6 +4,7 @@ import ( "errors" "math" "path/filepath" + "reflect" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -15,6 +16,7 @@ import ( "github.com/blevesearch/bleve/v2/mapping" storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + searchmapping "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -38,27 +40,20 @@ func NewIndex(root string) (bleve.Index, error) { } func NewMapping() (mapping.IndexMapping, error) { - nameMapping := bleve.NewTextFieldMapping() - nameMapping.Analyzer = "lowercaseKeyword" - - lowercaseMapping := bleve.NewTextFieldMapping() - lowercaseMapping.IncludeInAll = false - lowercaseMapping.Analyzer = "lowercaseKeyword" - - fulltextFieldMapping := bleve.NewTextFieldMapping() - fulltextFieldMapping.Analyzer = "fulltext" - fulltextFieldMapping.IncludeInAll = false - - docMapping := bleve.NewDocumentMapping() - docMapping.AddFieldMappingsAt("Name", nameMapping) - docMapping.AddFieldMappingsAt("Tags", lowercaseMapping) - docMapping.AddFieldMappingsAt("Favorites", lowercaseMapping) - docMapping.AddFieldMappingsAt("Content", fulltextFieldMapping) + resourceType := reflect.TypeFor[search.Resource]() + overrides := search.Resource{}.SearchFieldOverrides() + if err := searchmapping.Validate(resourceType, overrides); err != nil { + return nil, err + } + docMapping, err := searchmapping.BleveBuildMapping(resourceType, overrides) + if err != nil { + return nil, err + } indexMapping := bleve.NewIndexMapping() indexMapping.DefaultAnalyzer = keyword.Name indexMapping.DefaultMapping = docMapping - err := indexMapping.AddCustomAnalyzer("lowercaseKeyword", + err = indexMapping.AddCustomAnalyzer("lowercaseKeyword", map[string]any{ "type": custom.Name, "tokenizer": single.Name, diff --git a/services/search/pkg/search/search.go b/services/search/pkg/search/search.go index d5c6a3653f..4a1d652d60 100644 --- a/services/search/pkg/search/search.go +++ b/services/search/pkg/search/search.go @@ -19,6 +19,7 @@ import ( searchmsg "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/messages/search/v0" searchService "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/services/search/v0" "github.com/opencloud-eu/opencloud/services/search/pkg/content" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" ) var scopeRegex = regexp.MustCompile(`scope:\s*([^" "\n\r]*)`) @@ -60,6 +61,19 @@ type Resource struct { Hidden bool `json:"Hidden"` } +// SearchFieldOverrides returns the field options the mapping package needs +// to build per-backend index mappings for a Resource. Keys are json-tag +// names; see package mapping for the available FieldOpts knobs. +func (Resource) SearchFieldOverrides() map[string]mapping.FieldOpts { + excludeFromAll := false + return map[string]mapping.FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "Content": {Type: mapping.TypeFulltext}, + "Tags": {Analyzer: "lowercaseKeyword", IncludeInAll: &excludeFromAll}, + "Favorites": {Analyzer: "lowercaseKeyword", IncludeInAll: &excludeFromAll}, + } +} + // ResolveReference makes sure the path is relative to the space root func ResolveReference(ctx context.Context, ref *provider.Reference, ri *provider.ResourceInfo, gatewaySelector pool.Selectable[gateway.GatewayAPIClient]) (*provider.Reference, error) { if ref.GetResourceId().GetOpaqueId() == ref.GetResourceId().GetSpaceId() { From fec9931e323af5b43828ad69d9ea8b572e5df171 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:47:34 +0200 Subject: [PATCH 08/26] refactor(search): route bleve batch Upsert through mapping.PrepareForIndex All four write paths in bleve/batch.go (Upsert, Move, Delete, Restore) now go through a shared indexResource helper that calls mapping.Prepare- ForIndex before handing the document to bleve. That gives the mapping package a single hook point for type-specific adaptations (geopoint adapter lands in the showcase commit at the end). Bleve's own reflection-based field walker behaved the same way the json roundtrip does here, so the ginkgo suite still passes unchanged. --- services/search/pkg/bleve/batch.go | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/services/search/pkg/bleve/batch.go b/services/search/pkg/bleve/batch.go index 60a3d0c867..0b3bcf2775 100644 --- a/services/search/pkg/bleve/batch.go +++ b/services/search/pkg/bleve/batch.go @@ -10,6 +10,7 @@ import ( "github.com/opencloud-eu/reva/v2/pkg/utils" "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -36,10 +37,21 @@ func NewBatch(index bleve.Index, size int) (*Batch, error) { func (b *Batch) Upsert(id string, r search.Resource) error { return b.withSizeLimit(func() error { - return b.batch.Index(id, r) + return b.indexResource(id, r) }) } +// indexResource prepares r for bleve (resolving json tags and splicing in +// type-specific adaptations via the mapping package) and appends it to the +// batch under id. +func (b *Batch) indexResource(id string, r search.Resource) error { + doc, err := mapping.PrepareForIndex(r, r.SearchFieldOverrides()) + if err != nil { + return err + } + return b.batch.Index(id, doc) +} + func (b *Batch) Move(id, parentID, location string) error { return b.withSizeLimit(func() error { rootResource, err := searchResourceByID(id, b.index) @@ -68,7 +80,7 @@ func (b *Batch) Move(id, parentID, location string) error { } for _, resource := range resources { - if err := b.batch.Index(resource.ID, resource); err != nil { + if err := b.indexResource(resource.ID, *resource); err != nil { return err } if b.batch.Size() >= b.size { @@ -90,7 +102,7 @@ func (b *Batch) Delete(id string) error { } for _, resource := range affectedResources { - if err := b.batch.Index(resource.ID, resource); err != nil { + if err := b.indexResource(resource.ID, *resource); err != nil { return err } if b.batch.Size() >= b.size { @@ -112,7 +124,7 @@ func (b *Batch) Restore(id string) error { } for _, resource := range affectedResources { - if err := b.batch.Index(resource.ID, resource); err != nil { + if err := b.indexResource(resource.ID, *resource); err != nil { return err } if b.batch.Size() >= b.size { From 3cfbfcb215518f2dde5644090361766b5a6a2e8b Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:50:25 +0200 Subject: [PATCH 09/26] refactor(search): replace hand-rolled bleve hit deserializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bleve/bleve.go: matchToResource now goes through the generic mapping.Deserialize[search.Resource]. Drop getAudioValue, getImage- Value, getLocationValue, getPhotoValue, unmarshalInterfaceMap, new- PointerOfType and getFieldName — the mapping package covers all of them. The legacy "only expose Audio for MimeType audio/*" safety net is preserved as a post-processing step. bleve/backend.go: Search() now uses mapping.DeserializeAt for the Audio / Image / Location / Photo facets on the protobuf Match. This relies on the json tags generated on the proto structs. Also add DeserializeAt for reading sub-trees of the flat fields map under a dotted prefix, returning nil when nothing matches so the enclosing pointer stays nil. --- services/search/pkg/bleve/backend.go | 19 ++- services/search/pkg/bleve/bleve.go | 139 ++---------------- services/search/pkg/mapping/deserialize.go | 19 +++ .../search/pkg/mapping/deserialize_test.go | 25 ++++ 4 files changed, 71 insertions(+), 131 deletions(-) diff --git a/services/search/pkg/bleve/backend.go b/services/search/pkg/bleve/backend.go index 8adcdda09e..ee472c5f2d 100644 --- a/services/search/pkg/bleve/backend.go +++ b/services/search/pkg/bleve/backend.go @@ -15,6 +15,7 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/search" searchMessage "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/messages/search/v0" @@ -136,12 +137,22 @@ func (b *Backend) Search(_ context.Context, sir *searchService.SearchIndexReques Tags: getFieldSliceValue[string](hit.Fields, "Tags"), Favorites: getFieldSliceValue[string](hit.Fields, "Favorites"), Highlights: getFragmentValue(hit.Fragments, "Content", 0), - Audio: getAudioValue[searchMessage.Audio](hit.Fields), - Image: getImageValue[searchMessage.Image](hit.Fields), - Location: getLocationValue[searchMessage.GeoCoordinates](hit.Fields), - Photo: getPhotoValue[searchMessage.Photo](hit.Fields), }, } + if strings.HasPrefix(match.Entity.MimeType, "audio/") { + if audio, _ := mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio", nil); audio != nil { + match.Entity.Audio = audio + } + } + if image, _ := mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image", nil); image != nil { + match.Entity.Image = image + } + if loc, _ := mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location", nil); loc != nil { + match.Entity.Location = loc + } + if photo, _ := mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo", nil); photo != nil { + match.Entity.Photo = photo + } if mtime, err := time.Parse(time.RFC3339, getFieldValue[string](hit.Fields, "Mtime")); err == nil { match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())} diff --git a/services/search/pkg/bleve/bleve.go b/services/search/pkg/bleve/bleve.go index e478c2c148..336a327398 100644 --- a/services/search/pkg/bleve/bleve.go +++ b/services/search/pkg/bleve/bleve.go @@ -1,18 +1,14 @@ package bleve import ( - "reflect" "regexp" "strings" - "time" bleveSearch "github.com/blevesearch/bleve/v2/search" storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - libregraph "github.com/opencloud-eu/libre-graph-api-go" - "google.golang.org/protobuf/types/known/timestamppb" searchMessage "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/messages/search/v0" - "github.com/opencloud-eu/opencloud/services/search/pkg/content" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -75,131 +71,20 @@ func getFragmentValue(m bleveSearch.FieldFragmentMap, key string, idx int) strin return val[idx] } -func getAudioValue[T any](fields map[string]any) *T { - if !strings.HasPrefix(getFieldValue[string](fields, "MimeType"), "audio/") { +// matchToResource reconstructs a search.Resource from a bleve hit. Used by +// the Move / Delete / Restore / Purge paths that round-trip a record through +// the index. +func matchToResource(match *bleveSearch.DocumentMatch) *search.Resource { + r, err := mapping.Deserialize[search.Resource](match.Fields, search.Resource{}.SearchFieldOverrides()) + if err != nil || r == nil { return nil } - - var audio = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(audio, fields, "audio."); ok { - return audio - } - - return nil -} - -func getImageValue[T any](fields map[string]any) *T { - var image = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(image, fields, "image."); ok { - return image - } - - return nil -} - -func getLocationValue[T any](fields map[string]any) *T { - var location = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(location, fields, "location."); ok { - return location - } - - return nil -} - -func getPhotoValue[T any](fields map[string]any) *T { - var photo = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(photo, fields, "photo."); ok { - return photo - } - - return nil -} - -func newPointerOfType[T any]() *T { - t := reflect.TypeOf((*T)(nil)).Elem() - ptr := reflect.New(t).Interface() - return ptr.(*T) -} - -func unmarshalInterfaceMap(out any, flatMap map[string]any, prefix string) bool { - nonEmpty := false - obj := reflect.ValueOf(out).Elem() - for i := 0; i < obj.NumField(); i++ { - field := obj.Field(i) - structField := obj.Type().Field(i) - mapKey := prefix + getFieldName(structField) - - if value, ok := flatMap[mapKey]; ok { - if field.Kind() == reflect.Ptr { - alloc := reflect.New(field.Type().Elem()) - elemType := field.Type().Elem() - - // convert time strings from index for search requests - if elemType == reflect.TypeOf(timestamppb.Timestamp{}) { - if strValue, ok := value.(string); ok { - if parsedTime, err := time.Parse(time.RFC3339, strValue); err == nil { - alloc.Elem().Set(reflect.ValueOf(*timestamppb.New(parsedTime))) - field.Set(alloc) - nonEmpty = true - } - } - continue - } - - // convert time strings from index for libregraph structs when updating resources - if elemType == reflect.TypeOf(time.Time{}) { - if strValue, ok := value.(string); ok { - if parsedTime, err := time.Parse(time.RFC3339, strValue); err == nil { - alloc.Elem().Set(reflect.ValueOf(parsedTime)) - field.Set(alloc) - nonEmpty = true - } - } - continue - } - - alloc.Elem().Set(reflect.ValueOf(value).Convert(elemType)) - field.Set(alloc) - nonEmpty = true - } - } - } - - return nonEmpty -} - -func getFieldName(structField reflect.StructField) string { - tag := structField.Tag.Get("json") - if tag == "" { - return structField.Name - } - - return strings.Split(tag, ",")[0] -} - -func matchToResource(match *bleveSearch.DocumentMatch) *search.Resource { - return &search.Resource{ - ID: getFieldValue[string](match.Fields, "ID"), - RootID: getFieldValue[string](match.Fields, "RootID"), - Path: getFieldValue[string](match.Fields, "Path"), - ParentID: getFieldValue[string](match.Fields, "ParentID"), - Type: uint64(getFieldValue[float64](match.Fields, "Type")), - Deleted: getFieldValue[bool](match.Fields, "Deleted"), - Document: content.Document{ - Name: getFieldValue[string](match.Fields, "Name"), - Title: getFieldValue[string](match.Fields, "Title"), - Size: uint64(getFieldValue[float64](match.Fields, "Size")), - Mtime: getFieldValue[string](match.Fields, "Mtime"), - MimeType: getFieldValue[string](match.Fields, "MimeType"), - Content: getFieldValue[string](match.Fields, "Content"), - Tags: getFieldSliceValue[string](match.Fields, "Tags"), - Favorites: getFieldSliceValue[string](match.Fields, "Favorites"), - Audio: getAudioValue[libregraph.Audio](match.Fields), - Image: getImageValue[libregraph.Image](match.Fields), - Location: getLocationValue[libregraph.GeoCoordinates](match.Fields), - Photo: getPhotoValue[libregraph.Photo](match.Fields), - }, + // Legacy safety net: only expose audio metadata for audio MimeTypes, + // matching the pre-refactor getAudioValue behavior. + if r.Audio != nil && !strings.HasPrefix(r.MimeType, "audio/") { + r.Audio = nil } + return r } func escapeQuery(s string) string { diff --git a/services/search/pkg/mapping/deserialize.go b/services/search/pkg/mapping/deserialize.go index 79f5a04ce0..2880ce0854 100644 --- a/services/search/pkg/mapping/deserialize.go +++ b/services/search/pkg/mapping/deserialize.go @@ -28,6 +28,25 @@ func Deserialize[T any](fields map[string]any, _ map[string]FieldOpts) (*T, erro return out.Interface().(*T), nil } +// DeserializeAt reads sub-fields of the flat fields map under the given +// dotted prefix into a new *T. If no sub-fields were present, it returns +// (nil, nil) so callers can leave the enclosing pointer nil. +func DeserializeAt[T any](fields map[string]any, prefix string, _ map[string]FieldOpts) (*T, error) { + t := reflect.TypeFor[T]() + if t.Kind() != reflect.Struct { + return nil, fmt.Errorf("mapping: DeserializeAt requires a struct type, got %v", t) + } + out := reflect.New(t) + touched, err := fillStruct(out.Elem(), fields, prefix) + if err != nil { + return nil, err + } + if !touched { + return nil, nil + } + return out.Interface().(*T), nil +} + // fillStruct walks v's fields, copying values from fields at the dotted // prefix. Returns true if any leaf was populated (used by caller to decide // whether to keep a newly-allocated pointer to v). diff --git a/services/search/pkg/mapping/deserialize_test.go b/services/search/pkg/mapping/deserialize_test.go index d9efc8ac76..c8a452254f 100644 --- a/services/search/pkg/mapping/deserialize_test.go +++ b/services/search/pkg/mapping/deserialize_test.go @@ -127,3 +127,28 @@ func TestDeserializeRejectsNonStruct(t *testing.T) { t.Fatal("expected error for non-struct T") } } + +func TestDeserializeAtReturnsNilWhenNothingMatches(t *testing.T) { + r, err := DeserializeAt[audio](map[string]any{"Name": "n"}, "audio", nil) + if err != nil { + t.Fatalf("DeserializeAt: %v", err) + } + if r != nil { + t.Fatalf("expected nil, got %#v", r) + } +} + +func TestDeserializeAtReturnsValueWhenPrefixMatches(t *testing.T) { + r, err := DeserializeAt[audio](map[string]any{ + "audio.artist": "A", + }, "audio", nil) + if err != nil { + t.Fatalf("DeserializeAt: %v", err) + } + if r == nil { + t.Fatal("expected non-nil *audio") + } + if r.Artist == nil || *r.Artist != "A" { + t.Errorf("Artist: %#v", r.Artist) + } +} From 4244c4eeff885a2d8619ebdf7cc8526c9c337481 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:57:50 +0200 Subject: [PATCH 10/26] refactor(search): generate the OpenSearch V2 index mapping at runtime Drop internal/indexes/resource_v2.json and build the index body from search.Resource + mapping.OpenSearchBuildMapping inside index.go. The V2 IndexManager sentinel now routes MarshalJSON to buildResource- V2Mapping instead of reading an embedded file; V1 still reads from the embedded FS for migration compatibility. The generated mapping is a completed superset of the old static JSON: - Content: text + term_vector, no explicit analyzer (dropped the "fulltext" default for OpenSearch since the analyzer is a bleve- only thing; resource_v2.json did not set one either). - Path: text + path_hierarchy (unchanged). - MimeType: wildcard (unchanged; doc_values: false is dropped as a minor storage tradeoff). - Name / Tags / Favorites: text + lowercaseKeyword. This unifies OpenSearch behavior with bleve's (previously Favorites was plain "keyword" on OpenSearch, so a lowercaseKeyword analyzer is now registered in the settings so OpenSearch can speak the same name). - Facet fields (audio, image, location, photo): now explicit nested object mappings driven by the proto-embedded json tags, instead of relying on OpenSearch's dynamic templating. Existing OpenSearch indexes keep their stored mapping; the new shape only applies to new indexes. Queries keep working because our query generation does not rely on the specific mapping primitives that changed here. --- services/search/pkg/mapping/opensearch.go | 2 - .../search/pkg/mapping/opensearch_test.go | 5 +- services/search/pkg/opensearch/index.go | 54 +++++++++++++++++- .../internal/indexes/resource_v2.json | 56 ------------------- 4 files changed, 57 insertions(+), 60 deletions(-) delete mode 100644 services/search/pkg/opensearch/internal/indexes/resource_v2.json diff --git a/services/search/pkg/mapping/opensearch.go b/services/search/pkg/mapping/opensearch.go index cae94d616b..1ef05617d7 100644 --- a/services/search/pkg/mapping/opensearch.go +++ b/services/search/pkg/mapping/opensearch.go @@ -67,8 +67,6 @@ func openSearchFieldMapping(fieldType string, opts FieldOpts, goType reflect.Typ } if opts.Analyzer != "" { m["analyzer"] = opts.Analyzer - } else { - m["analyzer"] = "fulltext" } return m, nil case TypePath: diff --git a/services/search/pkg/mapping/opensearch_test.go b/services/search/pkg/mapping/opensearch_test.go index 537ed4584c..7a34c9e483 100644 --- a/services/search/pkg/mapping/opensearch_test.go +++ b/services/search/pkg/mapping/opensearch_test.go @@ -92,9 +92,12 @@ func TestOpenSearchBuildMappingOverrides(t *testing.T) { t.Errorf("Name: %#v", name) } content := props["Content"].(map[string]any) - if content["type"] != "text" || content["analyzer"] != "fulltext" || content["term_vector"] != "with_positions_offsets" { + if content["type"] != "text" || content["term_vector"] != "with_positions_offsets" { t.Errorf("Content: %#v", content) } + if _, ok := content["analyzer"]; ok { + t.Errorf("Content should leave analyzer unset (use OpenSearch default), got %#v", content["analyzer"]) + } path := props["Path"].(map[string]any) if path["type"] != "text" || path["analyzer"] != "path_hierarchy" { t.Errorf("Path: %#v", path) diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go index 6f10b23c3d..9ea1c0ccfd 100644 --- a/services/search/pkg/opensearch/index.go +++ b/services/search/pkg/opensearch/index.go @@ -6,19 +6,23 @@ import ( "embed" "errors" "fmt" + "maps" "path" "reflect" "github.com/go-jose/go-jose/v3/json" opensearchgoAPI "github.com/opensearch-project/opensearch-go/v4/opensearchapi" "github.com/tidwall/gjson" + + searchmapping "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" + "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) var ( ErrManualActionRequired = errors.New("manual action required") IndexManagerLatest = IndexIndexManagerResourceV2 IndexIndexManagerResourceV1 IndexManager = "resource_v1.json" - IndexIndexManagerResourceV2 IndexManager = "resource_v2.json" + IndexIndexManagerResourceV2 IndexManager = "resource_v2" ) //go:embed internal/indexes/*.json @@ -36,6 +40,9 @@ func (m IndexManager) String() string { } func (m IndexManager) MarshalJSON() ([]byte, error) { + if m == IndexIndexManagerResourceV2 { + return buildResourceV2Mapping() + } filePath := string(m) body, err := indexes.ReadFile(path.Join("./internal/indexes", filePath)) switch { @@ -48,6 +55,51 @@ func (m IndexManager) MarshalJSON() ([]byte, error) { return body, nil } +// buildResourceV2Mapping renders the OpenSearch index template for a +// search.Resource from the shared SearchFieldOverrides. OpenSearch-specific +// tweaks (wildcard MimeType, path_hierarchy Path) are applied on top. +func buildResourceV2Mapping() ([]byte, error) { + resourceType := reflect.TypeFor[search.Resource]() + overrides := maps.Clone(search.Resource{}.SearchFieldOverrides()) + overrides["MimeType"] = searchmapping.FieldOpts{Type: searchmapping.TypeWildcard} + overrides["Path"] = searchmapping.FieldOpts{Type: searchmapping.TypePath} + if err := searchmapping.Validate(resourceType, overrides); err != nil { + return nil, err + } + props, err := searchmapping.OpenSearchBuildMapping(resourceType, overrides) + if err != nil { + return nil, err + } + + index := map[string]any{ + "settings": map[string]any{ + "number_of_shards": "1", + "number_of_replicas": "1", + "analysis": map[string]any{ + "analyzer": map[string]any{ + "path_hierarchy": map[string]any{ + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": []string{"lowercase"}, + }, + "lowercaseKeyword": map[string]any{ + "type": "custom", + "tokenizer": "keyword", + "filter": []string{"lowercase"}, + }, + }, + "tokenizer": map[string]any{ + "path_hierarchy": map[string]any{"type": "path_hierarchy"}, + }, + }, + }, + "mappings": map[string]any{ + "properties": props, + }, + } + return json.Marshal(index) +} + func (m IndexManager) Apply(ctx context.Context, name string, client *opensearchgoAPI.Client) error { localIndexB, err := m.MarshalJSON() if err != nil { diff --git a/services/search/pkg/opensearch/internal/indexes/resource_v2.json b/services/search/pkg/opensearch/internal/indexes/resource_v2.json deleted file mode 100644 index 64b450ef51..0000000000 --- a/services/search/pkg/opensearch/internal/indexes/resource_v2.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "settings": { - "number_of_shards": "1", - "number_of_replicas": "1", - "analysis": { - "analyzer": { - "path_hierarchy": { - "filter": [ - "lowercase" - ], - "tokenizer": "path_hierarchy", - "type": "custom" - } - }, - "tokenizer": { - "path_hierarchy": { - "type": "path_hierarchy" - } - } - } - }, - "mappings": { - "properties": { - "Content": { - "type": "text", - "term_vector": "with_positions_offsets" - }, - "ID": { - "type": "keyword" - }, - "ParentID": { - "type": "keyword" - }, - "RootID": { - "type": "keyword" - }, - "MimeType": { - "type": "wildcard", - "doc_values": false - }, - "Path": { - "type": "text", - "analyzer": "path_hierarchy" - }, - "Deleted": { - "type": "boolean" - }, - "Hidden": { - "type": "boolean" - }, - "Favorites": { - "type": "keyword" - } - } - } -} \ No newline at end of file From b2950034bcf16f45e66cee766ca623c6c9e01f50 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 20:59:03 +0200 Subject: [PATCH 11/26] refactor(search): collapse add*Metadata helpers into a generic wrapper The four add{Audio,Image,Location,Photo}Metadata functions were identical modulo the facet type and metadata key prefix. Replace them with a single generic addFacetMetadata parameterised on the libregraph MappedNullable type. The nil check moves into the generic body via reflect, so the doUpsertItem call site reads as four one-liners. --- services/search/pkg/search/service.go | 39 ++++++++------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/services/search/pkg/search/service.go b/services/search/pkg/search/service.go index 980f332572..171b08fe86 100644 --- a/services/search/pkg/search/service.go +++ b/services/search/pkg/search/service.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path/filepath" + "reflect" "sort" "strconv" "strings" @@ -628,10 +629,10 @@ func (s *Service) doUpsertItem(ref *provider.Reference, batch BatchOperator) { // determine if metadata needs to be stored in storage as well metadata := map[string]string{} - addAudioMetadata(metadata, doc.Audio) - addImageMetadata(metadata, doc.Image) - addLocationMetadata(metadata, doc.Location) - addPhotoMetadata(metadata, doc.Photo) + addFacetMetadata(metadata, doc.Audio, "libre.graph.audio.") + addFacetMetadata(metadata, doc.Image, "libre.graph.image.") + addFacetMetadata(metadata, doc.Location, "libre.graph.location.") + addFacetMetadata(metadata, doc.Photo, "libre.graph.photo.") if len(metadata) == 0 { return } @@ -656,32 +657,14 @@ func (s *Service) doUpsertItem(ref *provider.Reference, batch BatchOperator) { } } -func addAudioMetadata(metadata map[string]string, audio *libregraph.Audio) { - if audio == nil { +// addFacetMetadata flattens a libregraph facet (Audio / Image / Location / +// Photo pointer) into the metadata map under the given prefix. No-op when +// the facet pointer is nil. +func addFacetMetadata[T libregraph.MappedNullable](metadata map[string]string, facet T, prefix string) { + if reflect.ValueOf(facet).IsNil() { return } - marshalToStringMap(audio, metadata, "libre.graph.audio.") -} - -func addImageMetadata(metadata map[string]string, image *libregraph.Image) { - if image == nil { - return - } - marshalToStringMap(image, metadata, "libre.graph.image.") -} - -func addLocationMetadata(metadata map[string]string, location *libregraph.GeoCoordinates) { - if location == nil { - return - } - marshalToStringMap(location, metadata, "libre.graph.location.") -} - -func addPhotoMetadata(metadata map[string]string, photo *libregraph.Photo) { - if photo == nil { - return - } - marshalToStringMap(photo, metadata, "libre.graph.photo.") + marshalToStringMap(facet, metadata, prefix) } func marshalToStringMap[T libregraph.MappedNullable](source T, target map[string]string, prefix string) { From d622fecbc568334e9ffa3d7d7cf446dbcc06ee25 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 22:45:08 +0200 Subject: [PATCH 12/26] refactor(search): add DeserializeStringMap to the mapping package Twin of Deserialize[T] but for string-typed flat maps (CS3's ArbitraryMetadata is map[string]string, not map[string]any). Parses raw strings into string / bool / intN / uintN / floatN / time.Time / timestamppb.Timestamp target fields via strconv and time.Parse, using the same json-tag walker fillStruct uses. Returns (nil, nil) when no field under the prefix matched so callers can leave the enclosing pointer nil. Prepares a follow-up in services/graph to replace the hand-rolled unmarshalStringMap + four cs3ResourceToDriveItem*Facet functions. --- .../search/pkg/mapping/deserialize_string.go | 148 ++++++++++++++++++ .../pkg/mapping/deserialize_string_test.go | 112 +++++++++++++ 2 files changed, 260 insertions(+) create mode 100644 services/search/pkg/mapping/deserialize_string.go create mode 100644 services/search/pkg/mapping/deserialize_string_test.go diff --git a/services/search/pkg/mapping/deserialize_string.go b/services/search/pkg/mapping/deserialize_string.go new file mode 100644 index 0000000000..8fb6100949 --- /dev/null +++ b/services/search/pkg/mapping/deserialize_string.go @@ -0,0 +1,148 @@ +package mapping + +import ( + "fmt" + "reflect" + "strconv" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +// DeserializeStringMap reads sub-fields of a string-typed flat map (e.g. CS3 +// ArbitraryMetadata, which is map[string]string) under the given dotted +// prefix into a new *T. String values are parsed into the target field's Go +// type via strconv / time.Parse. Returns (nil, nil) when nothing under the +// prefix matched, so callers can leave the enclosing pointer nil. +func DeserializeStringMap[T any](fields map[string]string, prefix string) (*T, error) { + t := reflect.TypeFor[T]() + if t.Kind() != reflect.Struct { + return nil, fmt.Errorf("mapping: DeserializeStringMap requires a struct type, got %v", t) + } + out := reflect.New(t) + touched, err := fillStructFromStrings(out.Elem(), fields, prefix) + if err != nil { + return nil, err + } + if !touched { + return nil, nil + } + return out.Interface().(*T), nil +} + +func fillStructFromStrings(v reflect.Value, fields map[string]string, prefix string) (bool, error) { + t := v.Type() + touched := false + for i := 0; i < t.NumField(); i++ { + fi := resolveField(t.Field(i)) + if fi.Skip { + continue + } + fv := v.Field(i) + + if fi.Embedded { + sub, err := fillStructFromStrings(fv, fields, prefix) + if err != nil { + return touched, err + } + if sub { + touched = true + } + continue + } + + key := fi.Name + if prefix != "" { + key = prefix + fi.Name + } + + if fv.Kind() == reflect.Ptr { + elem := fv.Type().Elem() + if elem.Kind() == reflect.Struct && elem != timeType && elem != timestampType { + alloc := reflect.New(elem) + subPrefix := key + "." + sub, err := fillStructFromStrings(alloc.Elem(), fields, subPrefix) + if err != nil { + return touched, err + } + if sub { + fv.Set(alloc) + touched = true + } + continue + } + } + + raw, ok := fields[key] + if !ok { + continue + } + if err := setValueFromString(fv, raw); err != nil { + return touched, fmt.Errorf("mapping: field %q: %w", key, err) + } + touched = true + } + return touched, nil +} + +func setValueFromString(v reflect.Value, raw string) error { + if v.Kind() == reflect.Ptr { + elem := v.Type().Elem() + alloc := reflect.New(elem) + if err := setValueFromString(alloc.Elem(), raw); err != nil { + return err + } + v.Set(alloc) + return nil + } + if v.Type() == timeType { + t, err := time.Parse(time.RFC3339, raw) + if err != nil { + return fmt.Errorf("parse time %q: %w", raw, err) + } + v.Set(reflect.ValueOf(t)) + return nil + } + if v.Type() == timestampType { + t, err := time.Parse(time.RFC3339, raw) + if err != nil { + return fmt.Errorf("parse time %q: %w", raw, err) + } + v.Set(reflect.ValueOf(*timestamppb.New(t))) + return nil + } + switch v.Kind() { + case reflect.String: + v.SetString(raw) + return nil + case reflect.Bool: + b, err := strconv.ParseBool(raw) + if err != nil { + return fmt.Errorf("parse bool %q: %w", raw, err) + } + v.SetBool(b) + return nil + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + n, err := strconv.ParseInt(raw, 10, v.Type().Bits()) + if err != nil { + return fmt.Errorf("parse int %q: %w", raw, err) + } + v.SetInt(n) + return nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + n, err := strconv.ParseUint(raw, 10, v.Type().Bits()) + if err != nil { + return fmt.Errorf("parse uint %q: %w", raw, err) + } + v.SetUint(n) + return nil + case reflect.Float32, reflect.Float64: + f, err := strconv.ParseFloat(raw, v.Type().Bits()) + if err != nil { + return fmt.Errorf("parse float %q: %w", raw, err) + } + v.SetFloat(f) + return nil + } + return fmt.Errorf("unsupported target kind %s", v.Kind()) +} diff --git a/services/search/pkg/mapping/deserialize_string_test.go b/services/search/pkg/mapping/deserialize_string_test.go new file mode 100644 index 0000000000..82370284ca --- /dev/null +++ b/services/search/pkg/mapping/deserialize_string_test.go @@ -0,0 +1,112 @@ +package mapping + +import ( + "testing" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +type stringFacet struct { + Artist *string `json:"artist,omitempty"` + Year *int32 `json:"year,omitempty"` + Duration *int64 `json:"duration,omitempty"` + Rating *float64 `json:"rating,omitempty"` + Explicit *bool `json:"explicit,omitempty"` + Taken *time.Time `json:"takenDateTime,omitempty"` +} + +func TestDeserializeStringMapBasicTypes(t *testing.T) { + r, err := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.artist": "Queen", + "libre.graph.audio.year": "1975", + "libre.graph.audio.duration": "354000", + "libre.graph.audio.rating": "4.9", + "libre.graph.audio.explicit": "true", + "libre.graph.audio.takenDateTime": "2024-01-02T03:04:05Z", + }, "libre.graph.audio.") + if err != nil { + t.Fatalf("DeserializeStringMap: %v", err) + } + if r == nil { + t.Fatal("expected non-nil *stringFacet") + } + if r.Artist == nil || *r.Artist != "Queen" { + t.Errorf("Artist: %#v", r.Artist) + } + if r.Year == nil || *r.Year != 1975 { + t.Errorf("Year: %#v", r.Year) + } + if r.Duration == nil || *r.Duration != 354000 { + t.Errorf("Duration: %#v", r.Duration) + } + if r.Rating == nil || *r.Rating != 4.9 { + t.Errorf("Rating: %#v", r.Rating) + } + if r.Explicit == nil || !*r.Explicit { + t.Errorf("Explicit: %#v", r.Explicit) + } + if r.Taken == nil || !r.Taken.Equal(time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)) { + t.Errorf("Taken: %#v", r.Taken) + } +} + +func TestDeserializeStringMapReturnsNilWhenEmpty(t *testing.T) { + r, err := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.image.width": "1200", + }, "libre.graph.audio.") + if err != nil { + t.Fatalf("DeserializeStringMap: %v", err) + } + if r != nil { + t.Fatalf("expected nil, got %#v", r) + } +} + +func TestDeserializeStringMapTimestamppb(t *testing.T) { + type photoFacet struct { + Taken *timestamppb.Timestamp `json:"takenDateTime,omitempty"` + } + r, err := DeserializeStringMap[photoFacet](map[string]string{ + "libre.graph.photo.takenDateTime": "2024-05-06T07:08:09Z", + }, "libre.graph.photo.") + if err != nil { + t.Fatalf("DeserializeStringMap: %v", err) + } + if r == nil || r.Taken == nil { + t.Fatalf("Taken missing: %#v", r) + } + want := time.Date(2024, 5, 6, 7, 8, 9, 0, time.UTC) + if !r.Taken.AsTime().Equal(want) { + t.Errorf("Taken: got %v, want %v", r.Taken.AsTime(), want) + } +} + +func TestDeserializeStringMapParseErrorBubblesUp(t *testing.T) { + _, err := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.year": "not-a-number", + }, "libre.graph.audio.") + if err == nil { + t.Fatal("expected error for unparsable int") + } +} + +func TestDeserializeStringMapIgnoresOtherPrefixes(t *testing.T) { + r, err := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.artist": "Mercury", + "libre.graph.image.artist": "someone-else", + }, "libre.graph.audio.") + if err != nil { + t.Fatalf("DeserializeStringMap: %v", err) + } + if r == nil || r.Artist == nil || *r.Artist != "Mercury" { + t.Fatalf("Artist: %#v", r.Artist) + } +} + +func TestDeserializeStringMapRejectsNonStruct(t *testing.T) { + _, err := DeserializeStringMap[int](nil, "") + if err == nil { + t.Fatal("expected error for non-struct T") + } +} From 8df64e3d66b4670fb3d5408b0a88ac567c72e0c0 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 22:50:40 +0200 Subject: [PATCH 13/26] refactor(graph): route CS3 facet parsing through mapping.DeserializeStringMap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit services/graph/pkg/service/v0/driveitems.go had a parallel copy of the same hand-rolled reflection walker that services/search/pkg/bleve used to have (unmarshalStringMap + getFieldName), plus four near-identical cs3ResourceToDriveItem{Audio,Image,Location,Photo}Facet wrappers. The mapping package already provides this machinery — point graph at it. A small local setFacet helper swallows parse errors and logs them, matching the original fail-soft behavior (bad metadata leaves the facet nil, it does not propagate). Adding a new facet (motionPhoto, ...) is now one line in the call site of cs3ResourceInfoToDriveItem. --- services/graph/pkg/service/v0/driveitems.go | 136 +++----------------- 1 file changed, 18 insertions(+), 118 deletions(-) diff --git a/services/graph/pkg/service/v0/driveitems.go b/services/graph/pkg/service/v0/driveitems.go index da89e25bad..ca18faf02d 100644 --- a/services/graph/pkg/service/v0/driveitems.go +++ b/services/graph/pkg/service/v0/driveitems.go @@ -9,7 +9,6 @@ import ( "net/http" "net/url" "path" - "reflect" "strconv" "strings" "time" @@ -30,6 +29,7 @@ import ( "github.com/opencloud-eu/opencloud/pkg/log" "github.com/opencloud-eu/opencloud/services/graph/pkg/errorcode" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" ) // CreateUploadSession create an upload session to allow your app to upload files up to the maximum file size. @@ -471,130 +471,30 @@ func cs3ResourceToDriveItem(logger *log.Logger, res *storageprovider.ResourceInf driveItem.Folder = &libregraph.Folder{} } - if res.GetArbitraryMetadata() != nil { - driveItem.Audio = cs3ResourceToDriveItemAudioFacet(logger, res) - driveItem.Image = cs3ResourceToDriveItemImageFacet(logger, res) - driveItem.Location = cs3ResourceToDriveItemLocationFacet(logger, res) - driveItem.Photo = cs3ResourceToDriveItemPhotoFacet(logger, res) + if metadata := res.GetArbitraryMetadata().GetMetadata(); metadata != nil { + if strings.HasPrefix(res.GetMimeType(), "audio/") { + setFacet(logger, &driveItem.Audio, metadata, "libre.graph.audio.") + } + setFacet(logger, &driveItem.Image, metadata, "libre.graph.image.") + setFacet(logger, &driveItem.Location, metadata, "libre.graph.location.") + setFacet(logger, &driveItem.Photo, metadata, "libre.graph.photo.") } return driveItem, nil } -func cs3ResourceToDriveItemAudioFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.Audio { - if !strings.HasPrefix(res.GetMimeType(), "audio/") { - return nil - } - - k := res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var audio = &libregraph.Audio{} - if ok := unmarshalStringMap(logger, audio, k, "libre.graph.audio."); ok { - return audio - } - - return nil -} - -func cs3ResourceToDriveItemImageFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.Image { - k := res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var image = &libregraph.Image{} - if ok := unmarshalStringMap(logger, image, k, "libre.graph.image."); ok { - return image - } - - return nil -} - -func cs3ResourceToDriveItemLocationFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.GeoCoordinates { - k := res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var location = &libregraph.GeoCoordinates{} - if ok := unmarshalStringMap(logger, location, k, "libre.graph.location."); ok { - return location - } - - return nil -} - -func cs3ResourceToDriveItemPhotoFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.Photo { - k := res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var photo = &libregraph.Photo{} - if ok := unmarshalStringMap(logger, photo, k, "libre.graph.photo."); ok { - return photo - } - - return nil -} - -func getFieldName(structField reflect.StructField) string { - tag := structField.Tag.Get("json") - if tag == "" { - return structField.Name +// setFacet decodes a libre.graph..* slice of CS3 ArbitraryMetadata +// into *dst. Parse errors are logged and the facet stays nil, matching the +// original fail-soft behavior of the per-field unmarshaller this replaced. +func setFacet[T any](logger *log.Logger, dst **T, metadata map[string]string, prefix string) { + v, err := mapping.DeserializeStringMap[T](metadata, prefix) + if err != nil { + logger.Error().Err(err).Str("prefix", prefix).Msg("graph: failed to parse facet metadata") + return } - - return strings.Split(tag, ",")[0] -} - -func unmarshalStringMap(logger *log.Logger, out any, flatMap map[string]string, prefix string) bool { - nonEmpty := false - obj := reflect.ValueOf(out).Elem() - timeKind := reflect.TypeOf(&time.Time{}).Elem().Kind() - for i := 0; i < obj.NumField(); i++ { - field := obj.Field(i) - structField := obj.Type().Field(i) - mapKey := prefix + getFieldName(structField) - - if value, ok := flatMap[mapKey]; ok { - if field.Kind() == reflect.Ptr { - newValue := reflect.New(field.Type().Elem()) - var tmp any - var err error - switch t := newValue.Type().Elem().Kind(); t { - case reflect.String: - tmp = value - case reflect.Int32: - tmp, err = strconv.ParseInt(value, 10, 32) - case reflect.Int64: - tmp, err = strconv.ParseInt(value, 10, 64) - case reflect.Float32: - tmp, err = strconv.ParseFloat(value, 32) - case reflect.Float64: - tmp, err = strconv.ParseFloat(value, 64) - case reflect.Bool: - tmp, err = strconv.ParseBool(value) - case timeKind: - tmp, err = time.Parse(time.RFC3339, value) - default: - err = errors.New("unsupported type") - logger.Error().Err(err).Str("type", t.String()).Str("mapKey", mapKey).Msg("target field type for value of mapKey is not supported") - } - if err != nil { - logger.Error().Err(err).Str("mapKey", mapKey).Msg("unmarshalling failed") - continue - } - newValue.Elem().Set(reflect.ValueOf(tmp).Convert(field.Type().Elem())) - field.Set(newValue) - nonEmpty = true - } - } + if v != nil { + *dst = v } - - return nonEmpty } func cs3ResourceToRemoteItem(res *storageprovider.ResourceInfo) (*libregraph.RemoteItem, error) { From 167b169f6bfeddfa7fb48b8e9fd57e2a19f3d631 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 23:07:46 +0200 Subject: [PATCH 14/26] refactor(search): collapse OpenSearch facet conversion via copyFacet helper OpenSearchHitToMatch had four near-identical inline closures for the Audio / Image / Location / Photo facets, each calling conversions.To from the libregraph indexed shape to the protobuf searchMessage shape. Replace them with a small local generic helper: func copyFacet[Dst, Src any](src *Src) *Dst The Audio MimeType guard stays visible at the call site instead of hiding inside the closure. Net -10 lines, and adding a new facet (motionPhoto, ...) to this conversion is now one line instead of five. --- .../opensearch/internal/convert/opensearch.go | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/services/search/pkg/opensearch/internal/convert/opensearch.go b/services/search/pkg/opensearch/internal/convert/opensearch.go index c4d8212dcd..d60cbe86bd 100644 --- a/services/search/pkg/opensearch/internal/convert/opensearch.go +++ b/services/search/pkg/opensearch/internal/convert/opensearch.go @@ -15,6 +15,17 @@ import ( "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) +// copyFacet converts a typed pointer from the indexed shape (libregraph) to +// the protobuf shape via conversions.To. Returns nil when src is nil so the +// enclosing Match.Entity field stays nil. +func copyFacet[Dst, Src any](src *Src) *Dst { + if src == nil { + return nil + } + dst, _ := conversions.To[*Dst](src) + return dst +} + func OpenSearchHitToMatch(hit opensearchgoAPI.SearchHit) (*searchMessage.Match, error) { resource, err := conversions.To[search.Resource](hit.Source) if err != nil { @@ -68,29 +79,16 @@ func OpenSearchHitToMatch(hit opensearchgoAPI.SearchHit) (*searchMessage.Match, return strings.Join(contentHighlights[:], "; ") }(), - Audio: func() *searchMessage.Audio { - if !strings.HasPrefix(resource.MimeType, "audio/") { - return nil - } - - audio, _ := conversions.To[*searchMessage.Audio](resource.Audio) - return audio - }(), - Image: func() *searchMessage.Image { - image, _ := conversions.To[*searchMessage.Image](resource.Image) - return image - }(), - Location: func() *searchMessage.GeoCoordinates { - geoCoordinates, _ := conversions.To[*searchMessage.GeoCoordinates](resource.Location) - return geoCoordinates - }(), - Photo: func() *searchMessage.Photo { - photo, _ := conversions.To[*searchMessage.Photo](resource.Photo) - return photo - }(), + Image: copyFacet[searchMessage.Image](resource.Image), + Location: copyFacet[searchMessage.GeoCoordinates](resource.Location), + Photo: copyFacet[searchMessage.Photo](resource.Photo), }, } + if strings.HasPrefix(resource.MimeType, "audio/") { + match.Entity.Audio = copyFacet[searchMessage.Audio](resource.Audio) + } + if mtime, err := time.Parse(time.RFC3339, resource.Mtime); err == nil { match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())} } From 0899f889715de8616786bcefdbeb076c0c824704 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 22 Apr 2026 21:03:49 +0200 Subject: [PATCH 15/26] feat(search): index Location as a geopoint on both backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Showcase commit: after the mapping refactor, turning on geopoint indexing for the Location facet is essentially one line — SearchFieldOverrides now includes: "location": {Type: mapping.TypeGeopoint} Everything else falls out of the existing pipeline: - BleveBuildMapping emits a sub-document at "location" that carries both a geopoint field (for geo_distance queries) and explicit numeric sub-fields for longitude / latitude / altitude. The sub-fields make hit.Fields round-trip the full GeoCoordinates instead of leaving altitude at the mercy of Dynamic mapping; this preserves Move / Delete / Restore. - OpenSearchBuildMapping emits {"type": "geo_point"}; altitude stays available via the OpenSearch _source round-trip. - PrepareForIndex adapts the flat "location" map into a geoPoint struct bleve recognizes (Lat() / Lon() methods + exported longitude / latitude / altitude fields bleve indexes as sub-fields). Covered by unit tests in bleve/geo_verify_test.go: - location.longitude / latitude / altitude all land in hit.Fields - numeric range queries on each of the three sub-fields return the expected hit (and miss when the range excludes the indexed value) - bleve.GeoDistanceQuery matches when inside the radius and misses when the search point is outside --- services/search/pkg/bleve/geo_verify_test.go | 178 +++++++++++++++++++ services/search/pkg/mapping/bleve.go | 21 +++ services/search/pkg/mapping/bleve_test.go | 28 ++- services/search/pkg/mapping/geo.go | 55 ++++++ services/search/pkg/mapping/geo_test.go | 86 +++++++++ services/search/pkg/mapping/serialize.go | 17 +- services/search/pkg/search/search.go | 1 + 7 files changed, 372 insertions(+), 14 deletions(-) create mode 100644 services/search/pkg/bleve/geo_verify_test.go create mode 100644 services/search/pkg/mapping/geo.go create mode 100644 services/search/pkg/mapping/geo_test.go diff --git a/services/search/pkg/bleve/geo_verify_test.go b/services/search/pkg/bleve/geo_verify_test.go new file mode 100644 index 0000000000..f7ccd2e80c --- /dev/null +++ b/services/search/pkg/bleve/geo_verify_test.go @@ -0,0 +1,178 @@ +package bleve_test + +import ( + "sort" + "testing" + + bleveSearch "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/search/query" + libregraph "github.com/opencloud-eu/libre-graph-api-go" + + "github.com/opencloud-eu/opencloud/services/search/pkg/bleve" + "github.com/opencloud-eu/opencloud/services/search/pkg/content" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" + "github.com/opencloud-eu/opencloud/services/search/pkg/search" +) + +// geoFixture builds an in-memory bleve index with a single resource that +// carries the given lon/lat/alt. Used by the search tests below. +func geoFixture(t *testing.T, lon, lat, alt float64) bleveSearch.Index { + t.Helper() + idxMapping, err := bleve.NewMapping() + if err != nil { + t.Fatalf("NewMapping: %v", err) + } + idx, err := bleveSearch.NewMemOnly(idxMapping) + if err != nil { + t.Fatalf("NewMemOnly: %v", err) + } + r := search.Resource{ + ID: "x", + Document: content.Document{ + Name: "team.jpg", + Location: &libregraph.GeoCoordinates{ + Longitude: &lon, + Latitude: &lat, + Altitude: &alt, + }, + }, + } + doc, err := mapping.PrepareForIndex(r, r.SearchFieldOverrides()) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + if err := idx.Index(r.ID, doc); err != nil { + t.Fatalf("Index: %v", err) + } + return idx +} + +// TestLocationAltitudeRoundTrip proves that every subfield of Location +// (including altitude) ends up in hit.Fields when a Resource is indexed +// through the full bleve pipeline. This is the invariant the Move / +// Delete / Restore round-trip depends on. +func TestLocationAltitudeRoundTrip(t *testing.T) { + idx := geoFixture(t, 11.103870357204285, 49.48675890884328, 1047.7) + + req := bleveSearch.NewSearchRequest(bleveSearch.NewMatchAllQuery()) + req.Fields = []string{"*"} + res, err := idx.Search(req) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) == 0 { + t.Fatal("no hits") + } + keys := make([]string, 0, len(res.Hits[0].Fields)) + for k := range res.Hits[0].Fields { + keys = append(keys, k) + } + sort.Strings(keys) + t.Logf("hit.Fields keys: %v", keys) + + for _, k := range []string{"location.longitude", "location.latitude", "location.altitude"} { + if _, ok := res.Hits[0].Fields[k]; !ok { + t.Errorf("missing %q in hit.Fields (got %v)", k, keys) + } + } +} + +func TestLocationLatitudeRangeQueryMatches(t *testing.T) { + idx := geoFixture(t, 11.1, 49.48, 1000) + + // numeric range on the sub-field + min, max := 49.0, 50.0 + incl := true + q := query.NewNumericRangeInclusiveQuery(&min, &max, &incl, &incl) + q.SetField("location.latitude") + res, err := idx.Search(bleveSearch.NewSearchRequest(q)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("latitude range: got %d hits, want 1", len(res.Hits)) + } + + // same range excluding the indexed latitude => no match + lowMin, lowMax := 0.0, 10.0 + q2 := query.NewNumericRangeInclusiveQuery(&lowMin, &lowMax, &incl, &incl) + q2.SetField("location.latitude") + res, err = idx.Search(bleveSearch.NewSearchRequest(q2)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 0 { + t.Errorf("latitude range outside value: got %d hits, want 0", len(res.Hits)) + } +} + +func TestLocationLongitudeRangeQueryMatches(t *testing.T) { + idx := geoFixture(t, 11.1, 49.48, 1000) + + min, max := 11.0, 12.0 + incl := true + q := query.NewNumericRangeInclusiveQuery(&min, &max, &incl, &incl) + q.SetField("location.longitude") + res, err := idx.Search(bleveSearch.NewSearchRequest(q)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("longitude range: got %d hits, want 1", len(res.Hits)) + } +} + +func TestLocationAltitudeRangeQueryMatches(t *testing.T) { + idx := geoFixture(t, 11.1, 49.48, 1047.7) + + min := 1000.0 + incl := true + q := query.NewNumericRangeInclusiveQuery(&min, nil, &incl, nil) + q.SetField("location.altitude") + res, err := idx.Search(bleveSearch.NewSearchRequest(q)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("altitude >= 1000: got %d hits, want 1", len(res.Hits)) + } + + // altitude floor above the indexed value => no hits + highMin := 2000.0 + q2 := query.NewNumericRangeInclusiveQuery(&highMin, nil, &incl, nil) + q2.SetField("location.altitude") + res, err = idx.Search(bleveSearch.NewSearchRequest(q2)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 0 { + t.Errorf("altitude >= 2000 against 1047.7: got %d hits, want 0", len(res.Hits)) + } +} + +func TestLocationGeoDistanceQueryMatches(t *testing.T) { + // Nuremberg-ish coordinates. + idx := geoFixture(t, 11.103870357204285, 49.48675890884328, 1047.7) + + // 10 km radius around the indexed point should match. + near := query.NewGeoDistanceQuery(11.103870357204285, 49.48675890884328, "10km") + near.SetField("location") + res, err := idx.Search(bleveSearch.NewSearchRequest(near)) + if err != nil { + t.Fatalf("Search (near): %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("geo distance near: got %d hits, want 1", len(res.Hits)) + } + + // Far away (Berlin, ~400 km) with a 10 km radius should miss. + far := query.NewGeoDistanceQuery(13.404954, 52.520008, "10km") + far.SetField("location") + res, err = idx.Search(bleveSearch.NewSearchRequest(far)) + if err != nil { + t.Fatalf("Search (far): %v", err) + } + if len(res.Hits) != 0 { + t.Errorf("geo distance far: got %d hits, want 0", len(res.Hits)) + } +} diff --git a/services/search/pkg/mapping/bleve.go b/services/search/pkg/mapping/bleve.go index 7d5027dcaf..fef66050dd 100644 --- a/services/search/pkg/mapping/bleve.go +++ b/services/search/pkg/mapping/bleve.go @@ -46,6 +46,27 @@ func buildBleveDocMapping(t reflect.Type, overrides map[string]FieldOpts, prefix return nil } + if fieldType == TypeGeopoint { + // Build a sub-document that carries both the geopoint field + // (for geo queries) and explicit numeric sub-fields for the + // Go struct's members (longitude, latitude, altitude) so + // hit.Fields round-trips the full libregraph.GeoCoordinates, + // not just what bleve's geopoint storage retains. + var subDoc *bleveMapping.DocumentMapping + if sub := structType(fi.GoField.Type); sub != nil { + var err error + subDoc, err = buildBleveDocMapping(sub, overrides, key) + if err != nil { + return err + } + } else { + subDoc = bleve.NewDocumentMapping() + } + subDoc.AddFieldMapping(bleve.NewGeoPointFieldMapping()) + doc.AddSubDocumentMapping(fi.Name, subDoc) + return nil + } + fm, err := bleveFieldMapping(fieldType, opts) if err != nil { return fmt.Errorf("mapping: field %q: %w", key, err) diff --git a/services/search/pkg/mapping/bleve_test.go b/services/search/pkg/mapping/bleve_test.go index cd140eb796..095a6cc5b0 100644 --- a/services/search/pkg/mapping/bleve_test.go +++ b/services/search/pkg/mapping/bleve_test.go @@ -103,8 +103,9 @@ func TestBleveBuildMappingOverrides(t *testing.T) { func TestBleveBuildMappingGeopoint(t *testing.T) { type geoDoc struct { Location *struct { - Lon float64 `json:"longitude"` - Lat float64 `json:"latitude"` + Lon *float64 `json:"longitude,omitempty"` + Lat *float64 `json:"latitude,omitempty"` + Alt *float64 `json:"altitude,omitempty"` } `json:"location,omitempty"` } dm, err := BleveBuildMapping(reflect.TypeFor[geoDoc](), map[string]FieldOpts{ @@ -114,10 +115,23 @@ func TestBleveBuildMappingGeopoint(t *testing.T) { t.Fatalf("BleveBuildMapping: %v", err) } loc := dm.Properties["location"] - if loc == nil || len(loc.Fields) == 0 { - t.Fatalf("location field missing: %#v", dm.Properties) - } - if got := loc.Fields[0].Type; got != "geopoint" { - t.Errorf("location type: %q, want geopoint", got) + if loc == nil { + t.Fatalf("location sub-document missing: %#v", dm.Properties) + } + if len(loc.Fields) == 0 || loc.Fields[0].Type != "geopoint" { + t.Errorf("location Fields: %#v, want [geopoint]", loc.Fields) + } + // Sub-fields must be explicit so hit.Fields carries longitude/ + // latitude/altitude back to the caller without relying on + // Dynamic=true. + for _, sub := range []string{"longitude", "latitude", "altitude"} { + prop, ok := loc.Properties[sub] + if !ok { + t.Errorf("missing sub-field %q under location (properties: %v)", sub, loc.Properties) + continue + } + if len(prop.Fields) == 0 || prop.Fields[0].Type != "number" { + t.Errorf("location.%s Fields: %#v, want [number]", sub, prop.Fields) + } } } diff --git a/services/search/pkg/mapping/geo.go b/services/search/pkg/mapping/geo.go new file mode 100644 index 0000000000..15608cb9ef --- /dev/null +++ b/services/search/pkg/mapping/geo.go @@ -0,0 +1,55 @@ +package mapping + +// geoPoint is the shape bleve's geo.ExtractGeoPoint recognizes: a struct +// with a Lon() and Lat() method. Exported fields with json tags are also +// walked by bleve, so indexing a geoPoint produces: +// +// - the geopoint value at the parent field (for geo queries), and +// - sibling numeric sub-fields (parent.longitude, parent.latitude, +// parent.altitude) suitable for retrieval via hit.Fields. +type geoPoint struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + Altitude *float64 `json:"altitude,omitempty"` +} + +func (g geoPoint) Lon() float64 { + if g.Longitude == nil { + return 0 + } + return *g.Longitude +} + +func (g geoPoint) Lat() float64 { + if g.Latitude == nil { + return 0 + } + return *g.Latitude +} + +// adaptGeoPoint looks at m[key], expects a nested map with "longitude" and +// "latitude" (the json layout of libregraph.GeoCoordinates), and replaces +// it with a geoPoint the bleve indexer recognizes. Returns true if the +// adaptation was applied. +func adaptGeoPoint(m map[string]any, key string) bool { + raw, ok := m[key].(map[string]any) + if !ok { + return false + } + gp := geoPoint{} + if v, ok := raw["longitude"].(float64); ok { + gp.Longitude = &v + } + if v, ok := raw["latitude"].(float64); ok { + gp.Latitude = &v + } + if v, ok := raw["altitude"].(float64); ok { + gp.Altitude = &v + } + if gp.Longitude == nil || gp.Latitude == nil { + delete(m, key) + return false + } + m[key] = gp + return true +} diff --git a/services/search/pkg/mapping/geo_test.go b/services/search/pkg/mapping/geo_test.go new file mode 100644 index 0000000000..b933a323fd --- /dev/null +++ b/services/search/pkg/mapping/geo_test.go @@ -0,0 +1,86 @@ +package mapping + +import ( + "testing" +) + +func TestPrepareForIndexAdaptsGeopoint(t *testing.T) { + type geoDoc struct { + Location *struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + Altitude *float64 `json:"altitude,omitempty"` + } `json:"location,omitempty"` + } + lon, lat, alt := 11.1, 49.4, 1047.7 + doc := geoDoc{Location: &struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + Altitude *float64 `json:"altitude,omitempty"` + }{Longitude: &lon, Latitude: &lat, Altitude: &alt}} + + m, err := PrepareForIndex(doc, map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + gp, ok := m["location"].(geoPoint) + if !ok { + t.Fatalf("expected geoPoint, got %T: %#v", m["location"], m["location"]) + } + if gp.Lon() != lon || gp.Lat() != lat { + t.Errorf("geoPoint: Lon=%v, Lat=%v, want %v/%v", gp.Lon(), gp.Lat(), lon, lat) + } + if gp.Altitude == nil || *gp.Altitude != alt { + t.Errorf("Altitude preserved: %#v, want %v", gp.Altitude, alt) + } +} + +func TestPrepareForIndexDropsIncompleteGeopoint(t *testing.T) { + type geoDoc struct { + Location *struct { + Altitude *float64 `json:"altitude,omitempty"` + } `json:"location,omitempty"` + } + alt := 100.0 + doc := geoDoc{Location: &struct { + Altitude *float64 `json:"altitude,omitempty"` + }{Altitude: &alt}} + + m, err := PrepareForIndex(doc, map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + if _, ok := m["location"]; ok { + t.Errorf("location should be dropped when lon/lat missing, got %#v", m["location"]) + } +} + +func TestPrepareForIndexWithoutGeopointOverrideIsUnchanged(t *testing.T) { + type geoDoc struct { + Location *struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + } `json:"location,omitempty"` + } + lon, lat := 11.1, 49.4 + doc := geoDoc{Location: &struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + }{Longitude: &lon, Latitude: &lat}} + + m, err := PrepareForIndex(doc, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + nested, ok := m["location"].(map[string]any) + if !ok { + t.Fatalf("expected nested map without geopoint override, got %T", m["location"]) + } + if nested["longitude"] != lon || nested["latitude"] != lat { + t.Errorf("nested map: %#v", nested) + } +} diff --git a/services/search/pkg/mapping/serialize.go b/services/search/pkg/mapping/serialize.go index b145ef26d8..809bd30267 100644 --- a/services/search/pkg/mapping/serialize.go +++ b/services/search/pkg/mapping/serialize.go @@ -9,14 +9,12 @@ import ( // to bleve.Batch.Index. It honors json tags, omitempty semantics, and // embedded-struct flattening via a json marshal/unmarshal round-trip — the // same field resolution bleve would do internally when given the struct -// directly, but in a form that lets the mapping package splice in extra -// representations (e.g. geopoint adapter) without mutating the caller's -// value. +// directly — and then splices in type-specific adaptations (currently: +// geopoint) driven by overrides. // -// The overrides parameter is reserved for type-specific adaptations -// (geopoint is the first user); callers should pass the same map used for -// BleveBuildMapping so the two stay in sync. -func PrepareForIndex(v any, _ map[string]FieldOpts) (map[string]any, error) { +// Callers should pass the same overrides map used for BleveBuildMapping so +// the document shape and the mapping stay in sync. +func PrepareForIndex(v any, overrides map[string]FieldOpts) (map[string]any, error) { b, err := json.Marshal(v) if err != nil { return nil, fmt.Errorf("mapping: marshal %T: %w", v, err) @@ -25,5 +23,10 @@ func PrepareForIndex(v any, _ map[string]FieldOpts) (map[string]any, error) { if err := json.Unmarshal(b, &out); err != nil { return nil, fmt.Errorf("mapping: unmarshal %T: %w", v, err) } + for key, opts := range overrides { + if opts.Type == TypeGeopoint { + adaptGeoPoint(out, key) + } + } return out, nil } diff --git a/services/search/pkg/search/search.go b/services/search/pkg/search/search.go index 4a1d652d60..f5bdb69c40 100644 --- a/services/search/pkg/search/search.go +++ b/services/search/pkg/search/search.go @@ -71,6 +71,7 @@ func (Resource) SearchFieldOverrides() map[string]mapping.FieldOpts { "Content": {Type: mapping.TypeFulltext}, "Tags": {Analyzer: "lowercaseKeyword", IncludeInAll: &excludeFromAll}, "Favorites": {Analyzer: "lowercaseKeyword", IncludeInAll: &excludeFromAll}, + "location": {Type: mapping.TypeGeopoint}, } } From 7463359fa563579526737e35a238c1f7a9626024 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Thu, 23 Apr 2026 01:14:50 +0200 Subject: [PATCH 16/26] refactor(search): geopoint as sibling field on both backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reshape TypeGeopoint so the libregraph facet (longitude / latitude / altitude) stays as a plain object sub-document — queryable the usual way (e.g. location.latitude:>49) and fully round-trippable via hit.Fields / _source — and a sibling "_geopoint" field carries the {lat, lon} form the spatial indices understand. Same shape on both backends: bleve: sub-document with numeric sub-properties + sibling "_geopoint" with NewGeoPointFieldMapping OpenSearch: { "properties": { longitude, latitude, altitude } } + sibling "_geopoint" with {"type": "geo_point"} PrepareForIndex now walks overrides and, for each TypeGeopoint entry at any dotted path (e.g. "location" or "journey.start"), inserts the {lat, lon} sibling at the same parent level. The original facet map stays untouched — no struct wrapper, no value replacement — so multiple geopoints per facet work automatically (journey.start and journey.end both yield siblings journey.start_geopoint and journey.end_geopoint). OpenSearch's batch Upsert now routes through mapping.PrepareForIndex instead of the bare conversions.To[map[string]any], so a single write path handles the sibling injection for both backends. Drive-by: emit "doc_values": false on TypeWildcard fields to match how OpenSearch normalizes wildcard mappings on read; keeps the Apply up-to-date check stable. Verified with an OpenSearch 2 container (full services/search/pkg/ opensearch test suite green). --- services/search/pkg/bleve/geo_verify_test.go | 4 +- services/search/pkg/mapping/bleve.go | 30 +++---- services/search/pkg/mapping/bleve_test.go | 18 ++-- services/search/pkg/mapping/geo.go | 84 +++++++++---------- services/search/pkg/mapping/geo_test.go | 81 ++++++++++++++---- services/search/pkg/mapping/opensearch.go | 23 ++++- .../search/pkg/mapping/opensearch_test.go | 21 ++++- services/search/pkg/mapping/serialize.go | 6 +- services/search/pkg/opensearch/batch.go | 3 +- 9 files changed, 180 insertions(+), 90 deletions(-) diff --git a/services/search/pkg/bleve/geo_verify_test.go b/services/search/pkg/bleve/geo_verify_test.go index f7ccd2e80c..27cf0e7a01 100644 --- a/services/search/pkg/bleve/geo_verify_test.go +++ b/services/search/pkg/bleve/geo_verify_test.go @@ -156,7 +156,7 @@ func TestLocationGeoDistanceQueryMatches(t *testing.T) { // 10 km radius around the indexed point should match. near := query.NewGeoDistanceQuery(11.103870357204285, 49.48675890884328, "10km") - near.SetField("location") + near.SetField("location" + mapping.GeopointSuffix) res, err := idx.Search(bleveSearch.NewSearchRequest(near)) if err != nil { t.Fatalf("Search (near): %v", err) @@ -167,7 +167,7 @@ func TestLocationGeoDistanceQueryMatches(t *testing.T) { // Far away (Berlin, ~400 km) with a 10 km radius should miss. far := query.NewGeoDistanceQuery(13.404954, 52.520008, "10km") - far.SetField("location") + far.SetField("location" + mapping.GeopointSuffix) res, err = idx.Search(bleveSearch.NewSearchRequest(far)) if err != nil { t.Fatalf("Search (far): %v", err) diff --git a/services/search/pkg/mapping/bleve.go b/services/search/pkg/mapping/bleve.go index fef66050dd..e2ca4755d2 100644 --- a/services/search/pkg/mapping/bleve.go +++ b/services/search/pkg/mapping/bleve.go @@ -47,23 +47,23 @@ func buildBleveDocMapping(t reflect.Type, overrides map[string]FieldOpts, prefix } if fieldType == TypeGeopoint { - // Build a sub-document that carries both the geopoint field - // (for geo queries) and explicit numeric sub-fields for the - // Go struct's members (longitude, latitude, altitude) so - // hit.Fields round-trips the full libregraph.GeoCoordinates, - // not just what bleve's geopoint storage retains. - var subDoc *bleveMapping.DocumentMapping - if sub := structType(fi.GoField.Type); sub != nil { - var err error - subDoc, err = buildBleveDocMapping(sub, overrides, key) - if err != nil { - return err - } - } else { - subDoc = bleve.NewDocumentMapping() + // The original libregraph facet (longitude / latitude / + // altitude) is preserved as an ordinary sub-document for + // data retrieval and numeric queries. A sibling field at + // "_geopoint" at the same parent level carries the + // {lat, lon} geopoint representation for geo-distance / + // bounding-box / polygon queries; the adapter in + // PrepareForIndex populates it at write time. + sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: geopoint type on non-struct field %q", key) + } + subDoc, err := buildBleveDocMapping(sub, overrides, key) + if err != nil { + return err } - subDoc.AddFieldMapping(bleve.NewGeoPointFieldMapping()) doc.AddSubDocumentMapping(fi.Name, subDoc) + doc.AddFieldMappingsAt(fi.Name+GeopointSuffix, bleve.NewGeoPointFieldMapping()) return nil } diff --git a/services/search/pkg/mapping/bleve_test.go b/services/search/pkg/mapping/bleve_test.go index 095a6cc5b0..5dbe114edb 100644 --- a/services/search/pkg/mapping/bleve_test.go +++ b/services/search/pkg/mapping/bleve_test.go @@ -114,16 +114,16 @@ func TestBleveBuildMappingGeopoint(t *testing.T) { if err != nil { t.Fatalf("BleveBuildMapping: %v", err) } + // Original facet stays as an object sub-document with numeric + // sub-properties — for data retrieval via hit.Fields and ordinary + // numeric queries. loc := dm.Properties["location"] if loc == nil { t.Fatalf("location sub-document missing: %#v", dm.Properties) } - if len(loc.Fields) == 0 || loc.Fields[0].Type != "geopoint" { - t.Errorf("location Fields: %#v, want [geopoint]", loc.Fields) + if len(loc.Fields) != 0 { + t.Errorf("location should not carry field mappings directly, got %#v", loc.Fields) } - // Sub-fields must be explicit so hit.Fields carries longitude/ - // latitude/altitude back to the caller without relying on - // Dynamic=true. for _, sub := range []string{"longitude", "latitude", "altitude"} { prop, ok := loc.Properties[sub] if !ok { @@ -134,4 +134,12 @@ func TestBleveBuildMappingGeopoint(t *testing.T) { t.Errorf("location.%s Fields: %#v, want [number]", sub, prop.Fields) } } + // Sibling geopoint at "_geopoint" for geo-distance queries. + sibling := dm.Properties["location"+GeopointSuffix] + if sibling == nil { + t.Fatalf("location%s missing: %#v", GeopointSuffix, dm.Properties) + } + if len(sibling.Fields) == 0 || sibling.Fields[0].Type != "geopoint" { + t.Errorf("location%s Fields: %#v, want [geopoint]", GeopointSuffix, sibling.Fields) + } } diff --git a/services/search/pkg/mapping/geo.go b/services/search/pkg/mapping/geo.go index 15608cb9ef..3d0adf61a7 100644 --- a/services/search/pkg/mapping/geo.go +++ b/services/search/pkg/mapping/geo.go @@ -1,55 +1,51 @@ package mapping -// geoPoint is the shape bleve's geo.ExtractGeoPoint recognizes: a struct -// with a Lon() and Lat() method. Exported fields with json tags are also -// walked by bleve, so indexing a geoPoint produces: -// -// - the geopoint value at the parent field (for geo queries), and -// - sibling numeric sub-fields (parent.longitude, parent.latitude, -// parent.altitude) suitable for retrieval via hit.Fields. -type geoPoint struct { - Longitude *float64 `json:"longitude,omitempty"` - Latitude *float64 `json:"latitude,omitempty"` - Altitude *float64 `json:"altitude,omitempty"` -} +import "strings" -func (g geoPoint) Lon() float64 { - if g.Longitude == nil { - return 0 - } - return *g.Longitude -} +// GeopointSuffix is appended to a field's name to produce the sibling key +// that carries the geo_point / bleve-geopoint representation of the +// original facet. For example, a libregraph "location" object with +// longitude / latitude / altitude is preserved as-is under "location" (for +// data retrieval and numeric queries) while "location_geopoint" carries +// the {lat, lon} form the geo indices understand. +const GeopointSuffix = "_geopoint" -func (g geoPoint) Lat() float64 { - if g.Latitude == nil { - return 0 +// addGeopointSiblings walks the overrides; for each TypeGeopoint entry at +// a dotted path (e.g. "location" or "journey.start") it writes a sibling +// under the suffixed key with the {lat, lon} form both bleve's +// ExtractGeoPoint and OpenSearch's geo_point parser accept. The original +// facet object stays untouched so downstream code still sees the full +// libregraph shape (including altitude). +func addGeopointSiblings(m map[string]any, overrides map[string]FieldOpts) { + for key, opts := range overrides { + if opts.Type == TypeGeopoint { + addGeopointSibling(m, key) + } } - return *g.Latitude } -// adaptGeoPoint looks at m[key], expects a nested map with "longitude" and -// "latitude" (the json layout of libregraph.GeoCoordinates), and replaces -// it with a geoPoint the bleve indexer recognizes. Returns true if the -// adaptation was applied. -func adaptGeoPoint(m map[string]any, key string) bool { - raw, ok := m[key].(map[string]any) - if !ok { - return false +// addGeopointSibling resolves dottedPath within m and, if the target is a +// libregraph-shaped geo object (with numeric "longitude" and "latitude"), +// writes the `{lat, lon}` sibling at the same level under the suffixed key. +func addGeopointSibling(m map[string]any, dottedPath string) { + parts := strings.Split(dottedPath, ".") + parent := m + for _, p := range parts[:len(parts)-1] { + next, ok := parent[p].(map[string]any) + if !ok { + return + } + parent = next } - gp := geoPoint{} - if v, ok := raw["longitude"].(float64); ok { - gp.Longitude = &v - } - if v, ok := raw["latitude"].(float64); ok { - gp.Latitude = &v - } - if v, ok := raw["altitude"].(float64); ok { - gp.Altitude = &v + leaf := parts[len(parts)-1] + obj, ok := parent[leaf].(map[string]any) + if !ok { + return } - if gp.Longitude == nil || gp.Latitude == nil { - delete(m, key) - return false + lon, hasLon := obj["longitude"].(float64) + lat, hasLat := obj["latitude"].(float64) + if !hasLon || !hasLat { + return } - m[key] = gp - return true + parent[leaf+GeopointSuffix] = map[string]any{"lat": lat, "lon": lon} } diff --git a/services/search/pkg/mapping/geo_test.go b/services/search/pkg/mapping/geo_test.go index b933a323fd..5126355382 100644 --- a/services/search/pkg/mapping/geo_test.go +++ b/services/search/pkg/mapping/geo_test.go @@ -4,7 +4,7 @@ import ( "testing" ) -func TestPrepareForIndexAdaptsGeopoint(t *testing.T) { +func TestPrepareForIndexAddsGeopointSibling(t *testing.T) { type geoDoc struct { Location *struct { Longitude *float64 `json:"longitude,omitempty"` @@ -25,19 +25,27 @@ func TestPrepareForIndexAdaptsGeopoint(t *testing.T) { if err != nil { t.Fatalf("PrepareForIndex: %v", err) } - gp, ok := m["location"].(geoPoint) + + // Original location object stays untouched (full libregraph shape). + orig, ok := m["location"].(map[string]any) if !ok { - t.Fatalf("expected geoPoint, got %T: %#v", m["location"], m["location"]) + t.Fatalf("expected location object preserved, got %T", m["location"]) } - if gp.Lon() != lon || gp.Lat() != lat { - t.Errorf("geoPoint: Lon=%v, Lat=%v, want %v/%v", gp.Lon(), gp.Lat(), lon, lat) + if orig["longitude"] != lon || orig["latitude"] != lat || orig["altitude"] != alt { + t.Errorf("location object: %#v", orig) } - if gp.Altitude == nil || *gp.Altitude != alt { - t.Errorf("Altitude preserved: %#v, want %v", gp.Altitude, alt) + + // Sibling location_geopoint has {lat, lon} for the geo indices. + gp, ok := m["location"+GeopointSuffix].(map[string]any) + if !ok { + t.Fatalf("expected location_geopoint sibling, got %T", m["location"+GeopointSuffix]) + } + if gp["lat"] != lat || gp["lon"] != lon { + t.Errorf("sibling: %#v", gp) } } -func TestPrepareForIndexDropsIncompleteGeopoint(t *testing.T) { +func TestPrepareForIndexSkipsIncompleteGeopoint(t *testing.T) { type geoDoc struct { Location *struct { Altitude *float64 `json:"altitude,omitempty"` @@ -54,12 +62,17 @@ func TestPrepareForIndexDropsIncompleteGeopoint(t *testing.T) { if err != nil { t.Fatalf("PrepareForIndex: %v", err) } - if _, ok := m["location"]; ok { - t.Errorf("location should be dropped when lon/lat missing, got %#v", m["location"]) + // Original stays (altitude alone is still useful metadata). + if _, ok := m["location"]; !ok { + t.Error("location should still be present when only altitude is set") + } + // No sibling without both lon and lat. + if _, ok := m["location"+GeopointSuffix]; ok { + t.Errorf("no sibling expected, got %#v", m["location"+GeopointSuffix]) } } -func TestPrepareForIndexWithoutGeopointOverrideIsUnchanged(t *testing.T) { +func TestPrepareForIndexWithoutOverrideNoSibling(t *testing.T) { type geoDoc struct { Location *struct { Longitude *float64 `json:"longitude,omitempty"` @@ -76,11 +89,49 @@ func TestPrepareForIndexWithoutGeopointOverrideIsUnchanged(t *testing.T) { if err != nil { t.Fatalf("PrepareForIndex: %v", err) } - nested, ok := m["location"].(map[string]any) + if _, ok := m["location"+GeopointSuffix]; ok { + t.Errorf("no sibling expected without override, got %#v", m["location"+GeopointSuffix]) + } +} + +func TestPrepareForIndexHandlesNestedGeopoint(t *testing.T) { + // journey.start and journey.end — two geopoints in the same facet, + // demonstrating the dotted-path walker. + type geo struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + } + type journey struct { + Start *geo `json:"start,omitempty"` + End *geo `json:"end,omitempty"` + } + type doc struct { + Journey *journey `json:"journey,omitempty"` + } + slon, slat := 11.0, 49.0 + elon, elat := 13.4, 52.5 + d := doc{Journey: &journey{ + Start: &geo{Longitude: &slon, Latitude: &slat}, + End: &geo{Longitude: &elon, Latitude: &elat}, + }} + + m, err := PrepareForIndex(d, map[string]FieldOpts{ + "journey.start": {Type: TypeGeopoint}, + "journey.end": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + j, ok := m["journey"].(map[string]any) if !ok { - t.Fatalf("expected nested map without geopoint override, got %T", m["location"]) + t.Fatalf("journey not an object: %T", m["journey"]) + } + startGp, ok := j["start"+GeopointSuffix].(map[string]any) + if !ok || startGp["lat"] != slat || startGp["lon"] != slon { + t.Errorf("journey.start sibling: %#v", j["start"+GeopointSuffix]) } - if nested["longitude"] != lon || nested["latitude"] != lat { - t.Errorf("nested map: %#v", nested) + endGp, ok := j["end"+GeopointSuffix].(map[string]any) + if !ok || endGp["lat"] != elat || endGp["lon"] != elon { + t.Errorf("journey.end sibling: %#v", j["end"+GeopointSuffix]) } } diff --git a/services/search/pkg/mapping/opensearch.go b/services/search/pkg/mapping/opensearch.go index 1ef05617d7..d677aaa189 100644 --- a/services/search/pkg/mapping/opensearch.go +++ b/services/search/pkg/mapping/opensearch.go @@ -41,6 +41,24 @@ func buildOpenSearchProperties(t reflect.Type, overrides map[string]FieldOpts, p return nil } + if fieldType == TypeGeopoint { + // Mirror the bleve layout: keep the libregraph facet as an + // object for data retrieval / numeric queries, and add a + // sibling "_geopoint" geo_point field at the same + // level for geo-distance / bbox / polygon queries. + sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: geopoint type on non-struct field %q", key) + } + subProps, err := buildOpenSearchProperties(sub, overrides, key) + if err != nil { + return err + } + props[fi.Name] = map[string]any{"properties": subProps} + props[fi.Name+GeopointSuffix] = map[string]any{"type": "geo_point"} + return nil + } + fm, err := openSearchFieldMapping(fieldType, opts, fi.GoField.Type) if err != nil { return fmt.Errorf("mapping: field %q: %w", key, err) @@ -78,7 +96,10 @@ func openSearchFieldMapping(fieldType string, opts FieldOpts, goType reflect.Typ } return m, nil case TypeWildcard: - return map[string]any{"type": "wildcard"}, nil + // OpenSearch stores wildcard fields with doc_values=false by + // default, so emit it explicitly to keep local and remote + // mappings in sync for the Apply comparison. + return map[string]any{"type": "wildcard", "doc_values": false}, nil case TypeNumeric: return map[string]any{"type": openSearchNumericType(goType)}, nil case TypeBool: diff --git a/services/search/pkg/mapping/opensearch_test.go b/services/search/pkg/mapping/opensearch_test.go index 7a34c9e483..bf100d4f7e 100644 --- a/services/search/pkg/mapping/opensearch_test.go +++ b/services/search/pkg/mapping/opensearch_test.go @@ -113,6 +113,7 @@ func TestOpenSearchBuildMappingGeopoint(t *testing.T) { Location *struct { Lon float64 `json:"longitude"` Lat float64 `json:"latitude"` + Alt float64 `json:"altitude"` } `json:"location,omitempty"` } props, err := OpenSearchBuildMapping(reflect.TypeFor[doc](), map[string]FieldOpts{ @@ -121,11 +122,27 @@ func TestOpenSearchBuildMappingGeopoint(t *testing.T) { if err != nil { t.Fatalf("OpenSearchBuildMapping: %v", err) } + // Object for libregraph-shape data retrieval. loc, ok := props["location"].(map[string]any) if !ok { t.Fatalf("location: %#v", props["location"]) } - if loc["type"] != "geo_point" { - t.Errorf("location.type: %v", loc["type"]) + sub, ok := loc["properties"].(map[string]any) + if !ok { + t.Fatalf("location should have numeric sub-properties, got %#v", loc) + } + for _, k := range []string{"longitude", "latitude", "altitude"} { + prop, ok := sub[k].(map[string]any) + if !ok || prop["type"] != "double" { + t.Errorf("location.%s: %#v", k, sub[k]) + } + } + // Sibling geo_point for spatial queries. + gp, ok := props["location"+GeopointSuffix].(map[string]any) + if !ok { + t.Fatalf("location%s: %#v", GeopointSuffix, props["location"+GeopointSuffix]) + } + if gp["type"] != "geo_point" { + t.Errorf("location%s.type: %v", GeopointSuffix, gp["type"]) } } diff --git a/services/search/pkg/mapping/serialize.go b/services/search/pkg/mapping/serialize.go index 809bd30267..4c450aa6d1 100644 --- a/services/search/pkg/mapping/serialize.go +++ b/services/search/pkg/mapping/serialize.go @@ -23,10 +23,6 @@ func PrepareForIndex(v any, overrides map[string]FieldOpts) (map[string]any, err if err := json.Unmarshal(b, &out); err != nil { return nil, fmt.Errorf("mapping: unmarshal %T: %w", v, err) } - for key, opts := range overrides { - if opts.Type == TypeGeopoint { - adaptGeoPoint(out, key) - } - } + addGeopointSiblings(out, overrides) return out, nil } diff --git a/services/search/pkg/opensearch/batch.go b/services/search/pkg/opensearch/batch.go index 6297b40156..5018441b1a 100644 --- a/services/search/pkg/opensearch/batch.go +++ b/services/search/pkg/opensearch/batch.go @@ -14,6 +14,7 @@ import ( "github.com/opencloud-eu/opencloud/pkg/conversions" "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/osu" "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -43,7 +44,7 @@ func NewBatch(client *opensearchgoAPI.Client, index string, size int) (*Batch, e func (b *Batch) Upsert(id string, r search.Resource) error { return b.withSizeLimit(func() error { - body, err := conversions.To[map[string]any](r) + body, err := mapping.PrepareForIndex(r, r.SearchFieldOverrides()) if err != nil { return fmt.Errorf("failed to marshal resource: %w", err) } From 0c37aa27ca4f1af0f3177f49f734918d24e97a6f Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Thu, 23 Apr 2026 01:28:04 +0200 Subject: [PATCH 17/26] refactor(search): PrepareForIndex via conversions.To MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct → map[string]any step of PrepareForIndex did its own json.Marshal + json.Unmarshal, which is exactly what pkg/conversions provides. Route through conversions.To[map[string]any] and keep the geopoint sibling adapter as the only interesting thing PrepareForIndex actually contributes on top of the generic converter. No behavior change; the generic tests and the live OpenSearch suite still pass. --- services/search/pkg/mapping/serialize.go | 27 ++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/services/search/pkg/mapping/serialize.go b/services/search/pkg/mapping/serialize.go index 4c450aa6d1..9deebfd67d 100644 --- a/services/search/pkg/mapping/serialize.go +++ b/services/search/pkg/mapping/serialize.go @@ -1,27 +1,28 @@ package mapping import ( - "encoding/json" "fmt" + + "github.com/opencloud-eu/opencloud/pkg/conversions" ) // PrepareForIndex converts v to a flat map[string]any suitable for passing -// to bleve.Batch.Index. It honors json tags, omitempty semantics, and -// embedded-struct flattening via a json marshal/unmarshal round-trip — the -// same field resolution bleve would do internally when given the struct -// directly — and then splices in type-specific adaptations (currently: -// geopoint) driven by overrides. +// to the backend's index client (bleve.Batch.Index or the OpenSearch bulk +// body). The struct → map conversion goes through conversions.To, which is +// a json marshal/unmarshal round-trip — honors json tags, omitempty, and +// embedded-struct flattening. After that, type-specific adaptations +// (currently: geopoint siblings) are spliced in based on overrides. // -// Callers should pass the same overrides map used for BleveBuildMapping so -// the document shape and the mapping stay in sync. +// Callers should pass the same overrides map used for BleveBuildMapping / +// OpenSearchBuildMapping so the document shape and the mapping stay in +// sync. func PrepareForIndex(v any, overrides map[string]FieldOpts) (map[string]any, error) { - b, err := json.Marshal(v) + out, err := conversions.To[map[string]any](v) if err != nil { - return nil, fmt.Errorf("mapping: marshal %T: %w", v, err) + return nil, fmt.Errorf("mapping: prepare %T: %w", v, err) } - var out map[string]any - if err := json.Unmarshal(b, &out); err != nil { - return nil, fmt.Errorf("mapping: unmarshal %T: %w", v, err) + if out == nil { + return out, nil } addGeopointSiblings(out, overrides) return out, nil From 4d5b1aefb747431370dc94345fd440a21ddf27c3 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Thu, 23 Apr 2026 01:48:02 +0200 Subject: [PATCH 18/26] refactor(search): drop dead overrides param from Deserialize, make it fail-soft MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two tightly-coupled fixes against the hit → struct pipeline: - Deserialize[T] and DeserializeAt[T] had an unused map[string]FieldOpts parameter left over from the pre-sibling-field geopoint iteration. Every call site passed either nil or the SearchFieldOverrides map, and the function body never looked at it. Drop it — the signature now lines up with DeserializeStringMap. - The previous per-field parse errors (type mismatch in setValue, malformed RFC3339 in parseTime) bubbled up and caused matchTo- Resource to return nil. Callers in bleve/index.go (searchResource- ById, searchResourcesByPath) dereferenced that result directly, so a single corrupt field in a hit could panic Move / Delete / Restore. The pre-refactor getFieldValue[T] based code was fail- soft (type mismatches → zero value, no error). Restore that behavior: per-field decode failures leave the affected field at its zero value; Deserialize only returns an error when T itself isn't a struct. matchToResource now always returns a non-nil Resource. Covered by a new TestDeserializeIsFailSoft unit test that feeds intentionally corrupt fields and asserts the surrounding record still round-trips. --- services/search/pkg/bleve/backend.go | 8 +- services/search/pkg/bleve/bleve.go | 9 +- services/search/pkg/mapping/deserialize.go | 119 ++++++++---------- .../search/pkg/mapping/deserialize_test.go | 54 ++++++-- 4 files changed, 109 insertions(+), 81 deletions(-) diff --git a/services/search/pkg/bleve/backend.go b/services/search/pkg/bleve/backend.go index ee472c5f2d..b6194ba41b 100644 --- a/services/search/pkg/bleve/backend.go +++ b/services/search/pkg/bleve/backend.go @@ -140,17 +140,17 @@ func (b *Backend) Search(_ context.Context, sir *searchService.SearchIndexReques }, } if strings.HasPrefix(match.Entity.MimeType, "audio/") { - if audio, _ := mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio", nil); audio != nil { + if audio, _ := mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio"); audio != nil { match.Entity.Audio = audio } } - if image, _ := mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image", nil); image != nil { + if image, _ := mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image"); image != nil { match.Entity.Image = image } - if loc, _ := mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location", nil); loc != nil { + if loc, _ := mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location"); loc != nil { match.Entity.Location = loc } - if photo, _ := mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo", nil); photo != nil { + if photo, _ := mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo"); photo != nil { match.Entity.Photo = photo } diff --git a/services/search/pkg/bleve/bleve.go b/services/search/pkg/bleve/bleve.go index 336a327398..2c8653dd1e 100644 --- a/services/search/pkg/bleve/bleve.go +++ b/services/search/pkg/bleve/bleve.go @@ -73,12 +73,11 @@ func getFragmentValue(m bleveSearch.FieldFragmentMap, key string, idx int) strin // matchToResource reconstructs a search.Resource from a bleve hit. Used by // the Move / Delete / Restore / Purge paths that round-trip a record through -// the index. +// the index. Always returns a non-nil *Resource — Deserialize is fail-soft +// for per-field parse errors, and the only way to get an error out of it is +// a programmer bug (non-struct T), which is not possible here. func matchToResource(match *bleveSearch.DocumentMatch) *search.Resource { - r, err := mapping.Deserialize[search.Resource](match.Fields, search.Resource{}.SearchFieldOverrides()) - if err != nil || r == nil { - return nil - } + r, _ := mapping.Deserialize[search.Resource](match.Fields) // Legacy safety net: only expose audio metadata for audio MimeTypes, // matching the pre-refactor getAudioValue behavior. if r.Audio != nil && !strings.HasPrefix(r.MimeType, "audio/") { diff --git a/services/search/pkg/mapping/deserialize.go b/services/search/pkg/mapping/deserialize.go index 2880ce0854..68188778ad 100644 --- a/services/search/pkg/mapping/deserialize.go +++ b/services/search/pkg/mapping/deserialize.go @@ -13,35 +13,29 @@ import ( // values stored as slices (bleve unwraps single-element slices) are // re-wrapped when the target field is a slice. Time-valued fields accept // RFC3339 strings and are decoded into time.Time or timestamppb.Timestamp. -// -// The overrides parameter is reserved for type-specific adaptations -// (geopoint is the first planned consumer). -func Deserialize[T any](fields map[string]any, _ map[string]FieldOpts) (*T, error) { +// Unparseable individual fields are left at their zero value rather than +// aborting the whole record — matching the pre-refactor getFieldValue +// fail-soft behavior. +func Deserialize[T any](fields map[string]any) (*T, error) { t := reflect.TypeFor[T]() if t.Kind() != reflect.Struct { return nil, fmt.Errorf("mapping: Deserialize requires a struct type, got %v", t) } out := reflect.New(t) - if _, err := fillStruct(out.Elem(), fields, ""); err != nil { - return nil, err - } + fillStruct(out.Elem(), fields, "") return out.Interface().(*T), nil } // DeserializeAt reads sub-fields of the flat fields map under the given // dotted prefix into a new *T. If no sub-fields were present, it returns // (nil, nil) so callers can leave the enclosing pointer nil. -func DeserializeAt[T any](fields map[string]any, prefix string, _ map[string]FieldOpts) (*T, error) { +func DeserializeAt[T any](fields map[string]any, prefix string) (*T, error) { t := reflect.TypeFor[T]() if t.Kind() != reflect.Struct { return nil, fmt.Errorf("mapping: DeserializeAt requires a struct type, got %v", t) } out := reflect.New(t) - touched, err := fillStruct(out.Elem(), fields, prefix) - if err != nil { - return nil, err - } - if !touched { + if !fillStruct(out.Elem(), fields, prefix) { return nil, nil } return out.Interface().(*T), nil @@ -49,8 +43,9 @@ func DeserializeAt[T any](fields map[string]any, prefix string, _ map[string]Fie // fillStruct walks v's fields, copying values from fields at the dotted // prefix. Returns true if any leaf was populated (used by caller to decide -// whether to keep a newly-allocated pointer to v). -func fillStruct(v reflect.Value, fields map[string]any, prefix string) (bool, error) { +// whether to keep a newly-allocated pointer to v). Individual fields that +// can't be decoded are silently left at their zero value. +func fillStruct(v reflect.Value, fields map[string]any, prefix string) bool { t := v.Type() touched := false for i := 0; i < t.NumField(); i++ { @@ -61,11 +56,7 @@ func fillStruct(v reflect.Value, fields map[string]any, prefix string) (bool, er fv := v.Field(i) if fi.Embedded { - sub, err := fillStruct(fv, fields, prefix) - if err != nil { - return touched, err - } - if sub { + if fillStruct(fv, fields, prefix) { touched = true } continue @@ -75,108 +66,108 @@ func fillStruct(v reflect.Value, fields map[string]any, prefix string) (bool, er if prefix != "" { key = prefix + "." + fi.Name } - ok, err := fillField(fv, fields, key) - if err != nil { - return touched, err - } - if ok { + if fillField(fv, fields, key) { touched = true } } - return touched, nil + return touched } // fillField populates a single struct field. For pointers to nested structs // (non-time), it recurses with the field key as prefix and keeps the pointer -// only if something was set. -func fillField(v reflect.Value, fields map[string]any, key string) (bool, error) { +// only if something was set. Type mismatches and parse failures leave the +// field at its zero value rather than propagating an error. +func fillField(v reflect.Value, fields map[string]any, key string) bool { ft := v.Type() if ft.Kind() == reflect.Ptr { elem := ft.Elem() if elem.Kind() == reflect.Struct && elem != timeType && elem != timestampType { alloc := reflect.New(elem) - touched, err := fillStruct(alloc.Elem(), fields, key) - if err != nil { - return false, err - } - if touched { + if fillStruct(alloc.Elem(), fields, key) { v.Set(alloc) - return true, nil + return true } - return false, nil + return false } } raw, ok := fields[key] if !ok { - return false, nil + return false } - return true, setValue(v, raw) + return setValue(v, raw) } -func setValue(v reflect.Value, raw any) error { +// setValue writes raw into v, returning true when it succeeded. Any type +// mismatch is silently ignored. +func setValue(v reflect.Value, raw any) bool { if v.Kind() == reflect.Ptr { elem := v.Type().Elem() alloc := reflect.New(elem) - if err := setValue(alloc.Elem(), raw); err != nil { - return err + if !setValue(alloc.Elem(), raw) { + return false } v.Set(alloc) - return nil + return true } if v.Type() == timeType { - t, err := parseTime(raw) - if err != nil { - return err + t, ok := parseTime(raw) + if !ok { + return false } v.Set(reflect.ValueOf(t)) - return nil + return true } if v.Type() == timestampType { - t, err := parseTime(raw) - if err != nil { - return err + t, ok := parseTime(raw) + if !ok { + return false } v.Set(reflect.ValueOf(*timestamppb.New(t))) - return nil + return true } if v.Kind() == reflect.Slice { return setSlice(v, raw) } rv := reflect.ValueOf(raw) if !rv.IsValid() { - return nil + return false } - if rv.Type().ConvertibleTo(v.Type()) { - v.Set(rv.Convert(v.Type())) - return nil + if !rv.Type().ConvertibleTo(v.Type()) { + return false } - return fmt.Errorf("mapping: cannot set %v from %T", v.Type(), raw) + v.Set(rv.Convert(v.Type())) + return true } -func setSlice(v reflect.Value, raw any) error { +func setSlice(v reflect.Value, raw any) bool { items, ok := raw.([]any) if !ok { // bleve unwraps single-element slices; re-wrap here. items = []any{raw} } - out := reflect.MakeSlice(v.Type(), len(items), len(items)) - for i, item := range items { - if err := setValue(out.Index(i), item); err != nil { - return fmt.Errorf("mapping: slice[%d]: %w", i, err) + out := reflect.MakeSlice(v.Type(), 0, len(items)) + elemType := v.Type().Elem() + for _, item := range items { + elem := reflect.New(elemType).Elem() + if setValue(elem, item) { + out = reflect.Append(out, elem) } } + if out.Len() == 0 { + return false + } v.Set(out) - return nil + return true } -func parseTime(raw any) (time.Time, error) { +func parseTime(raw any) (time.Time, bool) { s, ok := raw.(string) if !ok { - return time.Time{}, fmt.Errorf("mapping: expected time string, got %T", raw) + return time.Time{}, false } t, err := time.Parse(time.RFC3339, s) if err != nil { - return time.Time{}, fmt.Errorf("mapping: parse time %q: %w", s, err) + return time.Time{}, false } - return t, nil + return t, true } diff --git a/services/search/pkg/mapping/deserialize_test.go b/services/search/pkg/mapping/deserialize_test.go index c8a452254f..69523d61cb 100644 --- a/services/search/pkg/mapping/deserialize_test.go +++ b/services/search/pkg/mapping/deserialize_test.go @@ -36,7 +36,7 @@ func TestDeserializeLeafFields(t *testing.T) { "Name": "n", "Size": float64(42), "Deleted": true, - }, nil) + }) if err != nil { t.Fatalf("Deserialize: %v", err) } @@ -49,7 +49,7 @@ func TestDeserializeScalarToSlice(t *testing.T) { r, err := Deserialize[Leaf](map[string]any{ "Tags": "single", "Favorites": []any{"a", "b"}, - }, nil) + }) if err != nil { t.Fatalf("Deserialize: %v", err) } @@ -65,7 +65,7 @@ func TestDeserializeNestedPointer(t *testing.T) { r, err := Deserialize[embedded](map[string]any{ "audio.artist": "A", "audio.year": float64(2024), - }, nil) + }) if err != nil { t.Fatalf("Deserialize: %v", err) } @@ -83,7 +83,7 @@ func TestDeserializeNestedPointer(t *testing.T) { func TestDeserializeEmptyNestedStaysNil(t *testing.T) { r, err := Deserialize[embedded](map[string]any{ "Name": "n", - }, nil) + }) if err != nil { t.Fatalf("Deserialize: %v", err) } @@ -99,7 +99,7 @@ func TestDeserializeTimestamp(t *testing.T) { r, err := Deserialize[embedded](map[string]any{ "photo.takenDateTime": "2024-01-02T03:04:05Z", "photo.mtime": "2024-05-06T07:08:09Z", - }, nil) + }) if err != nil { t.Fatalf("Deserialize: %v", err) } @@ -121,15 +121,53 @@ func TestDeserializeTimestamp(t *testing.T) { } } +func TestDeserializeIsFailSoft(t *testing.T) { + // Malformed values (type mismatch, unparseable time) leave the + // affected field at its zero value instead of dropping the whole + // record. Matches the pre-refactor getFieldValue behavior so + // matchToResource never returns nil on a corrupted hit. + r, err := Deserialize[embedded](map[string]any{ + "Name": "n", + "Size": "not-a-number", // wrong type + "Deleted": true, + "photo.takenDateTime": "not-an-rfc3339-time", + "photo.mtime": "2024-05-06T07:08:09Z", + }) + if err != nil { + t.Fatalf("Deserialize: %v", err) + } + if r == nil { + t.Fatal("expected non-nil *embedded even with partial corruption") + } + if r.Name != "n" { + t.Errorf("Name: %q", r.Name) + } + if r.Size != 0 { + t.Errorf("Size should stay zero on mismatch, got %d", r.Size) + } + if !r.Deleted { + t.Errorf("Deleted should still be true") + } + if r.Photo == nil { + t.Fatal("Photo should be populated because Mtime parsed ok") + } + if r.Photo.Taken != nil { + t.Errorf("Taken should stay nil for unparseable time, got %v", r.Photo.Taken) + } + if r.Photo.Mtime == nil { + t.Error("Mtime should be parsed") + } +} + func TestDeserializeRejectsNonStruct(t *testing.T) { - _, err := Deserialize[int](nil, nil) + _, err := Deserialize[int](nil) if err == nil { t.Fatal("expected error for non-struct T") } } func TestDeserializeAtReturnsNilWhenNothingMatches(t *testing.T) { - r, err := DeserializeAt[audio](map[string]any{"Name": "n"}, "audio", nil) + r, err := DeserializeAt[audio](map[string]any{"Name": "n"}, "audio") if err != nil { t.Fatalf("DeserializeAt: %v", err) } @@ -141,7 +179,7 @@ func TestDeserializeAtReturnsNilWhenNothingMatches(t *testing.T) { func TestDeserializeAtReturnsValueWhenPrefixMatches(t *testing.T) { r, err := DeserializeAt[audio](map[string]any{ "audio.artist": "A", - }, "audio", nil) + }, "audio") if err != nil { t.Fatalf("DeserializeAt: %v", err) } From 10edb7cd5340b9ae0e9ab6a6c2f9be1567fe0e0c Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Thu, 23 Apr 2026 01:50:21 +0200 Subject: [PATCH 19/26] refactor(search): drop the audio/ MimeType guard on the read path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four separate consumers of the indexed Audio facet each carried the same "only expose Audio when MimeType starts with audio/" check — bleve.matchToResource, bleve.Search, opensearch/convert.Open- SearchHitToMatch, graph.cs3ResourceInfoToDriveItem. It existed as a defensive post-process in case audio.* fields somehow leaked into a non-audio record. In practice the write path only populates audio.* for MimeType audio/* (tika.Extract guards it there), so the read-side check is a pure micro-optimization that never fires on clean data. Drop all four instances; trust the write path. If a future corruption mode actually produces out-of-band audio.* data, the right place to catch that is closer to the ingestion bug, not on every reader. --- services/graph/pkg/service/v0/driveitems.go | 4 +--- services/search/pkg/bleve/backend.go | 6 ++---- services/search/pkg/bleve/bleve.go | 6 ------ .../search/pkg/opensearch/internal/convert/opensearch.go | 5 +---- 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/services/graph/pkg/service/v0/driveitems.go b/services/graph/pkg/service/v0/driveitems.go index ca18faf02d..9e9e3acd23 100644 --- a/services/graph/pkg/service/v0/driveitems.go +++ b/services/graph/pkg/service/v0/driveitems.go @@ -472,9 +472,7 @@ func cs3ResourceToDriveItem(logger *log.Logger, res *storageprovider.ResourceInf } if metadata := res.GetArbitraryMetadata().GetMetadata(); metadata != nil { - if strings.HasPrefix(res.GetMimeType(), "audio/") { - setFacet(logger, &driveItem.Audio, metadata, "libre.graph.audio.") - } + setFacet(logger, &driveItem.Audio, metadata, "libre.graph.audio.") setFacet(logger, &driveItem.Image, metadata, "libre.graph.image.") setFacet(logger, &driveItem.Location, metadata, "libre.graph.location.") setFacet(logger, &driveItem.Photo, metadata, "libre.graph.photo.") diff --git a/services/search/pkg/bleve/backend.go b/services/search/pkg/bleve/backend.go index b6194ba41b..098b265cc6 100644 --- a/services/search/pkg/bleve/backend.go +++ b/services/search/pkg/bleve/backend.go @@ -139,10 +139,8 @@ func (b *Backend) Search(_ context.Context, sir *searchService.SearchIndexReques Highlights: getFragmentValue(hit.Fragments, "Content", 0), }, } - if strings.HasPrefix(match.Entity.MimeType, "audio/") { - if audio, _ := mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio"); audio != nil { - match.Entity.Audio = audio - } + if audio, _ := mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio"); audio != nil { + match.Entity.Audio = audio } if image, _ := mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image"); image != nil { match.Entity.Image = image diff --git a/services/search/pkg/bleve/bleve.go b/services/search/pkg/bleve/bleve.go index 2c8653dd1e..15d6cfcedb 100644 --- a/services/search/pkg/bleve/bleve.go +++ b/services/search/pkg/bleve/bleve.go @@ -2,7 +2,6 @@ package bleve import ( "regexp" - "strings" bleveSearch "github.com/blevesearch/bleve/v2/search" storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" @@ -78,11 +77,6 @@ func getFragmentValue(m bleveSearch.FieldFragmentMap, key string, idx int) strin // a programmer bug (non-struct T), which is not possible here. func matchToResource(match *bleveSearch.DocumentMatch) *search.Resource { r, _ := mapping.Deserialize[search.Resource](match.Fields) - // Legacy safety net: only expose audio metadata for audio MimeTypes, - // matching the pre-refactor getAudioValue behavior. - if r.Audio != nil && !strings.HasPrefix(r.MimeType, "audio/") { - r.Audio = nil - } return r } diff --git a/services/search/pkg/opensearch/internal/convert/opensearch.go b/services/search/pkg/opensearch/internal/convert/opensearch.go index d60cbe86bd..1eef75ac88 100644 --- a/services/search/pkg/opensearch/internal/convert/opensearch.go +++ b/services/search/pkg/opensearch/internal/convert/opensearch.go @@ -79,16 +79,13 @@ func OpenSearchHitToMatch(hit opensearchgoAPI.SearchHit) (*searchMessage.Match, return strings.Join(contentHighlights[:], "; ") }(), + Audio: copyFacet[searchMessage.Audio](resource.Audio), Image: copyFacet[searchMessage.Image](resource.Image), Location: copyFacet[searchMessage.GeoCoordinates](resource.Location), Photo: copyFacet[searchMessage.Photo](resource.Photo), }, } - if strings.HasPrefix(resource.MimeType, "audio/") { - match.Entity.Audio = copyFacet[searchMessage.Audio](resource.Audio) - } - if mtime, err := time.Parse(time.RFC3339, resource.Mtime); err == nil { match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())} } From fbfdca196fa88fbfc4af215415eef8c140060cbf Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Thu, 23 Apr 2026 01:51:56 +0200 Subject: [PATCH 20/26] refactor(search): address small review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three small follow-ups from the review of the series so far: - Cache the Resource SearchFieldOverrides map via sync.OnceValue so hot paths (matchToResource per hit, indexResource per upsert on both backends) reuse the same read-only map instead of rebuilding it from scratch each call. Callers that mutate (only the OpenSearch index builder) keep using maps.Clone. - Turn the OpenSearch IndexManager MarshalJSON dispatch into a map lookup (indexGenerators) instead of a hard-coded equality check against IndexIndexManagerResourceV2. Adding a future resource_v3 variant is now one entry in the map. - Add a DeserializeStringMap test that covers the embedded-struct flattening branch (fillStructFromStrings's fi.Embedded leg) — libregraph's generated types happen to not use embedding, but the walker supports it and the test guards against regressions. --- .../pkg/mapping/deserialize_string_test.go | 30 +++++++++++++++++++ services/search/pkg/opensearch/index.go | 11 +++++-- services/search/pkg/search/search.go | 23 +++++++++++--- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/services/search/pkg/mapping/deserialize_string_test.go b/services/search/pkg/mapping/deserialize_string_test.go index 82370284ca..b931431b22 100644 --- a/services/search/pkg/mapping/deserialize_string_test.go +++ b/services/search/pkg/mapping/deserialize_string_test.go @@ -110,3 +110,33 @@ func TestDeserializeStringMapRejectsNonStruct(t *testing.T) { t.Fatal("expected error for non-struct T") } } + +func TestDeserializeStringMapFlattensEmbedded(t *testing.T) { + // Embedded structs — whose fields are promoted to the enclosing + // struct's json namespace — must be walked the same way Go's json + // package walks them, so the shared prefix still resolves the right + // leaves. + type Base struct { + Artist *string `json:"artist,omitempty"` + } + type outer struct { + Base + Album *string `json:"album,omitempty"` + } + r, err := DeserializeStringMap[outer](map[string]string{ + "libre.graph.audio.artist": "Queen", + "libre.graph.audio.album": "A Night at the Opera", + }, "libre.graph.audio.") + if err != nil { + t.Fatalf("DeserializeStringMap: %v", err) + } + if r == nil { + t.Fatal("expected non-nil *outer") + } + if r.Artist == nil || *r.Artist != "Queen" { + t.Errorf("Artist (embedded): %#v", r.Artist) + } + if r.Album == nil || *r.Album != "A Night at the Opera" { + t.Errorf("Album: %#v", r.Album) + } +} diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go index 9ea1c0ccfd..37a316b2fd 100644 --- a/services/search/pkg/opensearch/index.go +++ b/services/search/pkg/opensearch/index.go @@ -30,6 +30,13 @@ var indexes embed.FS type IndexManager string +// indexGenerators dispatches the generated (non-file-backed) IndexManager +// variants to their builder. Everything not listed here falls back to the +// embedded JSON file under internal/indexes/. +var indexGenerators = map[IndexManager]func() ([]byte, error){ + IndexIndexManagerResourceV2: buildResourceV2Mapping, +} + func (m IndexManager) String() string { b, err := m.MarshalJSON() if err != nil { @@ -40,8 +47,8 @@ func (m IndexManager) String() string { } func (m IndexManager) MarshalJSON() ([]byte, error) { - if m == IndexIndexManagerResourceV2 { - return buildResourceV2Mapping() + if gen, ok := indexGenerators[m]; ok { + return gen() } filePath := string(m) body, err := indexes.ReadFile(path.Join("./internal/indexes", filePath)) diff --git a/services/search/pkg/search/search.go b/services/search/pkg/search/search.go index f5bdb69c40..4e384062e1 100644 --- a/services/search/pkg/search/search.go +++ b/services/search/pkg/search/search.go @@ -6,6 +6,7 @@ import ( "fmt" "regexp" "strings" + "sync" gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" @@ -61,10 +62,14 @@ type Resource struct { Hidden bool `json:"Hidden"` } -// SearchFieldOverrides returns the field options the mapping package needs -// to build per-backend index mappings for a Resource. Keys are json-tag -// names; see package mapping for the available FieldOpts knobs. -func (Resource) SearchFieldOverrides() map[string]mapping.FieldOpts { +// resourceFieldOverrides is the cached value for SearchFieldOverrides. The +// map is built once at first use (its contents never change) so hot paths — +// per-hit matchToResource, per-upsert PrepareForIndex — reuse the same map +// instead of allocating a fresh one every call. +// +// Callers must treat the returned map as read-only. Mutators (the +// OpenSearch index builder adds per-backend tweaks) clone it first. +var resourceFieldOverrides = sync.OnceValue(func() map[string]mapping.FieldOpts { excludeFromAll := false return map[string]mapping.FieldOpts{ "Name": {Analyzer: "lowercaseKeyword"}, @@ -73,6 +78,16 @@ func (Resource) SearchFieldOverrides() map[string]mapping.FieldOpts { "Favorites": {Analyzer: "lowercaseKeyword", IncludeInAll: &excludeFromAll}, "location": {Type: mapping.TypeGeopoint}, } +}) + +// SearchFieldOverrides returns the field options the mapping package needs +// to build per-backend index mappings for a Resource. Keys are json-tag +// names; see package mapping for the available FieldOpts knobs. +// +// The returned map is a shared read-only instance; callers that need to +// adjust entries must clone it first (see maps.Clone). +func (Resource) SearchFieldOverrides() map[string]mapping.FieldOpts { + return resourceFieldOverrides() } // ResolveReference makes sure the path is relative to the space root From e948da65c9b33ee5ffd6c7275dc3df00f494ca13 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Thu, 23 Apr 2026 01:55:09 +0200 Subject: [PATCH 21/26] refactor(search): restore single-allocation setSlice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fail-soft rewrite of setSlice added a reflect.New per element to accommodate per-element failure skipping, which pushed Deserialize- Resource from 68 to ~78 allocs per call. Compact the slice in place instead: pre-allocate once, slide successful writes into j, trim to out[:j] at the end. Same fail-soft semantics (failing items drop out of the result), single MakeSlice regardless of item count. Benchmark delta vs the fail-soft commit: DeserializeResource: 9379 → 8903 ns (-5%), 78 → 70 allocs (-10%) --- services/search/pkg/mapping/deserialize.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/services/search/pkg/mapping/deserialize.go b/services/search/pkg/mapping/deserialize.go index 68188778ad..c0bef99cb6 100644 --- a/services/search/pkg/mapping/deserialize.go +++ b/services/search/pkg/mapping/deserialize.go @@ -145,18 +145,22 @@ func setSlice(v reflect.Value, raw any) bool { // bleve unwraps single-element slices; re-wrap here. items = []any{raw} } - out := reflect.MakeSlice(v.Type(), 0, len(items)) - elemType := v.Type().Elem() + // Pre-allocate to len(items) and compact in place: unparseable + // elements stay unfilled, successful writes slide into the next + // free slot, and the final Slice(0, j) trims the tail. This keeps + // the total allocation to a single MakeSlice regardless of how + // many elements we process. + out := reflect.MakeSlice(v.Type(), len(items), len(items)) + j := 0 for _, item := range items { - elem := reflect.New(elemType).Elem() - if setValue(elem, item) { - out = reflect.Append(out, elem) + if setValue(out.Index(j), item) { + j++ } } - if out.Len() == 0 { + if j == 0 { return false } - v.Set(out) + v.Set(out.Slice(0, j)) return true } From 7117af24c5346c142ebc2348a7dd8a29a5187f3c Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Thu, 23 Apr 2026 10:55:56 +0200 Subject: [PATCH 22/26] refactor(search): derive lowercaseFields from SearchFieldOverrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bleve KQL compiler kept a hand-maintained set of field names whose query values need to be pre-lowercased to match the lowercasing analyzer at index time. The comment on the set even said "Keep in sync with services/search/pkg/bleve/index.go NewMapping" — a classic drift hazard that's now resolved: the same overrides map that drives the index mapping also drives the compiler. A field ends up in the set if its override selects the lowercase- Keyword analyzer or the fulltext type (whose analyzer lowercases in its token filter). Anything else preserves case both at index time and query time. Behavior unchanged for the current overrides (Name, Tags, Favorites, Content); a future override that picks a lowercase- ing analyzer will get the matching query-side fold for free. --- services/search/pkg/query/bleve/compiler.go | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/services/search/pkg/query/bleve/compiler.go b/services/search/pkg/query/bleve/compiler.go index 310397cae6..b0fbeb870e 100644 --- a/services/search/pkg/query/bleve/compiler.go +++ b/services/search/pkg/query/bleve/compiler.go @@ -8,17 +8,25 @@ import ( bleveQuery "github.com/blevesearch/bleve/v2/search/query" "github.com/opencloud-eu/opencloud/pkg/ast" "github.com/opencloud-eu/opencloud/pkg/kql" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" + "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) -// lowercaseFields lists the bleve fields whose index mapping uses a -// lowercasing analyzer. Values bound to these fields are pre-lowercased -// so query-side matching stays consistent with the index. -// Keep in sync with services/search/pkg/bleve/index.go NewMapping. -var lowercaseFields = map[string]struct{}{ - "Name": {}, - "Tags": {}, - "Favorites": {}, - "Content": {}, +// lowercaseFields is derived from Resource.SearchFieldOverrides(): any +// field whose override picks a lowercasing analyzer (`lowercaseKeyword`) +// or the fulltext type (which uses a lowercasing analyzer under the hood) +// gets its query-side value pre-lowercased so compile-time matches the +// index-time tokenization. Anything else keeps its original casing. +var lowercaseFields = buildLowercaseFields() + +func buildLowercaseFields() map[string]struct{} { + out := map[string]struct{}{} + for key, opts := range (search.Resource{}).SearchFieldOverrides() { + if opts.Analyzer == "lowercaseKeyword" || opts.Type == mapping.TypeFulltext { + out[key] = struct{}{} + } + } + return out } var _fields = map[string]string{ From 11d32a570d67d227d888d41ae46d1e46337649e9 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 12 May 2026 10:03:34 +0200 Subject: [PATCH 23/26] refactor(search): per-field fail-soft for CS3 metadata, drop dead error returns DeserializeStringMap previously aborted the whole facet on any single unparseable string field, by bubbling parse errors up from fillStructFromStrings. The bleve-hit Deserialize already failed soft per field (matching the pre-refactor unmarshalStringMap behavior). Align both paths: malformed individual values are silently zeroed, the rest of the facet still populates. A facet pointer stays nil only when no fields under its prefix were present at all. Concretely: an audio extractor regression that emits one bad value (e.g. duration as "12.5s" instead of "12") used to drop the entire Audio facet (losing Album, Artist, Track too); now Album/Artist/Track still surface and only Duration stays zero. With this change Deserialize, DeserializeAt, and DeserializeStringMap no longer ever return non-nil errors except on programmer misuse (non-struct T), so drop the (T, error) signatures in favor of just T and panic on non-struct. Callers in bleve/backend.go, bleve/bleve.go and graph/driveitems.go become one-liners; setFacet's logger arg goes away (no error to log). --- services/graph/pkg/service/v0/driveitems.go | 25 +++--- services/search/pkg/bleve/backend.go | 16 +--- services/search/pkg/bleve/bleve.go | 9 +- services/search/pkg/mapping/deserialize.go | 23 ++--- .../search/pkg/mapping/deserialize_string.go | 47 +++++----- .../pkg/mapping/deserialize_string_test.go | 85 ++++++++++++------- .../search/pkg/mapping/deserialize_test.go | 52 ++++-------- 7 files changed, 117 insertions(+), 140 deletions(-) diff --git a/services/graph/pkg/service/v0/driveitems.go b/services/graph/pkg/service/v0/driveitems.go index 9e9e3acd23..bcbf8e2091 100644 --- a/services/graph/pkg/service/v0/driveitems.go +++ b/services/graph/pkg/service/v0/driveitems.go @@ -472,27 +472,22 @@ func cs3ResourceToDriveItem(logger *log.Logger, res *storageprovider.ResourceInf } if metadata := res.GetArbitraryMetadata().GetMetadata(); metadata != nil { - setFacet(logger, &driveItem.Audio, metadata, "libre.graph.audio.") - setFacet(logger, &driveItem.Image, metadata, "libre.graph.image.") - setFacet(logger, &driveItem.Location, metadata, "libre.graph.location.") - setFacet(logger, &driveItem.Photo, metadata, "libre.graph.photo.") + setFacet(&driveItem.Audio, metadata, "libre.graph.audio.") + setFacet(&driveItem.Image, metadata, "libre.graph.image.") + setFacet(&driveItem.Location, metadata, "libre.graph.location.") + setFacet(&driveItem.Photo, metadata, "libre.graph.photo.") } return driveItem, nil } // setFacet decodes a libre.graph..* slice of CS3 ArbitraryMetadata -// into *dst. Parse errors are logged and the facet stays nil, matching the -// original fail-soft behavior of the per-field unmarshaller this replaced. -func setFacet[T any](logger *log.Logger, dst **T, metadata map[string]string, prefix string) { - v, err := mapping.DeserializeStringMap[T](metadata, prefix) - if err != nil { - logger.Error().Err(err).Str("prefix", prefix).Msg("graph: failed to parse facet metadata") - return - } - if v != nil { - *dst = v - } +// into *dst. DeserializeStringMap is fail-soft per field: malformed +// individual values are silently zeroed, the rest of the facet still +// populates. *dst stays nil only when no fields under prefix were present +// at all (which is what DeserializeStringMap returns). +func setFacet[T any](dst **T, metadata map[string]string, prefix string) { + *dst = mapping.DeserializeStringMap[T](metadata, prefix) } func cs3ResourceToRemoteItem(res *storageprovider.ResourceInfo) (*libregraph.RemoteItem, error) { diff --git a/services/search/pkg/bleve/backend.go b/services/search/pkg/bleve/backend.go index 098b265cc6..c85e1a8dfb 100644 --- a/services/search/pkg/bleve/backend.go +++ b/services/search/pkg/bleve/backend.go @@ -139,18 +139,10 @@ func (b *Backend) Search(_ context.Context, sir *searchService.SearchIndexReques Highlights: getFragmentValue(hit.Fragments, "Content", 0), }, } - if audio, _ := mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio"); audio != nil { - match.Entity.Audio = audio - } - if image, _ := mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image"); image != nil { - match.Entity.Image = image - } - if loc, _ := mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location"); loc != nil { - match.Entity.Location = loc - } - if photo, _ := mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo"); photo != nil { - match.Entity.Photo = photo - } + match.Entity.Audio = mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio") + match.Entity.Image = mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image") + match.Entity.Location = mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location") + match.Entity.Photo = mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo") if mtime, err := time.Parse(time.RFC3339, getFieldValue[string](hit.Fields, "Mtime")); err == nil { match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())} diff --git a/services/search/pkg/bleve/bleve.go b/services/search/pkg/bleve/bleve.go index 15d6cfcedb..ce16151bae 100644 --- a/services/search/pkg/bleve/bleve.go +++ b/services/search/pkg/bleve/bleve.go @@ -72,12 +72,11 @@ func getFragmentValue(m bleveSearch.FieldFragmentMap, key string, idx int) strin // matchToResource reconstructs a search.Resource from a bleve hit. Used by // the Move / Delete / Restore / Purge paths that round-trip a record through -// the index. Always returns a non-nil *Resource — Deserialize is fail-soft -// for per-field parse errors, and the only way to get an error out of it is -// a programmer bug (non-struct T), which is not possible here. +// the index. Always returns a non-nil *Resource: Deserialize is fail-soft +// for per-field parse errors, so corrupted hit values surface as zero +// values on individual fields instead of dropping the whole record. func matchToResource(match *bleveSearch.DocumentMatch) *search.Resource { - r, _ := mapping.Deserialize[search.Resource](match.Fields) - return r + return mapping.Deserialize[search.Resource](match.Fields) } func escapeQuery(s string) string { diff --git a/services/search/pkg/mapping/deserialize.go b/services/search/pkg/mapping/deserialize.go index c0bef99cb6..6fb055b4e6 100644 --- a/services/search/pkg/mapping/deserialize.go +++ b/services/search/pkg/mapping/deserialize.go @@ -14,31 +14,32 @@ import ( // re-wrapped when the target field is a slice. Time-valued fields accept // RFC3339 strings and are decoded into time.Time or timestamppb.Timestamp. // Unparseable individual fields are left at their zero value rather than -// aborting the whole record — matching the pre-refactor getFieldValue -// fail-soft behavior. -func Deserialize[T any](fields map[string]any) (*T, error) { +// aborting the whole record, matching the pre-refactor getFieldValue +// fail-soft behavior. Panics if T is not a struct (programmer error). +func Deserialize[T any](fields map[string]any) *T { t := reflect.TypeFor[T]() if t.Kind() != reflect.Struct { - return nil, fmt.Errorf("mapping: Deserialize requires a struct type, got %v", t) + panic(fmt.Sprintf("mapping: Deserialize requires a struct type, got %v", t)) } out := reflect.New(t) fillStruct(out.Elem(), fields, "") - return out.Interface().(*T), nil + return out.Interface().(*T) } // DeserializeAt reads sub-fields of the flat fields map under the given -// dotted prefix into a new *T. If no sub-fields were present, it returns -// (nil, nil) so callers can leave the enclosing pointer nil. -func DeserializeAt[T any](fields map[string]any, prefix string) (*T, error) { +// dotted prefix into a new *T. Returns nil when no sub-fields were present +// so callers can leave the enclosing pointer nil. Panics if T is not a +// struct (programmer error). +func DeserializeAt[T any](fields map[string]any, prefix string) *T { t := reflect.TypeFor[T]() if t.Kind() != reflect.Struct { - return nil, fmt.Errorf("mapping: DeserializeAt requires a struct type, got %v", t) + panic(fmt.Sprintf("mapping: DeserializeAt requires a struct type, got %v", t)) } out := reflect.New(t) if !fillStruct(out.Elem(), fields, prefix) { - return nil, nil + return nil } - return out.Interface().(*T), nil + return out.Interface().(*T) } // fillStruct walks v's fields, copying values from fields at the dotted diff --git a/services/search/pkg/mapping/deserialize_string.go b/services/search/pkg/mapping/deserialize_string.go index 8fb6100949..1da8998c29 100644 --- a/services/search/pkg/mapping/deserialize_string.go +++ b/services/search/pkg/mapping/deserialize_string.go @@ -12,25 +12,24 @@ import ( // DeserializeStringMap reads sub-fields of a string-typed flat map (e.g. CS3 // ArbitraryMetadata, which is map[string]string) under the given dotted // prefix into a new *T. String values are parsed into the target field's Go -// type via strconv / time.Parse. Returns (nil, nil) when nothing under the -// prefix matched, so callers can leave the enclosing pointer nil. -func DeserializeStringMap[T any](fields map[string]string, prefix string) (*T, error) { +// type via strconv / time.Parse. Returns nil when nothing under the prefix +// matched, so callers can leave the enclosing pointer nil. Unparseable +// individual fields are left at their zero value rather than aborting the +// whole facet (matches the pre-refactor unmarshalStringMap fail-soft +// behavior). Panics if T is not a struct (programmer error). +func DeserializeStringMap[T any](fields map[string]string, prefix string) *T { t := reflect.TypeFor[T]() if t.Kind() != reflect.Struct { - return nil, fmt.Errorf("mapping: DeserializeStringMap requires a struct type, got %v", t) + panic(fmt.Sprintf("mapping: DeserializeStringMap requires a struct type, got %v", t)) } out := reflect.New(t) - touched, err := fillStructFromStrings(out.Elem(), fields, prefix) - if err != nil { - return nil, err - } - if !touched { - return nil, nil + if !fillStructFromStrings(out.Elem(), fields, prefix) { + return nil } - return out.Interface().(*T), nil + return out.Interface().(*T) } -func fillStructFromStrings(v reflect.Value, fields map[string]string, prefix string) (bool, error) { +func fillStructFromStrings(v reflect.Value, fields map[string]string, prefix string) bool { t := v.Type() touched := false for i := 0; i < t.NumField(); i++ { @@ -41,11 +40,7 @@ func fillStructFromStrings(v reflect.Value, fields map[string]string, prefix str fv := v.Field(i) if fi.Embedded { - sub, err := fillStructFromStrings(fv, fields, prefix) - if err != nil { - return touched, err - } - if sub { + if fillStructFromStrings(fv, fields, prefix) { touched = true } continue @@ -60,12 +55,7 @@ func fillStructFromStrings(v reflect.Value, fields map[string]string, prefix str elem := fv.Type().Elem() if elem.Kind() == reflect.Struct && elem != timeType && elem != timestampType { alloc := reflect.New(elem) - subPrefix := key + "." - sub, err := fillStructFromStrings(alloc.Elem(), fields, subPrefix) - if err != nil { - return touched, err - } - if sub { + if fillStructFromStrings(alloc.Elem(), fields, key+".") { fv.Set(alloc) touched = true } @@ -77,12 +67,15 @@ func fillStructFromStrings(v reflect.Value, fields map[string]string, prefix str if !ok { continue } - if err := setValueFromString(fv, raw); err != nil { - return touched, fmt.Errorf("mapping: field %q: %w", key, err) + // Unparseable individual fields are silently left at their zero + // value: a malformed Duration shouldn't take Album/Artist/Track + // down with it. The bleve-hit deserializer (Deserialize) does the + // same; both paths feed the same downstream consumers. + if err := setValueFromString(fv, raw); err == nil { + touched = true } - touched = true } - return touched, nil + return touched } func setValueFromString(v reflect.Value, raw string) error { diff --git a/services/search/pkg/mapping/deserialize_string_test.go b/services/search/pkg/mapping/deserialize_string_test.go index b931431b22..977ad8b6df 100644 --- a/services/search/pkg/mapping/deserialize_string_test.go +++ b/services/search/pkg/mapping/deserialize_string_test.go @@ -17,7 +17,7 @@ type stringFacet struct { } func TestDeserializeStringMapBasicTypes(t *testing.T) { - r, err := DeserializeStringMap[stringFacet](map[string]string{ + r := DeserializeStringMap[stringFacet](map[string]string{ "libre.graph.audio.artist": "Queen", "libre.graph.audio.year": "1975", "libre.graph.audio.duration": "354000", @@ -25,9 +25,6 @@ func TestDeserializeStringMapBasicTypes(t *testing.T) { "libre.graph.audio.explicit": "true", "libre.graph.audio.takenDateTime": "2024-01-02T03:04:05Z", }, "libre.graph.audio.") - if err != nil { - t.Fatalf("DeserializeStringMap: %v", err) - } if r == nil { t.Fatal("expected non-nil *stringFacet") } @@ -52,12 +49,9 @@ func TestDeserializeStringMapBasicTypes(t *testing.T) { } func TestDeserializeStringMapReturnsNilWhenEmpty(t *testing.T) { - r, err := DeserializeStringMap[stringFacet](map[string]string{ + r := DeserializeStringMap[stringFacet](map[string]string{ "libre.graph.image.width": "1200", }, "libre.graph.audio.") - if err != nil { - t.Fatalf("DeserializeStringMap: %v", err) - } if r != nil { t.Fatalf("expected nil, got %#v", r) } @@ -67,12 +61,9 @@ func TestDeserializeStringMapTimestamppb(t *testing.T) { type photoFacet struct { Taken *timestamppb.Timestamp `json:"takenDateTime,omitempty"` } - r, err := DeserializeStringMap[photoFacet](map[string]string{ + r := DeserializeStringMap[photoFacet](map[string]string{ "libre.graph.photo.takenDateTime": "2024-05-06T07:08:09Z", }, "libre.graph.photo.") - if err != nil { - t.Fatalf("DeserializeStringMap: %v", err) - } if r == nil || r.Taken == nil { t.Fatalf("Taken missing: %#v", r) } @@ -82,40 +73,71 @@ func TestDeserializeStringMapTimestamppb(t *testing.T) { } } -func TestDeserializeStringMapParseErrorBubblesUp(t *testing.T) { - _, err := DeserializeStringMap[stringFacet](map[string]string{ +func TestDeserializeStringMapIsFailSoft(t *testing.T) { + // A single malformed field (year is unparseable as int) must not drop + // the whole facet. The bad field stays at zero value, the rest of the + // facet still populates. Mirrors the bleve-hit Deserialize behavior. + r := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.artist": "Iron Maiden", + "libre.graph.audio.year": "not-a-number", + "libre.graph.audio.duration": "354000", + "libre.graph.audio.explicit": "not-a-bool", + "libre.graph.audio.rating": "4.9", + }, "libre.graph.audio.") + if r == nil { + t.Fatal("expected non-nil *stringFacet despite bad fields") + } + if r.Artist == nil || *r.Artist != "Iron Maiden" { + t.Errorf("Artist should still be populated, got %#v", r.Artist) + } + if r.Duration == nil || *r.Duration != 354000 { + t.Errorf("Duration should still be populated, got %#v", r.Duration) + } + if r.Rating == nil || *r.Rating != 4.9 { + t.Errorf("Rating should still be populated, got %#v", r.Rating) + } + if r.Year != nil { + t.Errorf("Year should stay nil for bad int, got %#v", r.Year) + } + if r.Explicit != nil { + t.Errorf("Explicit should stay nil for bad bool, got %#v", r.Explicit) + } +} + +func TestDeserializeStringMapReturnsNilWhenOnlyBadFields(t *testing.T) { + // If the only present field is malformed and nothing else under the + // prefix matched, the facet pointer must stay nil (touched stays false). + r := DeserializeStringMap[stringFacet](map[string]string{ "libre.graph.audio.year": "not-a-number", }, "libre.graph.audio.") - if err == nil { - t.Fatal("expected error for unparsable int") + if r != nil { + t.Fatalf("expected nil when no field parsed successfully, got %#v", r) } } func TestDeserializeStringMapIgnoresOtherPrefixes(t *testing.T) { - r, err := DeserializeStringMap[stringFacet](map[string]string{ + r := DeserializeStringMap[stringFacet](map[string]string{ "libre.graph.audio.artist": "Mercury", "libre.graph.image.artist": "someone-else", }, "libre.graph.audio.") - if err != nil { - t.Fatalf("DeserializeStringMap: %v", err) - } if r == nil || r.Artist == nil || *r.Artist != "Mercury" { t.Fatalf("Artist: %#v", r.Artist) } } -func TestDeserializeStringMapRejectsNonStruct(t *testing.T) { - _, err := DeserializeStringMap[int](nil, "") - if err == nil { - t.Fatal("expected error for non-struct T") - } +func TestDeserializeStringMapPanicsOnNonStruct(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic for non-struct T") + } + }() + DeserializeStringMap[int](nil, "") } func TestDeserializeStringMapFlattensEmbedded(t *testing.T) { - // Embedded structs — whose fields are promoted to the enclosing - // struct's json namespace — must be walked the same way Go's json - // package walks them, so the shared prefix still resolves the right - // leaves. + // Embedded structs (whose fields are promoted to the enclosing struct's + // json namespace) must be walked the same way Go's json package walks + // them, so the shared prefix still resolves the right leaves. type Base struct { Artist *string `json:"artist,omitempty"` } @@ -123,13 +145,10 @@ func TestDeserializeStringMapFlattensEmbedded(t *testing.T) { Base Album *string `json:"album,omitempty"` } - r, err := DeserializeStringMap[outer](map[string]string{ + r := DeserializeStringMap[outer](map[string]string{ "libre.graph.audio.artist": "Queen", "libre.graph.audio.album": "A Night at the Opera", }, "libre.graph.audio.") - if err != nil { - t.Fatalf("DeserializeStringMap: %v", err) - } if r == nil { t.Fatal("expected non-nil *outer") } diff --git a/services/search/pkg/mapping/deserialize_test.go b/services/search/pkg/mapping/deserialize_test.go index 69523d61cb..3ebd121921 100644 --- a/services/search/pkg/mapping/deserialize_test.go +++ b/services/search/pkg/mapping/deserialize_test.go @@ -32,27 +32,21 @@ type embedded struct { } func TestDeserializeLeafFields(t *testing.T) { - r, err := Deserialize[Leaf](map[string]any{ + r := Deserialize[Leaf](map[string]any{ "Name": "n", "Size": float64(42), "Deleted": true, }) - if err != nil { - t.Fatalf("Deserialize: %v", err) - } if r.Name != "n" || r.Size != 42 || !r.Deleted { t.Fatalf("got %#v", r) } } func TestDeserializeScalarToSlice(t *testing.T) { - r, err := Deserialize[Leaf](map[string]any{ + r := Deserialize[Leaf](map[string]any{ "Tags": "single", "Favorites": []any{"a", "b"}, }) - if err != nil { - t.Fatalf("Deserialize: %v", err) - } if len(r.Tags) != 1 || r.Tags[0] != "single" { t.Errorf("Tags: %#v", r.Tags) } @@ -62,13 +56,10 @@ func TestDeserializeScalarToSlice(t *testing.T) { } func TestDeserializeNestedPointer(t *testing.T) { - r, err := Deserialize[embedded](map[string]any{ + r := Deserialize[embedded](map[string]any{ "audio.artist": "A", "audio.year": float64(2024), }) - if err != nil { - t.Fatalf("Deserialize: %v", err) - } if r.Audio == nil { t.Fatal("Audio is nil") } @@ -81,12 +72,9 @@ func TestDeserializeNestedPointer(t *testing.T) { } func TestDeserializeEmptyNestedStaysNil(t *testing.T) { - r, err := Deserialize[embedded](map[string]any{ + r := Deserialize[embedded](map[string]any{ "Name": "n", }) - if err != nil { - t.Fatalf("Deserialize: %v", err) - } if r.Audio != nil || r.Photo != nil { t.Fatalf("nested pointers should stay nil: %#v", r) } @@ -96,13 +84,10 @@ func TestDeserializeEmptyNestedStaysNil(t *testing.T) { } func TestDeserializeTimestamp(t *testing.T) { - r, err := Deserialize[embedded](map[string]any{ + r := Deserialize[embedded](map[string]any{ "photo.takenDateTime": "2024-01-02T03:04:05Z", "photo.mtime": "2024-05-06T07:08:09Z", }) - if err != nil { - t.Fatalf("Deserialize: %v", err) - } if r.Photo == nil { t.Fatal("Photo is nil") } @@ -126,16 +111,13 @@ func TestDeserializeIsFailSoft(t *testing.T) { // affected field at its zero value instead of dropping the whole // record. Matches the pre-refactor getFieldValue behavior so // matchToResource never returns nil on a corrupted hit. - r, err := Deserialize[embedded](map[string]any{ + r := Deserialize[embedded](map[string]any{ "Name": "n", "Size": "not-a-number", // wrong type "Deleted": true, "photo.takenDateTime": "not-an-rfc3339-time", "photo.mtime": "2024-05-06T07:08:09Z", }) - if err != nil { - t.Fatalf("Deserialize: %v", err) - } if r == nil { t.Fatal("expected non-nil *embedded even with partial corruption") } @@ -159,30 +141,26 @@ func TestDeserializeIsFailSoft(t *testing.T) { } } -func TestDeserializeRejectsNonStruct(t *testing.T) { - _, err := Deserialize[int](nil) - if err == nil { - t.Fatal("expected error for non-struct T") - } +func TestDeserializePanicsOnNonStruct(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic for non-struct T") + } + }() + Deserialize[int](nil) } func TestDeserializeAtReturnsNilWhenNothingMatches(t *testing.T) { - r, err := DeserializeAt[audio](map[string]any{"Name": "n"}, "audio") - if err != nil { - t.Fatalf("DeserializeAt: %v", err) - } + r := DeserializeAt[audio](map[string]any{"Name": "n"}, "audio") if r != nil { t.Fatalf("expected nil, got %#v", r) } } func TestDeserializeAtReturnsValueWhenPrefixMatches(t *testing.T) { - r, err := DeserializeAt[audio](map[string]any{ + r := DeserializeAt[audio](map[string]any{ "audio.artist": "A", }, "audio") - if err != nil { - t.Fatalf("DeserializeAt: %v", err) - } if r == nil { t.Fatal("expected non-nil *audio") } From 4bc5b7acc6bd1852cb110eea946ffc07138695a5 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 12 May 2026 10:30:45 +0200 Subject: [PATCH 24/26] refactor(search): inline DeserializeAt facet calls into Match struct literal Now that DeserializeAt has a single return value, the four post-init assignments collapse into the struct literal alongside the other field initialisers. --- services/search/pkg/bleve/backend.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/search/pkg/bleve/backend.go b/services/search/pkg/bleve/backend.go index c85e1a8dfb..4c265860e4 100644 --- a/services/search/pkg/bleve/backend.go +++ b/services/search/pkg/bleve/backend.go @@ -137,12 +137,12 @@ func (b *Backend) Search(_ context.Context, sir *searchService.SearchIndexReques Tags: getFieldSliceValue[string](hit.Fields, "Tags"), Favorites: getFieldSliceValue[string](hit.Fields, "Favorites"), Highlights: getFragmentValue(hit.Fragments, "Content", 0), + Audio: mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio"), + Image: mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image"), + Location: mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location"), + Photo: mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo"), }, } - match.Entity.Audio = mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio") - match.Entity.Image = mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image") - match.Entity.Location = mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location") - match.Entity.Photo = mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo") if mtime, err := time.Parse(time.RFC3339, getFieldValue[string](hit.Fields, "Mtime")); err == nil { match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())} From 6133be9429e7175722ce47f0571d1e9ccb5f71d4 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 12 May 2026 10:30:59 +0200 Subject: [PATCH 25/26] refactor(search): drop the unused resource_v1 OpenSearch mapping IndexIndexManagerResourceV1 was declared as the legacy file-backed mapping but had no in-tree consumer: nothing references it, and the file fallback in MarshalJSON was the only code path that could load it. Existing V1-shaped indexes in deployments stay on V1 because OpenSearch persists the mapping in the index metadata; the documented migration path remains "recreate the index" (see ADR 0005). With V1 gone, the embedded internal/indexes/ directory has no content, so the //go:embed directive and file-loading fallback in MarshalJSON are also removed: every IndexManager variant is now generated in code via indexGenerators. If a future mapping wants to ship as a JSON file, the embed + fallback can be reintroduced for just that case. --- services/search/pkg/opensearch/index.go | 26 ++-------- .../internal/indexes/resource_v1.json | 49 ------------------- 2 files changed, 5 insertions(+), 70 deletions(-) delete mode 100644 services/search/pkg/opensearch/internal/indexes/resource_v1.json diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go index 37a316b2fd..0f3ff4ecf2 100644 --- a/services/search/pkg/opensearch/index.go +++ b/services/search/pkg/opensearch/index.go @@ -3,11 +3,9 @@ package opensearch import ( "bytes" "context" - "embed" "errors" "fmt" "maps" - "path" "reflect" "github.com/go-jose/go-jose/v3/json" @@ -21,18 +19,12 @@ import ( var ( ErrManualActionRequired = errors.New("manual action required") IndexManagerLatest = IndexIndexManagerResourceV2 - IndexIndexManagerResourceV1 IndexManager = "resource_v1.json" IndexIndexManagerResourceV2 IndexManager = "resource_v2" ) -//go:embed internal/indexes/*.json -var indexes embed.FS - type IndexManager string -// indexGenerators dispatches the generated (non-file-backed) IndexManager -// variants to their builder. Everything not listed here falls back to the -// embedded JSON file under internal/indexes/. +// indexGenerators dispatches each IndexManager variant to its builder. var indexGenerators = map[IndexManager]func() ([]byte, error){ IndexIndexManagerResourceV2: buildResourceV2Mapping, } @@ -47,19 +39,11 @@ func (m IndexManager) String() string { } func (m IndexManager) MarshalJSON() ([]byte, error) { - if gen, ok := indexGenerators[m]; ok { - return gen() + gen, ok := indexGenerators[m] + if !ok { + return nil, fmt.Errorf("unknown index manager %q", string(m)) } - filePath := string(m) - body, err := indexes.ReadFile(path.Join("./internal/indexes", filePath)) - switch { - case err != nil: - return nil, fmt.Errorf("failed to read index file %s: %w", filePath, err) - case len(body) <= 0: - return nil, fmt.Errorf("index file %s is empty", filePath) - } - - return body, nil + return gen() } // buildResourceV2Mapping renders the OpenSearch index template for a diff --git a/services/search/pkg/opensearch/internal/indexes/resource_v1.json b/services/search/pkg/opensearch/internal/indexes/resource_v1.json deleted file mode 100644 index f0f719c4c5..0000000000 --- a/services/search/pkg/opensearch/internal/indexes/resource_v1.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "settings": { - "number_of_shards": "1", - "number_of_replicas": "1", - "analysis": { - "analyzer": { - "path_hierarchy": { - "filter": [ - "lowercase" - ], - "tokenizer": "path_hierarchy", - "type": "custom" - } - }, - "tokenizer": { - "path_hierarchy": { - "type": "path_hierarchy" - } - } - } - }, - "mappings": { - "properties": { - "ID": { - "type": "keyword" - }, - "ParentID": { - "type": "keyword" - }, - "RootID": { - "type": "keyword" - }, - "MimeType": { - "type": "wildcard", - "doc_values": false - }, - "Path": { - "type": "text", - "analyzer": "path_hierarchy" - }, - "Deleted": { - "type": "boolean" - }, - "Hidden": { - "type": "boolean" - } - } - } -} From 9e1e56b932886444cf8988557b33075a800b1a7c Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 12 May 2026 12:52:07 +0200 Subject: [PATCH 26/26] fix(search): explain how to recover from OpenSearch index mapping drift The Apply error on a divergent index used to read 'index X already exists and is different from the requested version, manual action required: ...' which tells the operator something is wrong but not what to do. There is no in-place migration today, so the search service ends up stuck in suture's restart loop with that line flooding the log. Spell out the recovery procedure in the error message itself: drop the index in OpenSearch and restart, the search service will recreate it with the new mapping. This does not change behavior, just the error text. A real fix (additive PUT _mapping for non-destructive diffs, alias-based blue/green for destructive ones, plus the bleve silent-stale gap) is a separate piece of work that needs operator-control over reindex timing on hyperlarge installs. --- services/search/pkg/opensearch/index.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go index 0f3ff4ecf2..3c9a3dce78 100644 --- a/services/search/pkg/opensearch/index.go +++ b/services/search/pkg/opensearch/index.go @@ -161,8 +161,11 @@ func (m IndexManager) Apply(ctx context.Context, name string, client *opensearch if errs != nil { return fmt.Errorf( - "index %s already exists and is different from the requested version, %w: %w", - name, + "index %s already exists with a different mapping than the requested version. "+ + "There is no in-place migration today: drop the index in OpenSearch (DELETE /%s) "+ + "and restart the search service. The index will be recreated with the new mapping. "+ + "%w: %w", + name, name, ErrManualActionRequired, errors.Join(errs...), )