diff --git a/services/graph/pkg/service/v0/driveitems.go b/services/graph/pkg/service/v0/driveitems.go index da89e25bad..bcbf8e2091 100644 --- a/services/graph/pkg/service/v0/driveitems.go +++ b/services/graph/pkg/service/v0/driveitems.go @@ -9,7 +9,6 @@ import ( "net/http" "net/url" "path" - "reflect" "strconv" "strings" "time" @@ -30,6 +29,7 @@ import ( "github.com/opencloud-eu/opencloud/pkg/log" "github.com/opencloud-eu/opencloud/services/graph/pkg/errorcode" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" ) // CreateUploadSession create an upload session to allow your app to upload files up to the maximum file size. @@ -471,130 +471,23 @@ func cs3ResourceToDriveItem(logger *log.Logger, res *storageprovider.ResourceInf driveItem.Folder = &libregraph.Folder{} } - if res.GetArbitraryMetadata() != nil { - driveItem.Audio = cs3ResourceToDriveItemAudioFacet(logger, res) - driveItem.Image = cs3ResourceToDriveItemImageFacet(logger, res) - driveItem.Location = cs3ResourceToDriveItemLocationFacet(logger, res) - driveItem.Photo = cs3ResourceToDriveItemPhotoFacet(logger, res) + if metadata := res.GetArbitraryMetadata().GetMetadata(); metadata != nil { + setFacet(&driveItem.Audio, metadata, "libre.graph.audio.") + setFacet(&driveItem.Image, metadata, "libre.graph.image.") + setFacet(&driveItem.Location, metadata, "libre.graph.location.") + setFacet(&driveItem.Photo, metadata, "libre.graph.photo.") } return driveItem, nil } -func cs3ResourceToDriveItemAudioFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.Audio { - if !strings.HasPrefix(res.GetMimeType(), "audio/") { - return nil - } - - k := res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var audio = &libregraph.Audio{} - if ok := unmarshalStringMap(logger, audio, k, "libre.graph.audio."); ok { - return audio - } - - return nil -} - -func cs3ResourceToDriveItemImageFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.Image { - k := 
res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var image = &libregraph.Image{} - if ok := unmarshalStringMap(logger, image, k, "libre.graph.image."); ok { - return image - } - - return nil -} - -func cs3ResourceToDriveItemLocationFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.GeoCoordinates { - k := res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var location = &libregraph.GeoCoordinates{} - if ok := unmarshalStringMap(logger, location, k, "libre.graph.location."); ok { - return location - } - - return nil -} - -func cs3ResourceToDriveItemPhotoFacet(logger *log.Logger, res *storageprovider.ResourceInfo) *libregraph.Photo { - k := res.GetArbitraryMetadata().GetMetadata() - if k == nil { - return nil - } - - var photo = &libregraph.Photo{} - if ok := unmarshalStringMap(logger, photo, k, "libre.graph.photo."); ok { - return photo - } - - return nil -} - -func getFieldName(structField reflect.StructField) string { - tag := structField.Tag.Get("json") - if tag == "" { - return structField.Name - } - - return strings.Split(tag, ",")[0] -} - -func unmarshalStringMap(logger *log.Logger, out any, flatMap map[string]string, prefix string) bool { - nonEmpty := false - obj := reflect.ValueOf(out).Elem() - timeKind := reflect.TypeOf(&time.Time{}).Elem().Kind() - for i := 0; i < obj.NumField(); i++ { - field := obj.Field(i) - structField := obj.Type().Field(i) - mapKey := prefix + getFieldName(structField) - - if value, ok := flatMap[mapKey]; ok { - if field.Kind() == reflect.Ptr { - newValue := reflect.New(field.Type().Elem()) - var tmp any - var err error - switch t := newValue.Type().Elem().Kind(); t { - case reflect.String: - tmp = value - case reflect.Int32: - tmp, err = strconv.ParseInt(value, 10, 32) - case reflect.Int64: - tmp, err = strconv.ParseInt(value, 10, 64) - case reflect.Float32: - tmp, err = strconv.ParseFloat(value, 32) - case reflect.Float64: - tmp, err = 
strconv.ParseFloat(value, 64) - case reflect.Bool: - tmp, err = strconv.ParseBool(value) - case timeKind: - tmp, err = time.Parse(time.RFC3339, value) - default: - err = errors.New("unsupported type") - logger.Error().Err(err).Str("type", t.String()).Str("mapKey", mapKey).Msg("target field type for value of mapKey is not supported") - } - if err != nil { - logger.Error().Err(err).Str("mapKey", mapKey).Msg("unmarshalling failed") - continue - } - newValue.Elem().Set(reflect.ValueOf(tmp).Convert(field.Type().Elem())) - field.Set(newValue) - nonEmpty = true - } - } - } - - return nonEmpty +// setFacet decodes a libre.graph.{facet}.* slice of CS3 ArbitraryMetadata +// into *dst. DeserializeStringMap is fail-soft per field: malformed +// individual values are silently zeroed, the rest of the facet still +// populates. *dst stays nil only when no fields under prefix were present +// at all (which is what DeserializeStringMap returns). +func setFacet[T any](dst **T, metadata map[string]string, prefix string) { + *dst = mapping.DeserializeStringMap[T](metadata, prefix) } func cs3ResourceToRemoteItem(res *storageprovider.ResourceInfo) (*libregraph.RemoteItem, error) { diff --git a/services/search/pkg/bleve/backend.go b/services/search/pkg/bleve/backend.go index 8adcdda09e..4c265860e4 100644 --- a/services/search/pkg/bleve/backend.go +++ b/services/search/pkg/bleve/backend.go @@ -15,6 +15,7 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/search" searchMessage "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/messages/search/v0" @@ -136,10 +137,10 @@ func (b *Backend) Search(_ context.Context, sir *searchService.SearchIndexReques Tags: getFieldSliceValue[string](hit.Fields, "Tags"), Favorites: getFieldSliceValue[string](hit.Fields, "Favorites"), Highlights: getFragmentValue(hit.Fragments, 
"Content", 0), - Audio: getAudioValue[searchMessage.Audio](hit.Fields), - Image: getImageValue[searchMessage.Image](hit.Fields), - Location: getLocationValue[searchMessage.GeoCoordinates](hit.Fields), - Photo: getPhotoValue[searchMessage.Photo](hit.Fields), + Audio: mapping.DeserializeAt[searchMessage.Audio](hit.Fields, "audio"), + Image: mapping.DeserializeAt[searchMessage.Image](hit.Fields, "image"), + Location: mapping.DeserializeAt[searchMessage.GeoCoordinates](hit.Fields, "location"), + Photo: mapping.DeserializeAt[searchMessage.Photo](hit.Fields, "photo"), }, } diff --git a/services/search/pkg/bleve/batch.go b/services/search/pkg/bleve/batch.go index 60a3d0c867..0b3bcf2775 100644 --- a/services/search/pkg/bleve/batch.go +++ b/services/search/pkg/bleve/batch.go @@ -10,6 +10,7 @@ import ( "github.com/opencloud-eu/reva/v2/pkg/utils" "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -36,10 +37,21 @@ func NewBatch(index bleve.Index, size int) (*Batch, error) { func (b *Batch) Upsert(id string, r search.Resource) error { return b.withSizeLimit(func() error { - return b.batch.Index(id, r) + return b.indexResource(id, r) }) } +// indexResource prepares r for bleve (resolving json tags and splicing in +// type-specific adaptations via the mapping package) and appends it to the +// batch under id. 
+func (b *Batch) indexResource(id string, r search.Resource) error { + doc, err := mapping.PrepareForIndex(r, r.SearchFieldOverrides()) + if err != nil { + return err + } + return b.batch.Index(id, doc) +} + func (b *Batch) Move(id, parentID, location string) error { return b.withSizeLimit(func() error { rootResource, err := searchResourceByID(id, b.index) @@ -68,7 +80,7 @@ func (b *Batch) Move(id, parentID, location string) error { } for _, resource := range resources { - if err := b.batch.Index(resource.ID, resource); err != nil { + if err := b.indexResource(resource.ID, *resource); err != nil { return err } if b.batch.Size() >= b.size { @@ -90,7 +102,7 @@ func (b *Batch) Delete(id string) error { } for _, resource := range affectedResources { - if err := b.batch.Index(resource.ID, resource); err != nil { + if err := b.indexResource(resource.ID, *resource); err != nil { return err } if b.batch.Size() >= b.size { @@ -112,7 +124,7 @@ func (b *Batch) Restore(id string) error { } for _, resource := range affectedResources { - if err := b.batch.Index(resource.ID, resource); err != nil { + if err := b.indexResource(resource.ID, *resource); err != nil { return err } if b.batch.Size() >= b.size { diff --git a/services/search/pkg/bleve/bleve.go b/services/search/pkg/bleve/bleve.go index e478c2c148..ce16151bae 100644 --- a/services/search/pkg/bleve/bleve.go +++ b/services/search/pkg/bleve/bleve.go @@ -1,18 +1,13 @@ package bleve import ( - "reflect" "regexp" - "strings" - "time" bleveSearch "github.com/blevesearch/bleve/v2/search" storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - libregraph "github.com/opencloud-eu/libre-graph-api-go" - "google.golang.org/protobuf/types/known/timestamppb" searchMessage "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/messages/search/v0" - "github.com/opencloud-eu/opencloud/services/search/pkg/content" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" 
"github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -75,131 +70,13 @@ func getFragmentValue(m bleveSearch.FieldFragmentMap, key string, idx int) strin return val[idx] } -func getAudioValue[T any](fields map[string]any) *T { - if !strings.HasPrefix(getFieldValue[string](fields, "MimeType"), "audio/") { - return nil - } - - var audio = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(audio, fields, "audio."); ok { - return audio - } - - return nil -} - -func getImageValue[T any](fields map[string]any) *T { - var image = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(image, fields, "image."); ok { - return image - } - - return nil -} - -func getLocationValue[T any](fields map[string]any) *T { - var location = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(location, fields, "location."); ok { - return location - } - - return nil -} - -func getPhotoValue[T any](fields map[string]any) *T { - var photo = newPointerOfType[T]() - if ok := unmarshalInterfaceMap(photo, fields, "photo."); ok { - return photo - } - - return nil -} - -func newPointerOfType[T any]() *T { - t := reflect.TypeOf((*T)(nil)).Elem() - ptr := reflect.New(t).Interface() - return ptr.(*T) -} - -func unmarshalInterfaceMap(out any, flatMap map[string]any, prefix string) bool { - nonEmpty := false - obj := reflect.ValueOf(out).Elem() - for i := 0; i < obj.NumField(); i++ { - field := obj.Field(i) - structField := obj.Type().Field(i) - mapKey := prefix + getFieldName(structField) - - if value, ok := flatMap[mapKey]; ok { - if field.Kind() == reflect.Ptr { - alloc := reflect.New(field.Type().Elem()) - elemType := field.Type().Elem() - - // convert time strings from index for search requests - if elemType == reflect.TypeOf(timestamppb.Timestamp{}) { - if strValue, ok := value.(string); ok { - if parsedTime, err := time.Parse(time.RFC3339, strValue); err == nil { - alloc.Elem().Set(reflect.ValueOf(*timestamppb.New(parsedTime))) - field.Set(alloc) - nonEmpty = true - } - } - 
continue - } - - // convert time strings from index for libregraph structs when updating resources - if elemType == reflect.TypeOf(time.Time{}) { - if strValue, ok := value.(string); ok { - if parsedTime, err := time.Parse(time.RFC3339, strValue); err == nil { - alloc.Elem().Set(reflect.ValueOf(parsedTime)) - field.Set(alloc) - nonEmpty = true - } - } - continue - } - - alloc.Elem().Set(reflect.ValueOf(value).Convert(elemType)) - field.Set(alloc) - nonEmpty = true - } - } - } - - return nonEmpty -} - -func getFieldName(structField reflect.StructField) string { - tag := structField.Tag.Get("json") - if tag == "" { - return structField.Name - } - - return strings.Split(tag, ",")[0] -} - +// matchToResource reconstructs a search.Resource from a bleve hit. Used by +// the Move / Delete / Restore / Purge paths that round-trip a record through +// the index. Always returns a non-nil *Resource: Deserialize is fail-soft +// for per-field parse errors, so corrupted hit values surface as zero +// values on individual fields instead of dropping the whole record. 
func matchToResource(match *bleveSearch.DocumentMatch) *search.Resource { - return &search.Resource{ - ID: getFieldValue[string](match.Fields, "ID"), - RootID: getFieldValue[string](match.Fields, "RootID"), - Path: getFieldValue[string](match.Fields, "Path"), - ParentID: getFieldValue[string](match.Fields, "ParentID"), - Type: uint64(getFieldValue[float64](match.Fields, "Type")), - Deleted: getFieldValue[bool](match.Fields, "Deleted"), - Document: content.Document{ - Name: getFieldValue[string](match.Fields, "Name"), - Title: getFieldValue[string](match.Fields, "Title"), - Size: uint64(getFieldValue[float64](match.Fields, "Size")), - Mtime: getFieldValue[string](match.Fields, "Mtime"), - MimeType: getFieldValue[string](match.Fields, "MimeType"), - Content: getFieldValue[string](match.Fields, "Content"), - Tags: getFieldSliceValue[string](match.Fields, "Tags"), - Favorites: getFieldSliceValue[string](match.Fields, "Favorites"), - Audio: getAudioValue[libregraph.Audio](match.Fields), - Image: getImageValue[libregraph.Image](match.Fields), - Location: getLocationValue[libregraph.GeoCoordinates](match.Fields), - Photo: getPhotoValue[libregraph.Photo](match.Fields), - }, - } + return mapping.Deserialize[search.Resource](match.Fields) } func escapeQuery(s string) string { diff --git a/services/search/pkg/bleve/geo_verify_test.go b/services/search/pkg/bleve/geo_verify_test.go new file mode 100644 index 0000000000..27cf0e7a01 --- /dev/null +++ b/services/search/pkg/bleve/geo_verify_test.go @@ -0,0 +1,178 @@ +package bleve_test + +import ( + "sort" + "testing" + + bleveSearch "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/search/query" + libregraph "github.com/opencloud-eu/libre-graph-api-go" + + "github.com/opencloud-eu/opencloud/services/search/pkg/bleve" + "github.com/opencloud-eu/opencloud/services/search/pkg/content" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" + "github.com/opencloud-eu/opencloud/services/search/pkg/search" 
+) + +// geoFixture builds an in-memory bleve index with a single resource that +// carries the given lon/lat/alt. Used by the search tests below. +func geoFixture(t *testing.T, lon, lat, alt float64) bleveSearch.Index { + t.Helper() + idxMapping, err := bleve.NewMapping() + if err != nil { + t.Fatalf("NewMapping: %v", err) + } + idx, err := bleveSearch.NewMemOnly(idxMapping) + if err != nil { + t.Fatalf("NewMemOnly: %v", err) + } + r := search.Resource{ + ID: "x", + Document: content.Document{ + Name: "team.jpg", + Location: &libregraph.GeoCoordinates{ + Longitude: &lon, + Latitude: &lat, + Altitude: &alt, + }, + }, + } + doc, err := mapping.PrepareForIndex(r, r.SearchFieldOverrides()) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + if err := idx.Index(r.ID, doc); err != nil { + t.Fatalf("Index: %v", err) + } + return idx +} + +// TestLocationAltitudeRoundTrip proves that every subfield of Location +// (including altitude) ends up in hit.Fields when a Resource is indexed +// through the full bleve pipeline. This is the invariant the Move / +// Delete / Restore round-trip depends on. 
+func TestLocationAltitudeRoundTrip(t *testing.T) { + idx := geoFixture(t, 11.103870357204285, 49.48675890884328, 1047.7) + + req := bleveSearch.NewSearchRequest(bleveSearch.NewMatchAllQuery()) + req.Fields = []string{"*"} + res, err := idx.Search(req) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) == 0 { + t.Fatal("no hits") + } + keys := make([]string, 0, len(res.Hits[0].Fields)) + for k := range res.Hits[0].Fields { + keys = append(keys, k) + } + sort.Strings(keys) + t.Logf("hit.Fields keys: %v", keys) + + for _, k := range []string{"location.longitude", "location.latitude", "location.altitude"} { + if _, ok := res.Hits[0].Fields[k]; !ok { + t.Errorf("missing %q in hit.Fields (got %v)", k, keys) + } + } +} + +func TestLocationLatitudeRangeQueryMatches(t *testing.T) { + idx := geoFixture(t, 11.1, 49.48, 1000) + + // numeric range on the sub-field + min, max := 49.0, 50.0 + incl := true + q := query.NewNumericRangeInclusiveQuery(&min, &max, &incl, &incl) + q.SetField("location.latitude") + res, err := idx.Search(bleveSearch.NewSearchRequest(q)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("latitude range: got %d hits, want 1", len(res.Hits)) + } + + // same range excluding the indexed latitude => no match + lowMin, lowMax := 0.0, 10.0 + q2 := query.NewNumericRangeInclusiveQuery(&lowMin, &lowMax, &incl, &incl) + q2.SetField("location.latitude") + res, err = idx.Search(bleveSearch.NewSearchRequest(q2)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 0 { + t.Errorf("latitude range outside value: got %d hits, want 0", len(res.Hits)) + } +} + +func TestLocationLongitudeRangeQueryMatches(t *testing.T) { + idx := geoFixture(t, 11.1, 49.48, 1000) + + min, max := 11.0, 12.0 + incl := true + q := query.NewNumericRangeInclusiveQuery(&min, &max, &incl, &incl) + q.SetField("location.longitude") + res, err := idx.Search(bleveSearch.NewSearchRequest(q)) + if err != nil { + 
t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("longitude range: got %d hits, want 1", len(res.Hits)) + } +} + +func TestLocationAltitudeRangeQueryMatches(t *testing.T) { + idx := geoFixture(t, 11.1, 49.48, 1047.7) + + min := 1000.0 + incl := true + q := query.NewNumericRangeInclusiveQuery(&min, nil, &incl, nil) + q.SetField("location.altitude") + res, err := idx.Search(bleveSearch.NewSearchRequest(q)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("altitude >= 1000: got %d hits, want 1", len(res.Hits)) + } + + // altitude floor above the indexed value => no hits + highMin := 2000.0 + q2 := query.NewNumericRangeInclusiveQuery(&highMin, nil, &incl, nil) + q2.SetField("location.altitude") + res, err = idx.Search(bleveSearch.NewSearchRequest(q2)) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(res.Hits) != 0 { + t.Errorf("altitude >= 2000 against 1047.7: got %d hits, want 0", len(res.Hits)) + } +} + +func TestLocationGeoDistanceQueryMatches(t *testing.T) { + // Nuremberg-ish coordinates. + idx := geoFixture(t, 11.103870357204285, 49.48675890884328, 1047.7) + + // 10 km radius around the indexed point should match. + near := query.NewGeoDistanceQuery(11.103870357204285, 49.48675890884328, "10km") + near.SetField("location" + mapping.GeopointSuffix) + res, err := idx.Search(bleveSearch.NewSearchRequest(near)) + if err != nil { + t.Fatalf("Search (near): %v", err) + } + if len(res.Hits) != 1 { + t.Fatalf("geo distance near: got %d hits, want 1", len(res.Hits)) + } + + // Far away (Berlin, ~400 km) with a 10 km radius should miss. 
+ far := query.NewGeoDistanceQuery(13.404954, 52.520008, "10km") + far.SetField("location" + mapping.GeopointSuffix) + res, err = idx.Search(bleveSearch.NewSearchRequest(far)) + if err != nil { + t.Fatalf("Search (far): %v", err) + } + if len(res.Hits) != 0 { + t.Errorf("geo distance far: got %d hits, want 0", len(res.Hits)) + } +} diff --git a/services/search/pkg/bleve/index.go b/services/search/pkg/bleve/index.go index f11bd61402..910d585f36 100644 --- a/services/search/pkg/bleve/index.go +++ b/services/search/pkg/bleve/index.go @@ -4,6 +4,7 @@ import ( "errors" "math" "path/filepath" + "reflect" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -15,6 +16,7 @@ import ( "github.com/blevesearch/bleve/v2/mapping" storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + searchmapping "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -38,27 +40,20 @@ func NewIndex(root string) (bleve.Index, error) { } func NewMapping() (mapping.IndexMapping, error) { - nameMapping := bleve.NewTextFieldMapping() - nameMapping.Analyzer = "lowercaseKeyword" - - lowercaseMapping := bleve.NewTextFieldMapping() - lowercaseMapping.IncludeInAll = false - lowercaseMapping.Analyzer = "lowercaseKeyword" - - fulltextFieldMapping := bleve.NewTextFieldMapping() - fulltextFieldMapping.Analyzer = "fulltext" - fulltextFieldMapping.IncludeInAll = false - - docMapping := bleve.NewDocumentMapping() - docMapping.AddFieldMappingsAt("Name", nameMapping) - docMapping.AddFieldMappingsAt("Tags", lowercaseMapping) - docMapping.AddFieldMappingsAt("Favorites", lowercaseMapping) - docMapping.AddFieldMappingsAt("Content", fulltextFieldMapping) + resourceType := reflect.TypeFor[search.Resource]() + overrides := search.Resource{}.SearchFieldOverrides() + if err := searchmapping.Validate(resourceType, overrides); err != nil { + return nil, err + } + docMapping, err := 
searchmapping.BleveBuildMapping(resourceType, overrides) + if err != nil { + return nil, err + } indexMapping := bleve.NewIndexMapping() indexMapping.DefaultAnalyzer = keyword.Name indexMapping.DefaultMapping = docMapping - err := indexMapping.AddCustomAnalyzer("lowercaseKeyword", + err = indexMapping.AddCustomAnalyzer("lowercaseKeyword", map[string]any{ "type": custom.Name, "tokenizer": single.Name, diff --git a/services/search/pkg/content/content.go b/services/search/pkg/content/content.go index e185351678..b162ccdcfc 100644 --- a/services/search/pkg/content/content.go +++ b/services/search/pkg/content/content.go @@ -14,14 +14,14 @@ func init() { // Document wraps all resource meta fields, // it is used as a content extraction result. type Document struct { - Title string - Name string - Content string - Size uint64 - Mtime string - MimeType string - Tags []string - Favorites []string + Title string `json:"Title"` + Name string `json:"Name"` + Content string `json:"Content"` + Size uint64 `json:"Size"` + Mtime string `json:"Mtime"` + MimeType string `json:"MimeType"` + Tags []string `json:"Tags"` + Favorites []string `json:"Favorites"` Audio *libregraph.Audio `json:"audio,omitempty"` Image *libregraph.Image `json:"image,omitempty"` Location *libregraph.GeoCoordinates `json:"location,omitempty"` diff --git a/services/search/pkg/mapping/bleve.go b/services/search/pkg/mapping/bleve.go new file mode 100644 index 0000000000..e2ca4755d2 --- /dev/null +++ b/services/search/pkg/mapping/bleve.go @@ -0,0 +1,115 @@ +package mapping + +import ( + "fmt" + "reflect" + + "github.com/blevesearch/bleve/v2" + bleveMapping "github.com/blevesearch/bleve/v2/mapping" +) + +// BleveBuildMapping builds a bleve DocumentMapping for t by walking the +// struct via reflection. Field names come from json tags; overrides are +// keyed by those names (or dotted paths for nested fields). 
+// +// The returned mapping references analyzer names (Analyzer field on the +// FieldOpts, plus "fulltext" / "path_hierarchy" for the corresponding Types); +// the caller is responsible for registering those analyzers on the enclosing +// IndexMapping. +func BleveBuildMapping(t reflect.Type, overrides map[string]FieldOpts) (*bleveMapping.DocumentMapping, error) { + return buildBleveDocMapping(t, overrides, "") +} + +func buildBleveDocMapping(t reflect.Type, overrides map[string]FieldOpts, prefix string) (*bleveMapping.DocumentMapping, error) { + doc := bleve.NewDocumentMapping() + err := walkFields(t, func(fi fieldInfo) error { + key := fi.Name + if prefix != "" { + key = prefix + "." + fi.Name + } + opts := overrides[key] + fieldType := opts.Type + if fieldType == "" { + fieldType = inferType(fi.GoField.Type) + } + + if fieldType == TypeObject { + sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: object type on non-struct field %q", key) + } + subDoc, err := buildBleveDocMapping(sub, overrides, key) + if err != nil { + return err + } + doc.AddSubDocumentMapping(fi.Name, subDoc) + return nil + } + + if fieldType == TypeGeopoint { + // The original libregraph facet (longitude / latitude / + // altitude) is preserved as an ordinary sub-document for + // data retrieval and numeric queries. A sibling field at + // fi.Name+"_geopoint" at the same parent level carries the + // {lat, lon} geopoint representation for geo-distance / + // bounding-box / polygon queries; the adapter in + // PrepareForIndex populates it at write time. 
+ sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: geopoint type on non-struct field %q", key) + } + subDoc, err := buildBleveDocMapping(sub, overrides, key) + if err != nil { + return err + } + doc.AddSubDocumentMapping(fi.Name, subDoc) + doc.AddFieldMappingsAt(fi.Name+GeopointSuffix, bleve.NewGeoPointFieldMapping()) + return nil + } + + fm, err := bleveFieldMapping(fieldType, opts) + if err != nil { + return fmt.Errorf("mapping: field %q: %w", key, err) + } + doc.AddFieldMappingsAt(fi.Name, fm) + return nil + }) + return doc, err +} + +func bleveFieldMapping(fieldType string, opts FieldOpts) (*bleveMapping.FieldMapping, error) { + switch fieldType { + case TypeWildcard: + // bleve has no wildcard type; fall back to keyword-ish text. + fieldType = TypeKeyword + fallthrough + case TypeKeyword, TypeFulltext, TypePath: + fm := bleve.NewTextFieldMapping() + switch { + case opts.Analyzer != "": + fm.Analyzer = opts.Analyzer + case fieldType == TypeFulltext: + fm.Analyzer = "fulltext" + case fieldType == TypePath: + fm.Analyzer = "path_hierarchy" + } + switch { + case opts.IncludeInAll != nil: + fm.IncludeInAll = *opts.IncludeInAll + case fieldType == TypeFulltext, fieldType == TypePath: + fm.IncludeInAll = false + } + return fm, nil + case TypeNumeric: + return bleve.NewNumericFieldMapping(), nil + case TypeBool: + return bleve.NewBooleanFieldMapping(), nil + case TypeDatetime: + return bleve.NewDateTimeFieldMapping(), nil + case TypeGeopoint: + return bleve.NewGeoPointFieldMapping(), nil + case "": + return nil, fmt.Errorf("no type inferred and no override") + } + return nil, fmt.Errorf("unsupported type %q", fieldType) +} diff --git a/services/search/pkg/mapping/bleve_test.go b/services/search/pkg/mapping/bleve_test.go new file mode 100644 index 0000000000..5dbe114edb --- /dev/null +++ b/services/search/pkg/mapping/bleve_test.go @@ -0,0 +1,145 @@ +package mapping + +import ( + "reflect" + "testing" + "time" +) + +type bleveDoc struct 
{ + Name string `json:"Name"` + Content string `json:"Content"` + Tags []string `json:"Tags"` + Size uint64 `json:"Size"` + Deleted bool `json:"Deleted"` + CreatedAt time.Time `json:"CreatedAt"` + Nested *nested `json:"nested,omitempty"` +} + +type nested struct { + Artist string `json:"artist"` + Year int `json:"year"` +} + +func TestBleveBuildMappingInferredTypes(t *testing.T) { + dm, err := BleveBuildMapping(reflect.TypeFor[bleveDoc](), nil) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + cases := map[string]string{ + "Name": "text", + "Content": "text", + "Tags": "text", + "Size": "number", + "Deleted": "boolean", + "CreatedAt": "datetime", + } + for field, wantType := range cases { + prop := dm.Properties[field] + if prop == nil { + t.Errorf("missing property %q", field) + continue + } + if len(prop.Fields) == 0 { + t.Errorf("%q: no field mappings", field) + continue + } + if got := prop.Fields[0].Type; got != wantType { + t.Errorf("%q: got type %q, want %q", field, got, wantType) + } + } +} + +func TestBleveBuildMappingNestedIsSubDocument(t *testing.T) { + dm, err := BleveBuildMapping(reflect.TypeFor[bleveDoc](), nil) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + sub := dm.Properties["nested"] + if sub == nil { + t.Fatal("missing nested sub-document") + } + if sub.Properties["artist"] == nil || sub.Properties["year"] == nil { + t.Fatalf("nested fields missing: %#v", sub.Properties) + } + if got := sub.Properties["artist"].Fields[0].Type; got != "text" { + t.Errorf("nested.artist: type %q, want text", got) + } + if got := sub.Properties["year"].Fields[0].Type; got != "number" { + t.Errorf("nested.year: type %q, want number", got) + } +} + +func TestBleveBuildMappingOverrides(t *testing.T) { + includeInAllFalse := false + dm, err := BleveBuildMapping(reflect.TypeFor[bleveDoc](), map[string]FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "Content": {Type: TypeFulltext}, + "Tags": {Analyzer: "lowercaseKeyword", 
IncludeInAll: &includeInAllFalse}, + }) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + nameField := dm.Properties["Name"].Fields[0] + if nameField.Analyzer != "lowercaseKeyword" { + t.Errorf("Name analyzer: %q, want lowercaseKeyword", nameField.Analyzer) + } + if !nameField.IncludeInAll { + t.Errorf("Name IncludeInAll should stay default-true when not overridden") + } + contentField := dm.Properties["Content"].Fields[0] + if contentField.Analyzer != "fulltext" { + t.Errorf("Content analyzer: %q, want fulltext", contentField.Analyzer) + } + if contentField.IncludeInAll { + t.Errorf("Content IncludeInAll should default to false for fulltext type") + } + tagsField := dm.Properties["Tags"].Fields[0] + if tagsField.IncludeInAll { + t.Errorf("Tags IncludeInAll should honor the explicit false override") + } +} + +func TestBleveBuildMappingGeopoint(t *testing.T) { + type geoDoc struct { + Location *struct { + Lon *float64 `json:"longitude,omitempty"` + Lat *float64 `json:"latitude,omitempty"` + Alt *float64 `json:"altitude,omitempty"` + } `json:"location,omitempty"` + } + dm, err := BleveBuildMapping(reflect.TypeFor[geoDoc](), map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("BleveBuildMapping: %v", err) + } + // Original facet stays as an object sub-document with numeric + // sub-properties — for data retrieval via hit.Fields and ordinary + // numeric queries. 
+ loc := dm.Properties["location"] + if loc == nil { + t.Fatalf("location sub-document missing: %#v", dm.Properties) + } + if len(loc.Fields) != 0 { + t.Errorf("location should not carry field mappings directly, got %#v", loc.Fields) + } + for _, sub := range []string{"longitude", "latitude", "altitude"} { + prop, ok := loc.Properties[sub] + if !ok { + t.Errorf("missing sub-field %q under location (properties: %v)", sub, loc.Properties) + continue + } + if len(prop.Fields) == 0 || prop.Fields[0].Type != "number" { + t.Errorf("location.%s Fields: %#v, want [number]", sub, prop.Fields) + } + } + // Sibling geopoint at "location_geopoint" for geo-distance queries. + sibling := dm.Properties["location"+GeopointSuffix] + if sibling == nil { + t.Fatalf("location%s missing: %#v", GeopointSuffix, dm.Properties) + } + if len(sibling.Fields) == 0 || sibling.Fields[0].Type != "geopoint" { + t.Errorf("location%s Fields: %#v, want [geopoint]", GeopointSuffix, sibling.Fields) + } +} diff --git a/services/search/pkg/mapping/deserialize.go b/services/search/pkg/mapping/deserialize.go new file mode 100644 index 0000000000..6fb055b4e6 --- /dev/null +++ b/services/search/pkg/mapping/deserialize.go @@ -0,0 +1,178 @@ +package mapping + +import ( + "fmt" + "reflect" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +// Deserialize builds a *T from bleve's flat hit.Fields map, using json-tag +// names for lookup and "parent.child" for nested struct pointers. Scalar +// values stored as slices (bleve unwraps single-element slices) are +// re-wrapped when the target field is a slice. Time-valued fields accept +// RFC3339 strings and are decoded into time.Time or timestamppb.Timestamp. +// Unparseable individual fields are left at their zero value rather than +// aborting the whole record, matching the pre-refactor getFieldValue +// fail-soft behavior. Panics if T is not a struct (programmer error). 
+func Deserialize[T any](fields map[string]any) *T { + t := reflect.TypeFor[T]() + if t.Kind() != reflect.Struct { + panic(fmt.Sprintf("mapping: Deserialize requires a struct type, got %v", t)) + } + out := reflect.New(t) + fillStruct(out.Elem(), fields, "") + return out.Interface().(*T) +} + +// DeserializeAt reads sub-fields of the flat fields map under the given +// dotted prefix into a new *T. Returns nil when no sub-fields were present +// so callers can leave the enclosing pointer nil. Panics if T is not a +// struct (programmer error). +func DeserializeAt[T any](fields map[string]any, prefix string) *T { + t := reflect.TypeFor[T]() + if t.Kind() != reflect.Struct { + panic(fmt.Sprintf("mapping: DeserializeAt requires a struct type, got %v", t)) + } + out := reflect.New(t) + if !fillStruct(out.Elem(), fields, prefix) { + return nil + } + return out.Interface().(*T) +} + +// fillStruct walks v's fields, copying values from fields at the dotted +// prefix. Returns true if any leaf was populated (used by caller to decide +// whether to keep a newly-allocated pointer to v). Individual fields that +// can't be decoded are silently left at their zero value. +func fillStruct(v reflect.Value, fields map[string]any, prefix string) bool { + t := v.Type() + touched := false + for i := 0; i < t.NumField(); i++ { + fi := resolveField(t.Field(i)) + if fi.Skip { + continue + } + fv := v.Field(i) + + if fi.Embedded { + if fillStruct(fv, fields, prefix) { + touched = true + } + continue + } + + key := fi.Name + if prefix != "" { + key = prefix + "." + fi.Name + } + if fillField(fv, fields, key) { + touched = true + } + } + return touched +} + +// fillField populates a single struct field. For pointers to nested structs +// (non-time), it recurses with the field key as prefix and keeps the pointer +// only if something was set. Type mismatches and parse failures leave the +// field at its zero value rather than propagating an error. 
+func fillField(v reflect.Value, fields map[string]any, key string) bool { + ft := v.Type() + if ft.Kind() == reflect.Ptr { + elem := ft.Elem() + if elem.Kind() == reflect.Struct && elem != timeType && elem != timestampType { + alloc := reflect.New(elem) + if fillStruct(alloc.Elem(), fields, key) { + v.Set(alloc) + return true + } + return false + } + } + raw, ok := fields[key] + if !ok { + return false + } + return setValue(v, raw) +} + +// setValue writes raw into v, returning true when it succeeded. Any type +// mismatch is silently ignored. +func setValue(v reflect.Value, raw any) bool { + if v.Kind() == reflect.Ptr { + elem := v.Type().Elem() + alloc := reflect.New(elem) + if !setValue(alloc.Elem(), raw) { + return false + } + v.Set(alloc) + return true + } + if v.Type() == timeType { + t, ok := parseTime(raw) + if !ok { + return false + } + v.Set(reflect.ValueOf(t)) + return true + } + if v.Type() == timestampType { + t, ok := parseTime(raw) + if !ok { + return false + } + v.Set(reflect.ValueOf(*timestamppb.New(t))) + return true + } + if v.Kind() == reflect.Slice { + return setSlice(v, raw) + } + rv := reflect.ValueOf(raw) + if !rv.IsValid() { + return false + } + if !rv.Type().ConvertibleTo(v.Type()) { + return false + } + v.Set(rv.Convert(v.Type())) + return true +} + +func setSlice(v reflect.Value, raw any) bool { + items, ok := raw.([]any) + if !ok { + // bleve unwraps single-element slices; re-wrap here. + items = []any{raw} + } + // Pre-allocate to len(items) and compact in place: unparseable + // elements stay unfilled, successful writes slide into the next + // free slot, and the final Slice(0, j) trims the tail. This keeps + // the total allocation to a single MakeSlice regardless of how + // many elements we process. 
+ out := reflect.MakeSlice(v.Type(), len(items), len(items)) + j := 0 + for _, item := range items { + if setValue(out.Index(j), item) { + j++ + } + } + if j == 0 { + return false + } + v.Set(out.Slice(0, j)) + return true +} + +func parseTime(raw any) (time.Time, bool) { + s, ok := raw.(string) + if !ok { + return time.Time{}, false + } + t, err := time.Parse(time.RFC3339, s) + if err != nil { + return time.Time{}, false + } + return t, true +} diff --git a/services/search/pkg/mapping/deserialize_string.go b/services/search/pkg/mapping/deserialize_string.go new file mode 100644 index 0000000000..1da8998c29 --- /dev/null +++ b/services/search/pkg/mapping/deserialize_string.go @@ -0,0 +1,141 @@ +package mapping + +import ( + "fmt" + "reflect" + "strconv" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +// DeserializeStringMap reads sub-fields of a string-typed flat map (e.g. CS3 +// ArbitraryMetadata, which is map[string]string) under the given dotted +// prefix into a new *T. String values are parsed into the target field's Go +// type via strconv / time.Parse. Returns nil when nothing under the prefix +// matched, so callers can leave the enclosing pointer nil. Unparseable +// individual fields are left at their zero value rather than aborting the +// whole facet (matches the pre-refactor unmarshalStringMap fail-soft +// behavior). Panics if T is not a struct (programmer error). 
+func DeserializeStringMap[T any](fields map[string]string, prefix string) *T { + t := reflect.TypeFor[T]() + if t.Kind() != reflect.Struct { + panic(fmt.Sprintf("mapping: DeserializeStringMap requires a struct type, got %v", t)) + } + out := reflect.New(t) + if !fillStructFromStrings(out.Elem(), fields, prefix) { + return nil + } + return out.Interface().(*T) +} + +func fillStructFromStrings(v reflect.Value, fields map[string]string, prefix string) bool { + t := v.Type() + touched := false + for i := 0; i < t.NumField(); i++ { + fi := resolveField(t.Field(i)) + if fi.Skip { + continue + } + fv := v.Field(i) + + if fi.Embedded { + if fillStructFromStrings(fv, fields, prefix) { + touched = true + } + continue + } + + key := fi.Name + if prefix != "" { + key = prefix + fi.Name + } + + if fv.Kind() == reflect.Ptr { + elem := fv.Type().Elem() + if elem.Kind() == reflect.Struct && elem != timeType && elem != timestampType { + alloc := reflect.New(elem) + if fillStructFromStrings(alloc.Elem(), fields, key+".") { + fv.Set(alloc) + touched = true + } + continue + } + } + + raw, ok := fields[key] + if !ok { + continue + } + // Unparseable individual fields are silently left at their zero + // value: a malformed Duration shouldn't take Album/Artist/Track + // down with it. The bleve-hit deserializer (Deserialize) does the + // same; both paths feed the same downstream consumers. 
+ if err := setValueFromString(fv, raw); err == nil { + touched = true + } + } + return touched +} + +func setValueFromString(v reflect.Value, raw string) error { + if v.Kind() == reflect.Ptr { + elem := v.Type().Elem() + alloc := reflect.New(elem) + if err := setValueFromString(alloc.Elem(), raw); err != nil { + return err + } + v.Set(alloc) + return nil + } + if v.Type() == timeType { + t, err := time.Parse(time.RFC3339, raw) + if err != nil { + return fmt.Errorf("parse time %q: %w", raw, err) + } + v.Set(reflect.ValueOf(t)) + return nil + } + if v.Type() == timestampType { + t, err := time.Parse(time.RFC3339, raw) + if err != nil { + return fmt.Errorf("parse time %q: %w", raw, err) + } + v.Set(reflect.ValueOf(*timestamppb.New(t))) + return nil + } + switch v.Kind() { + case reflect.String: + v.SetString(raw) + return nil + case reflect.Bool: + b, err := strconv.ParseBool(raw) + if err != nil { + return fmt.Errorf("parse bool %q: %w", raw, err) + } + v.SetBool(b) + return nil + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + n, err := strconv.ParseInt(raw, 10, v.Type().Bits()) + if err != nil { + return fmt.Errorf("parse int %q: %w", raw, err) + } + v.SetInt(n) + return nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + n, err := strconv.ParseUint(raw, 10, v.Type().Bits()) + if err != nil { + return fmt.Errorf("parse uint %q: %w", raw, err) + } + v.SetUint(n) + return nil + case reflect.Float32, reflect.Float64: + f, err := strconv.ParseFloat(raw, v.Type().Bits()) + if err != nil { + return fmt.Errorf("parse float %q: %w", raw, err) + } + v.SetFloat(f) + return nil + } + return fmt.Errorf("unsupported target kind %s", v.Kind()) +} diff --git a/services/search/pkg/mapping/deserialize_string_test.go b/services/search/pkg/mapping/deserialize_string_test.go new file mode 100644 index 0000000000..977ad8b6df --- /dev/null +++ b/services/search/pkg/mapping/deserialize_string_test.go @@ -0,0 +1,161 @@ 
+package mapping + +import ( + "testing" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +type stringFacet struct { + Artist *string `json:"artist,omitempty"` + Year *int32 `json:"year,omitempty"` + Duration *int64 `json:"duration,omitempty"` + Rating *float64 `json:"rating,omitempty"` + Explicit *bool `json:"explicit,omitempty"` + Taken *time.Time `json:"takenDateTime,omitempty"` +} + +func TestDeserializeStringMapBasicTypes(t *testing.T) { + r := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.artist": "Queen", + "libre.graph.audio.year": "1975", + "libre.graph.audio.duration": "354000", + "libre.graph.audio.rating": "4.9", + "libre.graph.audio.explicit": "true", + "libre.graph.audio.takenDateTime": "2024-01-02T03:04:05Z", + }, "libre.graph.audio.") + if r == nil { + t.Fatal("expected non-nil *stringFacet") + } + if r.Artist == nil || *r.Artist != "Queen" { + t.Errorf("Artist: %#v", r.Artist) + } + if r.Year == nil || *r.Year != 1975 { + t.Errorf("Year: %#v", r.Year) + } + if r.Duration == nil || *r.Duration != 354000 { + t.Errorf("Duration: %#v", r.Duration) + } + if r.Rating == nil || *r.Rating != 4.9 { + t.Errorf("Rating: %#v", r.Rating) + } + if r.Explicit == nil || !*r.Explicit { + t.Errorf("Explicit: %#v", r.Explicit) + } + if r.Taken == nil || !r.Taken.Equal(time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)) { + t.Errorf("Taken: %#v", r.Taken) + } +} + +func TestDeserializeStringMapReturnsNilWhenEmpty(t *testing.T) { + r := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.image.width": "1200", + }, "libre.graph.audio.") + if r != nil { + t.Fatalf("expected nil, got %#v", r) + } +} + +func TestDeserializeStringMapTimestamppb(t *testing.T) { + type photoFacet struct { + Taken *timestamppb.Timestamp `json:"takenDateTime,omitempty"` + } + r := DeserializeStringMap[photoFacet](map[string]string{ + "libre.graph.photo.takenDateTime": "2024-05-06T07:08:09Z", + }, "libre.graph.photo.") + if r == nil || 
r.Taken == nil { + t.Fatalf("Taken missing: %#v", r) + } + want := time.Date(2024, 5, 6, 7, 8, 9, 0, time.UTC) + if !r.Taken.AsTime().Equal(want) { + t.Errorf("Taken: got %v, want %v", r.Taken.AsTime(), want) + } +} + +func TestDeserializeStringMapIsFailSoft(t *testing.T) { + // A single malformed field (year is unparseable as int) must not drop + // the whole facet. The bad field stays at zero value, the rest of the + // facet still populates. Mirrors the bleve-hit Deserialize behavior. + r := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.artist": "Iron Maiden", + "libre.graph.audio.year": "not-a-number", + "libre.graph.audio.duration": "354000", + "libre.graph.audio.explicit": "not-a-bool", + "libre.graph.audio.rating": "4.9", + }, "libre.graph.audio.") + if r == nil { + t.Fatal("expected non-nil *stringFacet despite bad fields") + } + if r.Artist == nil || *r.Artist != "Iron Maiden" { + t.Errorf("Artist should still be populated, got %#v", r.Artist) + } + if r.Duration == nil || *r.Duration != 354000 { + t.Errorf("Duration should still be populated, got %#v", r.Duration) + } + if r.Rating == nil || *r.Rating != 4.9 { + t.Errorf("Rating should still be populated, got %#v", r.Rating) + } + if r.Year != nil { + t.Errorf("Year should stay nil for bad int, got %#v", r.Year) + } + if r.Explicit != nil { + t.Errorf("Explicit should stay nil for bad bool, got %#v", r.Explicit) + } +} + +func TestDeserializeStringMapReturnsNilWhenOnlyBadFields(t *testing.T) { + // If the only present field is malformed and nothing else under the + // prefix matched, the facet pointer must stay nil (touched stays false). 
+ r := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.year": "not-a-number", + }, "libre.graph.audio.") + if r != nil { + t.Fatalf("expected nil when no field parsed successfully, got %#v", r) + } +} + +func TestDeserializeStringMapIgnoresOtherPrefixes(t *testing.T) { + r := DeserializeStringMap[stringFacet](map[string]string{ + "libre.graph.audio.artist": "Mercury", + "libre.graph.image.artist": "someone-else", + }, "libre.graph.audio.") + if r == nil || r.Artist == nil || *r.Artist != "Mercury" { + t.Fatalf("Artist: %#v", r.Artist) + } +} + +func TestDeserializeStringMapPanicsOnNonStruct(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic for non-struct T") + } + }() + DeserializeStringMap[int](nil, "") +} + +func TestDeserializeStringMapFlattensEmbedded(t *testing.T) { + // Embedded structs (whose fields are promoted to the enclosing struct's + // json namespace) must be walked the same way Go's json package walks + // them, so the shared prefix still resolves the right leaves. 
+ type Base struct { + Artist *string `json:"artist,omitempty"` + } + type outer struct { + Base + Album *string `json:"album,omitempty"` + } + r := DeserializeStringMap[outer](map[string]string{ + "libre.graph.audio.artist": "Queen", + "libre.graph.audio.album": "A Night at the Opera", + }, "libre.graph.audio.") + if r == nil { + t.Fatal("expected non-nil *outer") + } + if r.Artist == nil || *r.Artist != "Queen" { + t.Errorf("Artist (embedded): %#v", r.Artist) + } + if r.Album == nil || *r.Album != "A Night at the Opera" { + t.Errorf("Album: %#v", r.Album) + } +} diff --git a/services/search/pkg/mapping/deserialize_test.go b/services/search/pkg/mapping/deserialize_test.go new file mode 100644 index 0000000000..3ebd121921 --- /dev/null +++ b/services/search/pkg/mapping/deserialize_test.go @@ -0,0 +1,170 @@ +package mapping + +import ( + "testing" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +type Leaf struct { + Name string `json:"Name"` + Size uint64 `json:"Size"` + Deleted bool `json:"Deleted"` + Tags []string `json:"Tags"` + Favorites []string `json:"Favorites"` +} + +type audio struct { + Artist *string `json:"artist,omitempty"` + Year *int32 `json:"year,omitempty"` +} + +type photo struct { + Taken *timestamppb.Timestamp `json:"takenDateTime,omitempty"` + Mtime *time.Time `json:"mtime,omitempty"` +} + +type embedded struct { + Leaf + Audio *audio `json:"audio,omitempty"` + Photo *photo `json:"photo,omitempty"` +} + +func TestDeserializeLeafFields(t *testing.T) { + r := Deserialize[Leaf](map[string]any{ + "Name": "n", + "Size": float64(42), + "Deleted": true, + }) + if r.Name != "n" || r.Size != 42 || !r.Deleted { + t.Fatalf("got %#v", r) + } +} + +func TestDeserializeScalarToSlice(t *testing.T) { + r := Deserialize[Leaf](map[string]any{ + "Tags": "single", + "Favorites": []any{"a", "b"}, + }) + if len(r.Tags) != 1 || r.Tags[0] != "single" { + t.Errorf("Tags: %#v", r.Tags) + } + if len(r.Favorites) != 2 || r.Favorites[0] != "a" || 
r.Favorites[1] != "b" { + t.Errorf("Favorites: %#v", r.Favorites) + } +} + +func TestDeserializeNestedPointer(t *testing.T) { + r := Deserialize[embedded](map[string]any{ + "audio.artist": "A", + "audio.year": float64(2024), + }) + if r.Audio == nil { + t.Fatal("Audio is nil") + } + if r.Audio.Artist == nil || *r.Audio.Artist != "A" { + t.Errorf("Artist: %#v", r.Audio.Artist) + } + if r.Audio.Year == nil || *r.Audio.Year != 2024 { + t.Errorf("Year: %#v", r.Audio.Year) + } +} + +func TestDeserializeEmptyNestedStaysNil(t *testing.T) { + r := Deserialize[embedded](map[string]any{ + "Name": "n", + }) + if r.Audio != nil || r.Photo != nil { + t.Fatalf("nested pointers should stay nil: %#v", r) + } + if r.Name != "n" { + t.Errorf("Name: %q", r.Name) + } +} + +func TestDeserializeTimestamp(t *testing.T) { + r := Deserialize[embedded](map[string]any{ + "photo.takenDateTime": "2024-01-02T03:04:05Z", + "photo.mtime": "2024-05-06T07:08:09Z", + }) + if r.Photo == nil { + t.Fatal("Photo is nil") + } + if r.Photo.Taken == nil { + t.Fatal("Taken is nil") + } + expected := time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC) + if !r.Photo.Taken.AsTime().Equal(expected) { + t.Errorf("Taken: got %v, want %v", r.Photo.Taken.AsTime(), expected) + } + if r.Photo.Mtime == nil { + t.Fatal("Mtime is nil") + } + if !r.Photo.Mtime.Equal(time.Date(2024, 5, 6, 7, 8, 9, 0, time.UTC)) { + t.Errorf("Mtime: %v", r.Photo.Mtime) + } +} + +func TestDeserializeIsFailSoft(t *testing.T) { + // Malformed values (type mismatch, unparseable time) leave the + // affected field at its zero value instead of dropping the whole + // record. Matches the pre-refactor getFieldValue behavior so + // matchToResource never returns nil on a corrupted hit. 
+ r := Deserialize[embedded](map[string]any{ + "Name": "n", + "Size": "not-a-number", // wrong type + "Deleted": true, + "photo.takenDateTime": "not-an-rfc3339-time", + "photo.mtime": "2024-05-06T07:08:09Z", + }) + if r == nil { + t.Fatal("expected non-nil *embedded even with partial corruption") + } + if r.Name != "n" { + t.Errorf("Name: %q", r.Name) + } + if r.Size != 0 { + t.Errorf("Size should stay zero on mismatch, got %d", r.Size) + } + if !r.Deleted { + t.Errorf("Deleted should still be true") + } + if r.Photo == nil { + t.Fatal("Photo should be populated because Mtime parsed ok") + } + if r.Photo.Taken != nil { + t.Errorf("Taken should stay nil for unparseable time, got %v", r.Photo.Taken) + } + if r.Photo.Mtime == nil { + t.Error("Mtime should be parsed") + } +} + +func TestDeserializePanicsOnNonStruct(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic for non-struct T") + } + }() + Deserialize[int](nil) +} + +func TestDeserializeAtReturnsNilWhenNothingMatches(t *testing.T) { + r := DeserializeAt[audio](map[string]any{"Name": "n"}, "audio") + if r != nil { + t.Fatalf("expected nil, got %#v", r) + } +} + +func TestDeserializeAtReturnsValueWhenPrefixMatches(t *testing.T) { + r := DeserializeAt[audio](map[string]any{ + "audio.artist": "A", + }, "audio") + if r == nil { + t.Fatal("expected non-nil *audio") + } + if r.Artist == nil || *r.Artist != "A" { + t.Errorf("Artist: %#v", r.Artist) + } +} diff --git a/services/search/pkg/mapping/geo.go b/services/search/pkg/mapping/geo.go new file mode 100644 index 0000000000..3d0adf61a7 --- /dev/null +++ b/services/search/pkg/mapping/geo.go @@ -0,0 +1,51 @@ +package mapping + +import "strings" + +// GeopointSuffix is appended to a field's name to produce the sibling key +// that carries the geo_point / bleve-geopoint representation of the +// original facet. 
For example, a libregraph "location" object with +// longitude / latitude / altitude is preserved as-is under "location" (for +// data retrieval and numeric queries) while "location_geopoint" carries +// the {lat, lon} form the geo indices understand. +const GeopointSuffix = "_geopoint" + +// addGeopointSiblings walks the overrides; for each TypeGeopoint entry at +// a dotted path (e.g. "location" or "journey.start") it writes a sibling +// under the suffixed key with the {lat, lon} form both bleve's +// ExtractGeoPoint and OpenSearch's geo_point parser accept. The original +// facet object stays untouched so downstream code still sees the full +// libregraph shape (including altitude). +func addGeopointSiblings(m map[string]any, overrides map[string]FieldOpts) { + for key, opts := range overrides { + if opts.Type == TypeGeopoint { + addGeopointSibling(m, key) + } + } +} + +// addGeopointSibling resolves dottedPath within m and, if the target is a +// libregraph-shaped geo object (with numeric "longitude" and "latitude"), +// writes the `{lat, lon}` sibling at the same level under the suffixed key. 
+func addGeopointSibling(m map[string]any, dottedPath string) { + parts := strings.Split(dottedPath, ".") + parent := m + for _, p := range parts[:len(parts)-1] { + next, ok := parent[p].(map[string]any) + if !ok { + return + } + parent = next + } + leaf := parts[len(parts)-1] + obj, ok := parent[leaf].(map[string]any) + if !ok { + return + } + lon, hasLon := obj["longitude"].(float64) + lat, hasLat := obj["latitude"].(float64) + if !hasLon || !hasLat { + return + } + parent[leaf+GeopointSuffix] = map[string]any{"lat": lat, "lon": lon} +} diff --git a/services/search/pkg/mapping/geo_test.go b/services/search/pkg/mapping/geo_test.go new file mode 100644 index 0000000000..5126355382 --- /dev/null +++ b/services/search/pkg/mapping/geo_test.go @@ -0,0 +1,137 @@ +package mapping + +import ( + "testing" +) + +func TestPrepareForIndexAddsGeopointSibling(t *testing.T) { + type geoDoc struct { + Location *struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + Altitude *float64 `json:"altitude,omitempty"` + } `json:"location,omitempty"` + } + lon, lat, alt := 11.1, 49.4, 1047.7 + doc := geoDoc{Location: &struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + Altitude *float64 `json:"altitude,omitempty"` + }{Longitude: &lon, Latitude: &lat, Altitude: &alt}} + + m, err := PrepareForIndex(doc, map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + + // Original location object stays untouched (full libregraph shape). + orig, ok := m["location"].(map[string]any) + if !ok { + t.Fatalf("expected location object preserved, got %T", m["location"]) + } + if orig["longitude"] != lon || orig["latitude"] != lat || orig["altitude"] != alt { + t.Errorf("location object: %#v", orig) + } + + // Sibling location_geopoint has {lat, lon} for the geo indices. 
+ gp, ok := m["location"+GeopointSuffix].(map[string]any) + if !ok { + t.Fatalf("expected location_geopoint sibling, got %T", m["location"+GeopointSuffix]) + } + if gp["lat"] != lat || gp["lon"] != lon { + t.Errorf("sibling: %#v", gp) + } +} + +func TestPrepareForIndexSkipsIncompleteGeopoint(t *testing.T) { + type geoDoc struct { + Location *struct { + Altitude *float64 `json:"altitude,omitempty"` + } `json:"location,omitempty"` + } + alt := 100.0 + doc := geoDoc{Location: &struct { + Altitude *float64 `json:"altitude,omitempty"` + }{Altitude: &alt}} + + m, err := PrepareForIndex(doc, map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + // Original stays (altitude alone is still useful metadata). + if _, ok := m["location"]; !ok { + t.Error("location should still be present when only altitude is set") + } + // No sibling without both lon and lat. + if _, ok := m["location"+GeopointSuffix]; ok { + t.Errorf("no sibling expected, got %#v", m["location"+GeopointSuffix]) + } +} + +func TestPrepareForIndexWithoutOverrideNoSibling(t *testing.T) { + type geoDoc struct { + Location *struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + } `json:"location,omitempty"` + } + lon, lat := 11.1, 49.4 + doc := geoDoc{Location: &struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + }{Longitude: &lon, Latitude: &lat}} + + m, err := PrepareForIndex(doc, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + if _, ok := m["location"+GeopointSuffix]; ok { + t.Errorf("no sibling expected without override, got %#v", m["location"+GeopointSuffix]) + } +} + +func TestPrepareForIndexHandlesNestedGeopoint(t *testing.T) { + // journey.start and journey.end — two geopoints in the same facet, + // demonstrating the dotted-path walker. 
+ type geo struct { + Longitude *float64 `json:"longitude,omitempty"` + Latitude *float64 `json:"latitude,omitempty"` + } + type journey struct { + Start *geo `json:"start,omitempty"` + End *geo `json:"end,omitempty"` + } + type doc struct { + Journey *journey `json:"journey,omitempty"` + } + slon, slat := 11.0, 49.0 + elon, elat := 13.4, 52.5 + d := doc{Journey: &journey{ + Start: &geo{Longitude: &slon, Latitude: &slat}, + End: &geo{Longitude: &elon, Latitude: &elat}, + }} + + m, err := PrepareForIndex(d, map[string]FieldOpts{ + "journey.start": {Type: TypeGeopoint}, + "journey.end": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + j, ok := m["journey"].(map[string]any) + if !ok { + t.Fatalf("journey not an object: %T", m["journey"]) + } + startGp, ok := j["start"+GeopointSuffix].(map[string]any) + if !ok || startGp["lat"] != slat || startGp["lon"] != slon { + t.Errorf("journey.start sibling: %#v", j["start"+GeopointSuffix]) + } + endGp, ok := j["end"+GeopointSuffix].(map[string]any) + if !ok || endGp["lat"] != elat || endGp["lon"] != elon { + t.Errorf("journey.end sibling: %#v", j["end"+GeopointSuffix]) + } +} diff --git a/services/search/pkg/mapping/infer.go b/services/search/pkg/mapping/infer.go new file mode 100644 index 0000000000..869ba76f02 --- /dev/null +++ b/services/search/pkg/mapping/infer.go @@ -0,0 +1,112 @@ +package mapping + +import ( + "reflect" + "strings" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +var ( + timeType = reflect.TypeFor[time.Time]() + timestampType = reflect.TypeFor[timestamppb.Timestamp]() +) + +// inferType returns the mapping type for a Go type. Pointers and slices are +// unwrapped to their element type. time.Time and timestamppb.Timestamp become +// datetime; other structs become object. 
+func inferType(t reflect.Type) string { + for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { + t = t.Elem() + } + switch t.Kind() { + case reflect.String: + return TypeKeyword + case reflect.Bool: + return TypeBool + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64: + return TypeNumeric + case reflect.Struct: + if t == timeType || t == timestampType { + return TypeDatetime + } + return TypeObject + } + return "" +} + +// fieldInfo is the resolved metadata for one struct field. +type fieldInfo struct { + Name string + GoField reflect.StructField + Skip bool + Embedded bool +} + +// resolveField resolves a struct field's json-tag name and skip/embed state. +func resolveField(sf reflect.StructField) fieldInfo { + if !sf.IsExported() { + return fieldInfo{Skip: true} + } + name := sf.Name + tag := sf.Tag.Get("json") + if tag != "" { + first, _, _ := strings.Cut(tag, ",") + if first == "-" { + return fieldInfo{Skip: true} + } + if first != "" { + name = first + } + } + return fieldInfo{ + Name: name, + GoField: sf, + Embedded: sf.Anonymous, + } +} + +// walkFields visits exported leaf fields of t, flattening embedded structs +// onto the enclosing level. It returns the first error returned by fn. +func walkFields(t reflect.Type, fn func(fi fieldInfo) error) error { + for t.Kind() == reflect.Ptr { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return nil + } + for i := 0; i < t.NumField(); i++ { + fi := resolveField(t.Field(i)) + if fi.Skip { + continue + } + if fi.Embedded { + if err := walkFields(fi.GoField.Type, fn); err != nil { + return err + } + continue + } + if err := fn(fi); err != nil { + return err + } + } + return nil +} + +// structType returns the underlying struct type, unwrapping pointers and +// slices. Returns nil when t is not a walkable struct (e.g. time.Time). 
+func structType(t reflect.Type) reflect.Type { + for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return nil + } + if t == timeType || t == timestampType { + return nil + } + return t +} diff --git a/services/search/pkg/mapping/infer_test.go b/services/search/pkg/mapping/infer_test.go new file mode 100644 index 0000000000..080b116e47 --- /dev/null +++ b/services/search/pkg/mapping/infer_test.go @@ -0,0 +1,116 @@ +package mapping + +import ( + "reflect" + "testing" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +func TestInferType(t *testing.T) { + cases := []struct { + name string + in any + want string + }{ + {"string", "", TypeKeyword}, + {"*string", (*string)(nil), TypeKeyword}, + {"[]string", []string(nil), TypeKeyword}, + {"bool", false, TypeBool}, + {"int", int(0), TypeNumeric}, + {"int64", int64(0), TypeNumeric}, + {"uint64", uint64(0), TypeNumeric}, + {"float64", float64(0), TypeNumeric}, + {"time.Time", time.Time{}, TypeDatetime}, + {"*time.Time", (*time.Time)(nil), TypeDatetime}, + {"*timestamppb.Timestamp", (*timestamppb.Timestamp)(nil), TypeDatetime}, + {"struct", struct{ X int }{}, TypeObject}, + {"*struct", (*struct{ X int })(nil), TypeObject}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := inferType(reflect.TypeOf(c.in)) + if got != c.want { + t.Fatalf("inferType(%s): got %q, want %q", c.name, got, c.want) + } + }) + } +} + +func TestResolveField(t *testing.T) { + type S struct { + Exported string `json:"exp"` + Renamed string `json:"renamed,omitempty"` + NoTag string + OmitOnly string `json:",omitempty"` + Skipped string `json:"-"` + unexported string //nolint:unused + } + st := reflect.TypeFor[S]() + cases := []struct { + fieldIdx int + wantName string + wantSkip bool + }{ + {0, "exp", false}, + {1, "renamed", false}, + {2, "NoTag", false}, + {3, "OmitOnly", false}, + {4, "", true}, + {5, "", true}, + } + for _, c := range cases 
{ + fi := resolveField(st.Field(c.fieldIdx)) + if fi.Skip != c.wantSkip { + t.Errorf("field %d: skip=%v, want %v", c.fieldIdx, fi.Skip, c.wantSkip) + } + if !c.wantSkip && fi.Name != c.wantName { + t.Errorf("field %d: name=%q, want %q", c.fieldIdx, fi.Name, c.wantName) + } + } +} + +func TestWalkFieldsFlattensEmbedded(t *testing.T) { + type Inner struct { + A string `json:"a"` + B int `json:"b"` + } + type Outer struct { + Inner + C bool `json:"c"` + } + var names []string + err := walkFields(reflect.TypeFor[Outer](), func(fi fieldInfo) error { + names = append(names, fi.Name) + return nil + }) + if err != nil { + t.Fatalf("walkFields: %v", err) + } + want := []string{"a", "b", "c"} + if !reflect.DeepEqual(names, want) { + t.Fatalf("got %v, want %v", names, want) + } +} + +func TestStructType(t *testing.T) { + type S struct{ X int } + cases := []struct { + name string + in reflect.Type + wantNil bool + }{ + {"struct", reflect.TypeFor[S](), false}, + {"*struct", reflect.TypeFor[*S](), false}, + {"[]struct", reflect.TypeFor[[]S](), false}, + {"time.Time", reflect.TypeFor[time.Time](), true}, + {"string", reflect.TypeFor[string](), true}, + } + for _, c := range cases { + got := structType(c.in) + if (got == nil) != c.wantNil { + t.Errorf("%s: got %v, wantNil %v", c.name, got, c.wantNil) + } + } +} diff --git a/services/search/pkg/mapping/opensearch.go b/services/search/pkg/mapping/opensearch.go new file mode 100644 index 0000000000..d677aaa189 --- /dev/null +++ b/services/search/pkg/mapping/opensearch.go @@ -0,0 +1,134 @@ +package mapping + +import ( + "fmt" + "reflect" +) + +// OpenSearchBuildMapping builds the OpenSearch "properties" map (the value +// of mappings.properties) for type t by walking the struct via reflection. +// Field names come from json tags; overrides are keyed by those names. +// +// The returned map contains plain JSON-friendly values (strings, bools, +// nested maps) and can be marshalled directly. 
+func OpenSearchBuildMapping(t reflect.Type, overrides map[string]FieldOpts) (map[string]any, error) { + return buildOpenSearchProperties(t, overrides, "") +} + +func buildOpenSearchProperties(t reflect.Type, overrides map[string]FieldOpts, prefix string) (map[string]any, error) { + props := map[string]any{} + err := walkFields(t, func(fi fieldInfo) error { + key := fi.Name + if prefix != "" { + key = prefix + "." + fi.Name + } + opts := overrides[key] + fieldType := opts.Type + if fieldType == "" { + fieldType = inferType(fi.GoField.Type) + } + + if fieldType == TypeObject { + sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: object type on non-struct field %q", key) + } + subProps, err := buildOpenSearchProperties(sub, overrides, key) + if err != nil { + return err + } + props[fi.Name] = map[string]any{"properties": subProps} + return nil + } + + if fieldType == TypeGeopoint { + // Mirror the bleve layout: keep the libregraph facet as an + // object for data retrieval / numeric queries, and add a + // sibling "_geopoint" geo_point field at the same + // level for geo-distance / bbox / polygon queries. 
+ sub := structType(fi.GoField.Type) + if sub == nil { + return fmt.Errorf("mapping: geopoint type on non-struct field %q", key) + } + subProps, err := buildOpenSearchProperties(sub, overrides, key) + if err != nil { + return err + } + props[fi.Name] = map[string]any{"properties": subProps} + props[fi.Name+GeopointSuffix] = map[string]any{"type": "geo_point"} + return nil + } + + fm, err := openSearchFieldMapping(fieldType, opts, fi.GoField.Type) + if err != nil { + return fmt.Errorf("mapping: field %q: %w", key, err) + } + props[fi.Name] = fm + return nil + }) + return props, err +} + +func openSearchFieldMapping(fieldType string, opts FieldOpts, goType reflect.Type) (map[string]any, error) { + switch fieldType { + case TypeKeyword: + m := map[string]any{"type": "keyword"} + if opts.Analyzer != "" { + m["type"] = "text" + m["analyzer"] = opts.Analyzer + } + return m, nil + case TypeFulltext: + m := map[string]any{ + "type": "text", + "term_vector": "with_positions_offsets", + } + if opts.Analyzer != "" { + m["analyzer"] = opts.Analyzer + } + return m, nil + case TypePath: + m := map[string]any{"type": "text"} + if opts.Analyzer != "" { + m["analyzer"] = opts.Analyzer + } else { + m["analyzer"] = "path_hierarchy" + } + return m, nil + case TypeWildcard: + // OpenSearch stores wildcard fields with doc_values=false by + // default, so emit it explicitly to keep local and remote + // mappings in sync for the Apply comparison. 
+ return map[string]any{"type": "wildcard", "doc_values": false}, nil + case TypeNumeric: + return map[string]any{"type": openSearchNumericType(goType)}, nil + case TypeBool: + return map[string]any{"type": "boolean"}, nil + case TypeDatetime: + return map[string]any{"type": "date"}, nil + case TypeGeopoint: + return map[string]any{"type": "geo_point"}, nil + case "": + return nil, fmt.Errorf("no type inferred and no override") + } + return nil, fmt.Errorf("unsupported type %q", fieldType) +} + +// openSearchNumericType maps a Go numeric type to an OpenSearch numeric +// field type. +func openSearchNumericType(t reflect.Type) string { + for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { + t = t.Elem() + } + switch t.Kind() { + case reflect.Float32: + return "float" + case reflect.Float64: + return "double" + case reflect.Int8, reflect.Uint8, reflect.Int16, reflect.Uint16: + return "short" + case reflect.Int32, reflect.Uint32: + return "integer" + } + return "long" +} diff --git a/services/search/pkg/mapping/opensearch_test.go b/services/search/pkg/mapping/opensearch_test.go new file mode 100644 index 0000000000..bf100d4f7e --- /dev/null +++ b/services/search/pkg/mapping/opensearch_test.go @@ -0,0 +1,148 @@ +package mapping + +import ( + "reflect" + "testing" + "time" +) + +type osDoc struct { + ID string `json:"ID"` + Size uint64 `json:"Size"` + Deleted bool `json:"Deleted"` + CreatedAt time.Time `json:"CreatedAt"` + Rating float64 `json:"Rating"` + Nested *struct { + Artist string `json:"artist"` + Year int32 `json:"year"` + } `json:"nested,omitempty"` +} + +func TestOpenSearchBuildMappingInferred(t *testing.T) { + props, err := OpenSearchBuildMapping(reflect.TypeFor[osDoc](), nil) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + want := map[string]string{ + "ID": "keyword", + "Size": "long", + "Deleted": "boolean", + "CreatedAt": "date", + "Rating": "double", + } + for k, v := range want { + m, ok := props[k].(map[string]any) + if !ok 
{ + t.Errorf("%s: missing or not a map: %#v", k, props[k]) + continue + } + if got := m["type"]; got != v { + t.Errorf("%s: type %v, want %v", k, got, v) + } + } +} + +func TestOpenSearchBuildMappingNested(t *testing.T) { + props, err := OpenSearchBuildMapping(reflect.TypeFor[osDoc](), nil) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + nested, ok := props["nested"].(map[string]any) + if !ok { + t.Fatalf("nested: not a map: %#v", props["nested"]) + } + sub, ok := nested["properties"].(map[string]any) + if !ok { + t.Fatalf("nested.properties: missing: %#v", nested) + } + artist, ok := sub["artist"].(map[string]any) + if !ok { + t.Fatalf("nested.artist: %#v", sub) + } + if artist["type"] != "keyword" { + t.Errorf("nested.artist.type: %v", artist["type"]) + } + year, ok := sub["year"].(map[string]any) + if !ok { + t.Fatalf("nested.year: %#v", sub) + } + if year["type"] != "integer" { + t.Errorf("nested.year.type: %v (int32 → integer expected)", year["type"]) + } +} + +func TestOpenSearchBuildMappingOverrides(t *testing.T) { + type doc struct { + Name string `json:"Name"` + Content string `json:"Content"` + Path string `json:"Path"` + MimeType string `json:"MimeType"` + } + props, err := OpenSearchBuildMapping(reflect.TypeFor[doc](), map[string]FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "Content": {Type: TypeFulltext}, + "Path": {Type: TypePath}, + "MimeType": {Type: TypeWildcard}, + }) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + name := props["Name"].(map[string]any) + if name["type"] != "text" || name["analyzer"] != "lowercaseKeyword" { + t.Errorf("Name: %#v", name) + } + content := props["Content"].(map[string]any) + if content["type"] != "text" || content["term_vector"] != "with_positions_offsets" { + t.Errorf("Content: %#v", content) + } + if _, ok := content["analyzer"]; ok { + t.Errorf("Content should leave analyzer unset (use OpenSearch default), got %#v", content["analyzer"]) + } + path := 
props["Path"].(map[string]any) + if path["type"] != "text" || path["analyzer"] != "path_hierarchy" { + t.Errorf("Path: %#v", path) + } + mime := props["MimeType"].(map[string]any) + if mime["type"] != "wildcard" { + t.Errorf("MimeType: %#v", mime) + } +} + +func TestOpenSearchBuildMappingGeopoint(t *testing.T) { + type doc struct { + Location *struct { + Lon float64 `json:"longitude"` + Lat float64 `json:"latitude"` + Alt float64 `json:"altitude"` + } `json:"location,omitempty"` + } + props, err := OpenSearchBuildMapping(reflect.TypeFor[doc](), map[string]FieldOpts{ + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("OpenSearchBuildMapping: %v", err) + } + // Object for libregraph-shape data retrieval. + loc, ok := props["location"].(map[string]any) + if !ok { + t.Fatalf("location: %#v", props["location"]) + } + sub, ok := loc["properties"].(map[string]any) + if !ok { + t.Fatalf("location should have numeric sub-properties, got %#v", loc) + } + for _, k := range []string{"longitude", "latitude", "altitude"} { + prop, ok := sub[k].(map[string]any) + if !ok || prop["type"] != "double" { + t.Errorf("location.%s: %#v", k, sub[k]) + } + } + // Sibling geo_point for spatial queries. + gp, ok := props["location"+GeopointSuffix].(map[string]any) + if !ok { + t.Fatalf("location%s: %#v", GeopointSuffix, props["location"+GeopointSuffix]) + } + if gp["type"] != "geo_point" { + t.Errorf("location%s.type: %v", GeopointSuffix, gp["type"]) + } +} diff --git a/services/search/pkg/mapping/opts.go b/services/search/pkg/mapping/opts.go new file mode 100644 index 0000000000..3daa258a62 --- /dev/null +++ b/services/search/pkg/mapping/opts.go @@ -0,0 +1,35 @@ +// Package mapping builds search index mappings for bleve and OpenSearch from +// a Go struct via reflection. Field names come from json tags; the caller +// provides overrides for fields that need a specific type or analyzer. +package mapping + +// Field type constants used in FieldOpts.Type. 
An empty Type means the type +// is inferred from the Go field via reflection. +const ( + TypeKeyword = "keyword" + TypeFulltext = "fulltext" + TypePath = "path" + TypeWildcard = "wildcard" + TypeNumeric = "numeric" + TypeDatetime = "datetime" + TypeBool = "bool" + TypeObject = "object" + TypeGeopoint = "geopoint" +) + +// FieldOpts overrides the default type inference for a struct field. Keys in +// the override map are json-tag names (e.g. "Name", "location", "audio.artist"), +// not Go field names. +type FieldOpts struct { + // Type is one of the Type* constants. Empty means "infer from Go type". + Type string + + // Analyzer is the name of a custom analyzer registered on the bleve + // IndexMapping (e.g. "lowercaseKeyword", "fulltext"). For OpenSearch it + // becomes the analyzer attribute on the field. + Analyzer string + + // IncludeInAll controls bleve's _all field inclusion. Nil means "use the + // bleve default for this field type". Has no effect on OpenSearch. + IncludeInAll *bool +} diff --git a/services/search/pkg/mapping/serialize.go b/services/search/pkg/mapping/serialize.go new file mode 100644 index 0000000000..9deebfd67d --- /dev/null +++ b/services/search/pkg/mapping/serialize.go @@ -0,0 +1,29 @@ +package mapping + +import ( + "fmt" + + "github.com/opencloud-eu/opencloud/pkg/conversions" +) + +// PrepareForIndex converts v to a flat map[string]any suitable for passing +// to the backend's index client (bleve.Batch.Index or the OpenSearch bulk +// body). The struct → map conversion goes through conversions.To, which is +// a json marshal/unmarshal round-trip — honors json tags, omitempty, and +// embedded-struct flattening. After that, type-specific adaptations +// (currently: geopoint siblings) are spliced in based on overrides. +// +// Callers should pass the same overrides map used for BleveBuildMapping / +// OpenSearchBuildMapping so the document shape and the mapping stay in +// sync. 
+func PrepareForIndex(v any, overrides map[string]FieldOpts) (map[string]any, error) { + out, err := conversions.To[map[string]any](v) + if err != nil { + return nil, fmt.Errorf("mapping: prepare %T: %w", v, err) + } + if out == nil { + return out, nil + } + addGeopointSiblings(out, overrides) + return out, nil +} diff --git a/services/search/pkg/mapping/serialize_test.go b/services/search/pkg/mapping/serialize_test.go new file mode 100644 index 0000000000..0ce7f84f58 --- /dev/null +++ b/services/search/pkg/mapping/serialize_test.go @@ -0,0 +1,65 @@ +package mapping + +import ( + "reflect" + "testing" +) + +func TestPrepareForIndexFlattensEmbedded(t *testing.T) { + type inner struct { + Name string `json:"Name"` + Size uint64 `json:"Size"` + } + type outer struct { + inner + ID string `json:"ID"` + } + m, err := PrepareForIndex(outer{inner: inner{Name: "a", Size: 7}, ID: "x"}, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + want := map[string]any{"Name": "a", "Size": float64(7), "ID": "x"} + if !reflect.DeepEqual(m, want) { + t.Fatalf("got %#v, want %#v", m, want) + } +} + +func TestPrepareForIndexOmitsNilWithOmitempty(t *testing.T) { + type facet struct { + Artist string `json:"artist"` + } + type doc struct { + Name string `json:"Name"` + Audio *facet `json:"audio,omitempty"` + } + m, err := PrepareForIndex(doc{Name: "n"}, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + if _, ok := m["audio"]; ok { + t.Errorf("audio should be omitted when nil: %#v", m) + } + if m["Name"] != "n" { + t.Errorf("Name: %v", m["Name"]) + } +} + +func TestPrepareForIndexIncludesNestedWhenSet(t *testing.T) { + type facet struct { + Artist string `json:"artist"` + } + type doc struct { + Audio *facet `json:"audio,omitempty"` + } + m, err := PrepareForIndex(doc{Audio: &facet{Artist: "A"}}, nil) + if err != nil { + t.Fatalf("PrepareForIndex: %v", err) + } + nested, ok := m["audio"].(map[string]any) + if !ok { + t.Fatalf("audio should be a nested map: 
%#v", m["audio"]) + } + if nested["artist"] != "A" { + t.Errorf("audio.artist: %v", nested["artist"]) + } +} diff --git a/services/search/pkg/mapping/validate.go b/services/search/pkg/mapping/validate.go new file mode 100644 index 0000000000..6a85c718d4 --- /dev/null +++ b/services/search/pkg/mapping/validate.go @@ -0,0 +1,47 @@ +package mapping + +import ( + "fmt" + "reflect" + "sort" + "strings" +) + +// Validate returns an error if any override key does not match a known field +// name in t. Top-level fields are identified by their json-tag name; nested +// struct fields are reachable as "parent.child". +func Validate(t reflect.Type, overrides map[string]FieldOpts) error { + if len(overrides) == 0 { + return nil + } + names := collectNames(t, "") + var unknown []string + for k := range overrides { + if _, ok := names[k]; !ok { + unknown = append(unknown, k) + } + } + if len(unknown) == 0 { + return nil + } + sort.Strings(unknown) + return fmt.Errorf("mapping: unknown override keys: %s", strings.Join(unknown, ", ")) +} + +func collectNames(t reflect.Type, prefix string) map[string]struct{} { + out := map[string]struct{}{} + _ = walkFields(t, func(fi fieldInfo) error { + key := fi.Name + if prefix != "" { + key = prefix + "." 
+ fi.Name + } + out[key] = struct{}{} + if sub := structType(fi.GoField.Type); sub != nil { + for k := range collectNames(sub, key) { + out[k] = struct{}{} + } + } + return nil + }) + return out +} diff --git a/services/search/pkg/mapping/validate_test.go b/services/search/pkg/mapping/validate_test.go new file mode 100644 index 0000000000..2d8500c2b8 --- /dev/null +++ b/services/search/pkg/mapping/validate_test.go @@ -0,0 +1,51 @@ +package mapping + +import ( + "reflect" + "strings" + "testing" +) + +type inner struct { + Artist string `json:"artist"` +} + +type sample struct { + Name string `json:"Name"` + Audio *inner `json:"audio,omitempty"` + Location *struct { //nolint:unused + Lon float64 `json:"longitude"` + Lat float64 `json:"latitude"` + } `json:"location,omitempty"` +} + +func TestValidateAccepts(t *testing.T) { + err := Validate(reflect.TypeFor[sample](), map[string]FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "audio": {Type: TypeObject}, + "audio.artist": {Analyzer: "lowercaseKeyword"}, + "location": {Type: TypeGeopoint}, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidateRejectsUnknown(t *testing.T) { + err := Validate(reflect.TypeFor[sample](), map[string]FieldOpts{ + "nope": {}, + "audio.zzz": {}, + }) + if err == nil { + t.Fatalf("expected error") + } + if !strings.Contains(err.Error(), "nope") || !strings.Contains(err.Error(), "audio.zzz") { + t.Fatalf("error missing keys: %v", err) + } +} + +func TestValidateEmpty(t *testing.T) { + if err := Validate(reflect.TypeFor[sample](), nil); err != nil { + t.Fatalf("empty overrides should pass: %v", err) + } +} diff --git a/services/search/pkg/opensearch/batch.go b/services/search/pkg/opensearch/batch.go index 6297b40156..5018441b1a 100644 --- a/services/search/pkg/opensearch/batch.go +++ b/services/search/pkg/opensearch/batch.go @@ -14,6 +14,7 @@ import ( "github.com/opencloud-eu/opencloud/pkg/conversions" "github.com/opencloud-eu/opencloud/pkg/log" + 
"github.com/opencloud-eu/opencloud/services/search/pkg/mapping" "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/osu" "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) @@ -43,7 +44,7 @@ func NewBatch(client *opensearchgoAPI.Client, index string, size int) (*Batch, e func (b *Batch) Upsert(id string, r search.Resource) error { return b.withSizeLimit(func() error { - body, err := conversions.To[map[string]any](r) + body, err := mapping.PrepareForIndex(r, r.SearchFieldOverrides()) if err != nil { return fmt.Errorf("failed to marshal resource: %w", err) } diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go index 6f10b23c3d..3c9a3dce78 100644 --- a/services/search/pkg/opensearch/index.go +++ b/services/search/pkg/opensearch/index.go @@ -3,29 +3,32 @@ package opensearch import ( "bytes" "context" - "embed" "errors" "fmt" - "path" + "maps" "reflect" "github.com/go-jose/go-jose/v3/json" opensearchgoAPI "github.com/opensearch-project/opensearch-go/v4/opensearchapi" "github.com/tidwall/gjson" + + searchmapping "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" + "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) var ( ErrManualActionRequired = errors.New("manual action required") IndexManagerLatest = IndexIndexManagerResourceV2 - IndexIndexManagerResourceV1 IndexManager = "resource_v1.json" - IndexIndexManagerResourceV2 IndexManager = "resource_v2.json" + IndexIndexManagerResourceV2 IndexManager = "resource_v2" ) -//go:embed internal/indexes/*.json -var indexes embed.FS - type IndexManager string +// indexGenerators dispatches each IndexManager variant to its builder. 
+var indexGenerators = map[IndexManager]func() ([]byte, error){ + IndexIndexManagerResourceV2: buildResourceV2Mapping, +} + func (m IndexManager) String() string { b, err := m.MarshalJSON() if err != nil { @@ -36,16 +39,56 @@ func (m IndexManager) String() string { } func (m IndexManager) MarshalJSON() ([]byte, error) { - filePath := string(m) - body, err := indexes.ReadFile(path.Join("./internal/indexes", filePath)) - switch { - case err != nil: - return nil, fmt.Errorf("failed to read index file %s: %w", filePath, err) - case len(body) <= 0: - return nil, fmt.Errorf("index file %s is empty", filePath) + gen, ok := indexGenerators[m] + if !ok { + return nil, fmt.Errorf("unknown index manager %q", string(m)) + } + return gen() +} + +// buildResourceV2Mapping renders the OpenSearch index template for a +// search.Resource from the shared SearchFieldOverrides. OpenSearch-specific +// tweaks (wildcard MimeType, path_hierarchy Path) are applied on top. +func buildResourceV2Mapping() ([]byte, error) { + resourceType := reflect.TypeFor[search.Resource]() + overrides := maps.Clone(search.Resource{}.SearchFieldOverrides()) + overrides["MimeType"] = searchmapping.FieldOpts{Type: searchmapping.TypeWildcard} + overrides["Path"] = searchmapping.FieldOpts{Type: searchmapping.TypePath} + if err := searchmapping.Validate(resourceType, overrides); err != nil { + return nil, err + } + props, err := searchmapping.OpenSearchBuildMapping(resourceType, overrides) + if err != nil { + return nil, err } - return body, nil + index := map[string]any{ + "settings": map[string]any{ + "number_of_shards": "1", + "number_of_replicas": "1", + "analysis": map[string]any{ + "analyzer": map[string]any{ + "path_hierarchy": map[string]any{ + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": []string{"lowercase"}, + }, + "lowercaseKeyword": map[string]any{ + "type": "custom", + "tokenizer": "keyword", + "filter": []string{"lowercase"}, + }, + }, + "tokenizer": map[string]any{ + 
"path_hierarchy": map[string]any{"type": "path_hierarchy"}, + }, + }, + }, + "mappings": map[string]any{ + "properties": props, + }, + } + return json.Marshal(index) } func (m IndexManager) Apply(ctx context.Context, name string, client *opensearchgoAPI.Client) error { @@ -118,8 +161,11 @@ func (m IndexManager) Apply(ctx context.Context, name string, client *opensearch if errs != nil { return fmt.Errorf( - "index %s already exists and is different from the requested version, %w: %w", - name, + "index %s already exists with a different mapping than the requested version. "+ + "There is no in-place migration today: drop the index in OpenSearch (DELETE /%s) "+ + "and restart the search service. The index will be recreated with the new mapping. "+ + "%w: %w", + name, name, ErrManualActionRequired, errors.Join(errs...), ) diff --git a/services/search/pkg/opensearch/internal/convert/opensearch.go b/services/search/pkg/opensearch/internal/convert/opensearch.go index c4d8212dcd..1eef75ac88 100644 --- a/services/search/pkg/opensearch/internal/convert/opensearch.go +++ b/services/search/pkg/opensearch/internal/convert/opensearch.go @@ -15,6 +15,17 @@ import ( "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) +// copyFacet converts a typed pointer from the indexed shape (libregraph) to +// the protobuf shape via conversions.To. Returns nil when src is nil so the +// enclosing Match.Entity field stays nil. 
+func copyFacet[Dst, Src any](src *Src) *Dst { + if src == nil { + return nil + } + dst, _ := conversions.To[*Dst](src) + return dst +} + func OpenSearchHitToMatch(hit opensearchgoAPI.SearchHit) (*searchMessage.Match, error) { resource, err := conversions.To[search.Resource](hit.Source) if err != nil { @@ -68,26 +79,10 @@ func OpenSearchHitToMatch(hit opensearchgoAPI.SearchHit) (*searchMessage.Match, return strings.Join(contentHighlights[:], "; ") }(), - Audio: func() *searchMessage.Audio { - if !strings.HasPrefix(resource.MimeType, "audio/") { - return nil - } - - audio, _ := conversions.To[*searchMessage.Audio](resource.Audio) - return audio - }(), - Image: func() *searchMessage.Image { - image, _ := conversions.To[*searchMessage.Image](resource.Image) - return image - }(), - Location: func() *searchMessage.GeoCoordinates { - geoCoordinates, _ := conversions.To[*searchMessage.GeoCoordinates](resource.Location) - return geoCoordinates - }(), - Photo: func() *searchMessage.Photo { - photo, _ := conversions.To[*searchMessage.Photo](resource.Photo) - return photo - }(), + Audio: copyFacet[searchMessage.Audio](resource.Audio), + Image: copyFacet[searchMessage.Image](resource.Image), + Location: copyFacet[searchMessage.GeoCoordinates](resource.Location), + Photo: copyFacet[searchMessage.Photo](resource.Photo), }, } diff --git a/services/search/pkg/opensearch/internal/indexes/resource_v1.json b/services/search/pkg/opensearch/internal/indexes/resource_v1.json deleted file mode 100644 index f0f719c4c5..0000000000 --- a/services/search/pkg/opensearch/internal/indexes/resource_v1.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "settings": { - "number_of_shards": "1", - "number_of_replicas": "1", - "analysis": { - "analyzer": { - "path_hierarchy": { - "filter": [ - "lowercase" - ], - "tokenizer": "path_hierarchy", - "type": "custom" - } - }, - "tokenizer": { - "path_hierarchy": { - "type": "path_hierarchy" - } - } - } - }, - "mappings": { - "properties": { - "ID": { - "type": 
"keyword" - }, - "ParentID": { - "type": "keyword" - }, - "RootID": { - "type": "keyword" - }, - "MimeType": { - "type": "wildcard", - "doc_values": false - }, - "Path": { - "type": "text", - "analyzer": "path_hierarchy" - }, - "Deleted": { - "type": "boolean" - }, - "Hidden": { - "type": "boolean" - } - } - } -} diff --git a/services/search/pkg/opensearch/internal/indexes/resource_v2.json b/services/search/pkg/opensearch/internal/indexes/resource_v2.json deleted file mode 100644 index 64b450ef51..0000000000 --- a/services/search/pkg/opensearch/internal/indexes/resource_v2.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "settings": { - "number_of_shards": "1", - "number_of_replicas": "1", - "analysis": { - "analyzer": { - "path_hierarchy": { - "filter": [ - "lowercase" - ], - "tokenizer": "path_hierarchy", - "type": "custom" - } - }, - "tokenizer": { - "path_hierarchy": { - "type": "path_hierarchy" - } - } - } - }, - "mappings": { - "properties": { - "Content": { - "type": "text", - "term_vector": "with_positions_offsets" - }, - "ID": { - "type": "keyword" - }, - "ParentID": { - "type": "keyword" - }, - "RootID": { - "type": "keyword" - }, - "MimeType": { - "type": "wildcard", - "doc_values": false - }, - "Path": { - "type": "text", - "analyzer": "path_hierarchy" - }, - "Deleted": { - "type": "boolean" - }, - "Hidden": { - "type": "boolean" - }, - "Favorites": { - "type": "keyword" - } - } - } -} \ No newline at end of file diff --git a/services/search/pkg/query/bleve/compiler.go b/services/search/pkg/query/bleve/compiler.go index 310397cae6..b0fbeb870e 100644 --- a/services/search/pkg/query/bleve/compiler.go +++ b/services/search/pkg/query/bleve/compiler.go @@ -8,17 +8,25 @@ import ( bleveQuery "github.com/blevesearch/bleve/v2/search/query" "github.com/opencloud-eu/opencloud/pkg/ast" "github.com/opencloud-eu/opencloud/pkg/kql" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" + "github.com/opencloud-eu/opencloud/services/search/pkg/search" ) -// 
lowercaseFields lists the bleve fields whose index mapping uses a -// lowercasing analyzer. Values bound to these fields are pre-lowercased -// so query-side matching stays consistent with the index. -// Keep in sync with services/search/pkg/bleve/index.go NewMapping. -var lowercaseFields = map[string]struct{}{ - "Name": {}, - "Tags": {}, - "Favorites": {}, - "Content": {}, +// lowercaseFields is derived from Resource.SearchFieldOverrides(): any +// field whose override picks a lowercasing analyzer (`lowercaseKeyword`) +// or the fulltext type (which uses a lowercasing analyzer under the hood) +// gets its query-side value pre-lowercased so compile-time matches the +// index-time tokenization. Anything else keeps its original casing. +var lowercaseFields = buildLowercaseFields() + +func buildLowercaseFields() map[string]struct{} { + out := map[string]struct{}{} + for key, opts := range (search.Resource{}).SearchFieldOverrides() { + if opts.Analyzer == "lowercaseKeyword" || opts.Type == mapping.TypeFulltext { + out[key] = struct{}{} + } + } + return out } var _fields = map[string]string{ diff --git a/services/search/pkg/search/search.go b/services/search/pkg/search/search.go index be0e922268..4e384062e1 100644 --- a/services/search/pkg/search/search.go +++ b/services/search/pkg/search/search.go @@ -6,6 +6,7 @@ import ( "fmt" "regexp" "strings" + "sync" gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" @@ -19,6 +20,7 @@ import ( searchmsg "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/messages/search/v0" searchService "github.com/opencloud-eu/opencloud/protogen/gen/opencloud/services/search/v0" "github.com/opencloud-eu/opencloud/services/search/pkg/content" + "github.com/opencloud-eu/opencloud/services/search/pkg/mapping" ) var scopeRegex = regexp.MustCompile(`scope:\s*([^" "\n\r]*)`) @@ -51,13 +53,41 @@ type BatchOperator interface { type Resource struct { content.Document - ID string - RootID 
string - Path string - ParentID string - Type uint64 - Deleted bool - Hidden bool + ID string `json:"ID"` + RootID string `json:"RootID"` + Path string `json:"Path"` + ParentID string `json:"ParentID"` + Type uint64 `json:"Type"` + Deleted bool `json:"Deleted"` + Hidden bool `json:"Hidden"` +} + +// resourceFieldOverrides is the cached value for SearchFieldOverrides. The +// map is built once at first use (its contents never change) so hot paths — +// per-hit matchToResource, per-upsert PrepareForIndex — reuse the same map +// instead of allocating a fresh one every call. +// +// Callers must treat the returned map as read-only. Mutators (the +// OpenSearch index builder adds per-backend tweaks) clone it first. +var resourceFieldOverrides = sync.OnceValue(func() map[string]mapping.FieldOpts { + excludeFromAll := false + return map[string]mapping.FieldOpts{ + "Name": {Analyzer: "lowercaseKeyword"}, + "Content": {Type: mapping.TypeFulltext}, + "Tags": {Analyzer: "lowercaseKeyword", IncludeInAll: &excludeFromAll}, + "Favorites": {Analyzer: "lowercaseKeyword", IncludeInAll: &excludeFromAll}, + "location": {Type: mapping.TypeGeopoint}, + } +}) + +// SearchFieldOverrides returns the field options the mapping package needs +// to build per-backend index mappings for a Resource. Keys are json-tag +// names; see package mapping for the available FieldOpts knobs. +// +// The returned map is a shared read-only instance; callers that need to +// adjust entries must clone it first (see maps.Clone). 
+func (Resource) SearchFieldOverrides() map[string]mapping.FieldOpts { + return resourceFieldOverrides() } // ResolveReference makes sure the path is relative to the space root diff --git a/services/search/pkg/search/service.go b/services/search/pkg/search/service.go index 980f332572..171b08fe86 100644 --- a/services/search/pkg/search/service.go +++ b/services/search/pkg/search/service.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path/filepath" + "reflect" "sort" "strconv" "strings" @@ -628,10 +629,10 @@ func (s *Service) doUpsertItem(ref *provider.Reference, batch BatchOperator) { // determine if metadata needs to be stored in storage as well metadata := map[string]string{} - addAudioMetadata(metadata, doc.Audio) - addImageMetadata(metadata, doc.Image) - addLocationMetadata(metadata, doc.Location) - addPhotoMetadata(metadata, doc.Photo) + addFacetMetadata(metadata, doc.Audio, "libre.graph.audio.") + addFacetMetadata(metadata, doc.Image, "libre.graph.image.") + addFacetMetadata(metadata, doc.Location, "libre.graph.location.") + addFacetMetadata(metadata, doc.Photo, "libre.graph.photo.") if len(metadata) == 0 { return } @@ -656,32 +657,14 @@ func (s *Service) doUpsertItem(ref *provider.Reference, batch BatchOperator) { } } -func addAudioMetadata(metadata map[string]string, audio *libregraph.Audio) { - if audio == nil { +// addFacetMetadata flattens a libregraph facet (Audio / Image / Location / +// Photo pointer) into the metadata map under the given prefix. No-op when +// the facet pointer is nil. 
+func addFacetMetadata[T libregraph.MappedNullable](metadata map[string]string, facet T, prefix string) { + if reflect.ValueOf(facet).IsNil() { return } - marshalToStringMap(audio, metadata, "libre.graph.audio.") -} - -func addImageMetadata(metadata map[string]string, image *libregraph.Image) { - if image == nil { - return - } - marshalToStringMap(image, metadata, "libre.graph.image.") -} - -func addLocationMetadata(metadata map[string]string, location *libregraph.GeoCoordinates) { - if location == nil { - return - } - marshalToStringMap(location, metadata, "libre.graph.location.") -} - -func addPhotoMetadata(metadata map[string]string, photo *libregraph.Photo) { - if photo == nil { - return - } - marshalToStringMap(photo, metadata, "libre.graph.photo.") + marshalToStringMap(facet, metadata, prefix) } func marshalToStringMap[T libregraph.MappedNullable](source T, target map[string]string, prefix string) {