From b774ebacd2ac00a3de6d7de72a0316dbb2ee8b2f Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Wed, 11 Feb 2026 13:32:44 -0500 Subject: [PATCH] Adds support to exclude fields from _source Why are these changes being introduced: * We are starting to add very large fields into our OpenSearch _source, which we are concerned may cause performance issues Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/USE-406 How does this address that need: * Introduces an ENV variable to specify fields to exclude from _source --- README.md | 2 ++ app/models/opensearch.rb | 10 ++++++++++ test/models/opensearch_test.rb | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/README.md b/README.md index 5b8a259..800139c 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,8 @@ locally. confused. ``` +- `OPENSEARCH_SOURCE_EXCLUDES`: comma-separated list of fields to exclude from the OpenSearch `_source` field. Leave unset to return all fields. + - recommended value: `embedding_full_record,fulltext` - `PLATFORM_NAME`: The value set is added to the header after the MIT Libraries logo. The logic and CSS for this comes from our theme gem. - `PREFERRED_DOMAIN` - set this to the domain you would like to to use. Any other requests that come to the app will redirect to the root of this domain. diff --git a/app/models/opensearch.rb b/app/models/opensearch.rb index e5c2e5e..dbac195 100644 --- a/app/models/opensearch.rb +++ b/app/models/opensearch.rb @@ -41,6 +41,16 @@ def build_query(from) sort: } + # If ENV OPENSEARCH_SOURCE_EXCLUDES is set, use the values in its comma-separated list; + # otherwise leave out the _source attribute entirely (which will return all fields in _source) + # excludes are used to prevent large fields from being returned in the search results, which can cause performance issues + # these fields are still searchable, just not returned in the search results + if ENV['OPENSEARCH_SOURCE_EXCLUDES'].present? 
+ query_hash[:_source] = { + excludes: ENV['OPENSEARCH_SOURCE_EXCLUDES'].split(',').map(&:strip) + } + end + query_hash[:highlight] = highlight if @highlight query_hash.to_json end diff --git a/test/models/opensearch_test.rb b/test/models/opensearch_test.rb index 491d0be..dc4ff39 100644 --- a/test/models/opensearch_test.rb +++ b/test/models/opensearch_test.rb @@ -387,4 +387,22 @@ class OpensearchTest < ActiveSupport::TestCase json = JSON.parse(os.build_query(0)) assert_equal Opensearch::MAX_SIZE, json['size'] end + + test 'can exclude fields from _source' do + ClimateControl.modify(OPENSEARCH_SOURCE_EXCLUDES: 'field1,field2') do + os = Opensearch.new + os.instance_variable_set(:@params, {}) + json = JSON.parse(os.build_query(0)) + assert_equal %w[field1 field2], json['_source']['excludes'] + end + end + + test 'does not include _source if OPENSEARCH_SOURCE_EXCLUDES is not set' do + ClimateControl.modify(OPENSEARCH_SOURCE_EXCLUDES: nil) do + os = Opensearch.new + os.instance_variable_set(:@params, {}) + json = JSON.parse(os.build_query(0)) + refute json.key?('_source') + end + end end