Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

- Refactor the ShardStats, WarmerStats and IndexingPressureStats class to use the Builder pattern instead of constructors ([#19966](https://github.com/opensearch-project/OpenSearch/pull/19966))
- Throw exceptions for currently unsupported GRPC request-side fields ([#20162](https://github.com/opensearch-project/OpenSearch/pull/20162))
- Support ignore_above for keyword/wildcard field and optimise text field under derived source ([#20113](https://github.com/opensearch-project/OpenSearch/pull/20113))

### Fixed
- Fix Allocation and Rebalance Constraints of WeightFunction are incorrectly reset ([#19012](https://github.com/opensearch-project/OpenSearch/pull/19012))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,13 @@ public void testDerivedSourceSimple() throws IOException {
"type": "boolean"
},
"text_field": {
"type": "text"
"type": "text",
"fields": {
"keyword_field": {
"type": "keyword",
"ignore_above": 20
}
}
},
"ip_field": {
"type": "ip"
Expand All @@ -847,7 +853,7 @@ public void testDerivedSourceSimple() throws IOException {
.field("numeric_field", 123)
.field("date_field", "2023-01-01")
.field("bool_field", true)
.field("text_field", "test text")
.field("text_field", "This text field is exceeding ignore above")
.field("ip_field", "1.2.3.4")
.endObject()
)
Expand Down Expand Up @@ -1086,7 +1092,7 @@ void validateDeriveSource(Map<String, Object> source) {
assertEquals(123, source.get("numeric_field"));
assertEquals("2023-01-01T00:00:00.000Z", source.get("date_field"));
assertEquals(true, source.get("bool_field"));
assertEquals("test text", source.get("text_field"));
assertEquals("This text field is exceeding ignore above", source.get("text_field"));
assertEquals("1.2.3.4", source.get("ip_field"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,7 @@ public void testDerivedSourceRollingRestart() throws Exception {
{
"properties": {
"text_field": {
"type": "text",
"store": true
"type": "text"
},
"keyword_field": {
"type": "keyword"
Expand Down Expand Up @@ -459,8 +458,7 @@ public void testDerivedSourceWithMultiFieldsRollingRestart() throws Exception {
{
"properties": {
"text_field": {
"type": "text",
"store": true
"type": "text"
},
"multi_field": {
"properties": {
Expand Down Expand Up @@ -545,8 +543,7 @@ public void testDerivedSourceWithConcurrentUpdatesRollingRestart() throws Except
{
"properties": {
"text_field": {
"type": "text",
"store": true
"type": "text"
},
"counter": {
"type": "long"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -777,12 +777,10 @@ public void testDerivedSourceSearch() throws Exception {
"type": "ip"
},
"text_field": {
"type": "text",
"store": true
"type": "text"
},
"wildcard_field": {
"type": "wildcard",
"doc_values": true
"type": "wildcard"
},
"constant_keyword": {
"type": "constant_keyword",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -932,8 +932,7 @@ public void testDerivedSourceWithUpdates() throws Exception {
"type": "boolean"
},
"text_field": {
"type": "text",
"store": true
"type": "text"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.index.LeafReader;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * A {@link FieldValueFetcher} that delegates to an ordered list of other fetchers. The fetchers are consulted
 * in list order (earlier entries have higher priority); the values of the first fetcher that yields a non-empty
 * result are converted with that same fetcher's {@code convert} and returned.
 *
 * @opensearch.internal
 */
public class CompositeFieldValueFetcher extends FieldValueFetcher {

    /** Delegates in priority order; immutable snapshot taken at construction time. */
    private final List<FieldValueFetcher> fieldValueFetchers;

    /**
     * @param simpleName         simple name of the field, passed through to the parent fetcher
     * @param fieldValueFetchers delegates to consult, highest priority first; must not be {@code null} and must
     *                           not contain {@code null} elements
     * @throws NullPointerException if {@code fieldValueFetchers} is {@code null} or contains a {@code null} element
     */
    public CompositeFieldValueFetcher(String simpleName, List<FieldValueFetcher> fieldValueFetchers) {
        super(simpleName);
        // Copy defensively so that later changes to the caller's list cannot alter the priority order,
        // and so that a null delegate is rejected here rather than failing on the first fetch.
        this.fieldValueFetchers = List.copyOf(fieldValueFetchers);
    }

    /**
     * Fetches values from the first delegate that has any for the given document.
     *
     * @param reader leaf reader to read the document from
     * @param docId  document id within {@code reader}
     * @return the already-converted values of the highest-priority delegate that returned a non-empty list,
     *         or {@code null} when no delegate returned any value
     * @throws IOException if a delegate fails while reading
     */
    @Override
    public List<Object> fetch(LeafReader reader, int docId) throws IOException {
        for (final FieldValueFetcher fieldValueFetcher : fieldValueFetchers) {
            final List<Object> values = fieldValueFetcher.fetch(reader, docId);
            if (values == null || values.isEmpty()) {
                // Nothing available from this source, try the next one in priority order
                continue;
            }

            // Conversion is delegate-specific, so it has to happen here while we still know
            // which delegate produced the values.
            final List<Object> convertedValues = new ArrayList<>(values.size());
            for (final Object value : values) {
                convertedValues.add(fieldValueFetcher.convert(value));
            }
            return convertedValues;
        }
        return null;
    }

    /**
     * Identity conversion: {@link #fetch(LeafReader, int)} already returns values converted by the delegate
     * that produced them.
     */
    @Override
    Object convert(Object value) {
        return value;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Object convert(Object value) {
* @param builder - builder to store the field value(s) in
*/
void write(XContentBuilder builder, List<Object> values) throws IOException {
if (values.isEmpty()) {
if (values == null || values.isEmpty()) {
return;
}
if (values.size() == 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
Expand Down Expand Up @@ -271,10 +272,8 @@ public Optional<DimensionType> getSupportedDataCubeDimensionType() {

@Override
protected void canDeriveSourceInternal() {
if (this.ignoreAbove != Integer.MAX_VALUE || !Objects.equals(this.normalizerName, "default")) {
throw new UnsupportedOperationException(
"Unable to derive source for [" + name() + "] with " + "ignore_above and/or normalizer set"
);
if (!(fieldType().normalizer() == null || Lucene.KEYWORD_ANALYZER.equals(fieldType().normalizer()))) {
throw new UnsupportedOperationException("Unable to derive source for [" + name() + "] with normalizer set");
}
checkStoredAndDocValuesForDerivedSource();
}
Expand All @@ -284,8 +283,8 @@ protected void canDeriveSourceInternal() {
* 2. If doc_values is disabled in field mapping, then build source using stored field
* <p>
* Support:
* 1. If "ignore_above" is set in the field mapping, then we won't be supporting derived source for now,
* considering for these cases we will need to have explicit stored field.
* 1. If "ignore_above" is set in the field mapping, values longer than the limit are additionally written to a
* dedicated ignored-value stored field, which is used as a fallback when deriving source
* 2. If "normalizer" is set in the field mapping, then also we won't support derived source, as with
* normalizer it is hard to regenerate original source
* <p>
Expand All @@ -295,11 +294,21 @@ protected void canDeriveSourceInternal() {
*/
@Override
protected DerivedFieldGenerator derivedFieldGenerator() {
return new DerivedFieldGenerator(
mappedFieldType,
new SortedSetDocValuesFetcher(mappedFieldType, simpleName()),
new StoredFieldFetcher(mappedFieldType, simpleName())
final FieldValueFetcher primaryFieldValueFetcher = KeywordFieldMapper.DerivedSourceHelper.getPrimaryFieldValueFetcher(
this,
simpleName()
);
final FieldValueFetcher fallbackFieldValueFetcher = KeywordFieldMapper.DerivedSourceHelper.getFallbackFieldValueFetcher(this);
final FieldValueFetcher compositeFieldValueFetcher = new CompositeFieldValueFetcher(
simpleName(),
List.of(primaryFieldValueFetcher, fallbackFieldValueFetcher)
);
return new DerivedFieldGenerator(mappedFieldType, compositeFieldValueFetcher, null) {
@Override
public FieldValueType getDerivedFieldPreference() {
return FieldValueType.DOC_VALUES;
}
};
}

/**
Expand Down Expand Up @@ -872,11 +881,22 @@ protected void parseCreateField(ParseContext context) throws IOException {
}
}

if (value == null || value.length() > ignoreAbove) {
if (value == null) {
return;
}

NamedAnalyzer normalizer = fieldType().normalizer();

// Explicitly add value as a stored field if value is getting ignored, to be able to derive the source
if (value.length() > ignoreAbove) {
if ((normalizer == null || Lucene.KEYWORD_ANALYZER.equals(normalizer))
&& context.indexSettings().isDerivedSourceEnabled()
&& context.isWithinMultiFields() == false) {
final BytesRef binaryValue = new BytesRef(value);
context.doc().add(new StoredField(fieldType().derivedSourceIgnoreFieldName(), binaryValue));
}
return;
}
if (normalizer != null) {
value = normalizeValue(normalizer, name(), value);
}
Expand Down Expand Up @@ -936,4 +956,51 @@ protected String contentType() {
public ParametrizedFieldMapper.Builder getMergeBuilder() {
return new Builder(simpleName(), indexAnalyzers).init(this);
}

/**
 * Static factories for the {@link FieldValueFetcher}s used to derive source for a keyword field.
 */
static final class DerivedSourceHelper {

    private DerivedSourceHelper() {
        // static helpers only, not meant to be instantiated
    }

    /**
     * Builds the fetcher that reads the regularly indexed value of the field.
     *
     * @param mapper        keyword mapper whose field type decides where values are read from
     * @param textFieldName simple name handed to the created fetcher
     * @return a {@link SortedSetDocValuesFetcher} when the field has doc values, otherwise a
     *         {@link StoredFieldFetcher}
     */
    static FieldValueFetcher getPrimaryFieldValueFetcher(KeywordFieldMapper mapper, String textFieldName) {
        return mapper.fieldType().hasDocValues()
            ? new SortedSetDocValuesFetcher(mapper.fieldType(), textFieldName)
            : new StoredFieldFetcher(mapper.fieldType(), textFieldName);
    }

    /**
     * Builds the fetcher that reads from the field named by {@code derivedSourceIgnoreFieldName()}, i.e. the
     * stored field holding the ignored value.
     *
     * @param mapper keyword mapper whose ignored-value field should be read
     * @return a {@link StoredFieldFetcher} bound to a stored-only field type for the ignored-value field
     */
    static FieldValueFetcher getFallbackFieldValueFetcher(KeywordFieldMapper mapper) {
        // Override to read from the special ignored value field
        final MappedFieldType ignoredFieldType = new MappedFieldType(
            mapper.fieldType().derivedSourceIgnoreFieldName(),
            false, // not searchable
            true, // stored
            false, // no doc values
            TextSearchInfo.NONE,
            Collections.emptyMap()
        ) {
            @Override
            public String typeName() {
                return "keyword";
            }

            // This field type only serves stored-field reads; no value fetcher is provided.
            @Override
            public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
                return null;
            }

            // Declared as not searchable above; no term query is provided.
            @Override
            public Query termQuery(Object value, QueryShardContext context) {
                return null;
            }

            @Override
            public Object valueForDisplay(Object value) {
                if (value == null) {
                    return null;
                }
                // keywords are internally stored as utf8 bytes
                BytesRef binaryValue = (BytesRef) value;
                return binaryValue.utf8ToString();
            }
        };
        return new StoredFieldFetcher(ignoredFieldType, mapper.simpleName());
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -139,21 +139,22 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna
@Override
public MatchOnlyTextFieldMapper build(BuilderContext context) {
FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors);
MatchOnlyTextFieldType tft = buildFieldType(fieldType, context);
MultiFields multiFields = multiFieldsBuilder.build(this, context);
MatchOnlyTextFieldType tft = buildFieldType(fieldType, multiFields, context);
return new MatchOnlyTextFieldMapper(
name,
fieldType,
tft,
buildPrefixMapper(context, fieldType, tft),
buildPhraseMapper(fieldType, tft),
multiFieldsBuilder.build(this, context),
multiFields,
copyTo.build(),
this
);
}

@Override
protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderContext context) {
protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, MultiFields multiFields, BuilderContext context) {
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
*/
public abstract class StringFieldType extends TermBasedFieldType {

private static final String IGNORED_VALUE_FIELD_SUFFIX = ".ignored_value";
private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)");

public StringFieldType(
Expand Down Expand Up @@ -255,4 +256,8 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
includeUpper
);
}

/**
 * Name of the companion field used for derived source: this field's {@code name()} followed by the
 * ignored-value suffix.
 *
 * @return the field name with {@code IGNORED_VALUE_FIELD_SUFFIX} appended
 */
public String derivedSourceIgnoreFieldName() {
    final String fieldName = name();
    return fieldName + IGNORED_VALUE_FIELD_SUFFIX;
}
}
Loading
Loading