From bea189c5f30621b9276f95121e669df96c9a47f4 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Jan 2024 11:14:13 +0100 Subject: [PATCH] Add initial _all field support to match_only_text field type. --- .../extras/MatchOnlyTextFieldMapper.java | 64 ++++++++++++++++--- .../extras/MatchOnlyTextFieldMapperTests.java | 33 ++++++++++ .../index/mapper/AllFieldMapper.java | 23 +++++++ 3 files changed, 112 insertions(+), 8 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index fa83e2600de9b..abdbe71b6779d 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOFunction; @@ -39,6 +40,7 @@ import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.AllFieldMapper; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.BlockSourceReader; import org.elasticsearch.index.mapper.BlockStoredFieldsReader; @@ -63,12 +65,15 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import static 
org.elasticsearch.index.mapper.AllFieldMapper.toAllFieldTerm; + /** * A {@link FieldMapper} for full-text fields that only indexes * {@link IndexOptions#DOCS} and runs positional queries by looking at the @@ -131,7 +136,8 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { tsi, indexAnalyzer, context.isSourceSynthetic(), - meta.getValue() + meta.getValue(), + context.isIndexIntoAllField() ); return ft; } @@ -140,7 +146,16 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { MatchOnlyTextFieldType tft = buildFieldType(context); MultiFields multiFields = multiFieldsBuilder.build(this, context); - return new MatchOnlyTextFieldMapper(name, Defaults.FIELD_TYPE, tft, multiFields, copyTo, context.isSourceSynthetic(), this); + return new MatchOnlyTextFieldMapper( + name, + Defaults.FIELD_TYPE, + tft, + multiFields, + copyTo, + context.isSourceSynthetic(), + context.isIndexIntoAllField(), + this + ); } } @@ -150,17 +165,20 @@ public static class MatchOnlyTextFieldType extends StringFieldType { private final Analyzer indexAnalyzer; private final TextFieldType textFieldType; + private final boolean indexIntoAllField; public MatchOnlyTextFieldType( String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, - Map meta + Map meta, + boolean indexIntoAllField ) { super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); this.textFieldType = new TextFieldType(name, isSyntheticSource); + this.indexIntoAllField = indexIntoAllField; } public MatchOnlyTextFieldType(String name) { @@ -169,7 +187,8 @@ public MatchOnlyTextFieldType(String name) { new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), Lucene.STANDARD_ANALYZER, false, - Collections.emptyMap() + Collections.emptyMap(), + false ); } @@ -235,10 +254,30 @@ private IntervalsSource 
toIntervalsSource( return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer); } + // TODO: fix other query types to work with _all @Override public Query termQuery(Object value, SearchExecutionContext context) { - // Disable scoring - return new ConstantScoreQuery(super.termQuery(value, context)); + if (indexIntoAllField) { + return new ConstantScoreQuery( + new TermQuery(new Term(AllFieldMapper.NAME, toAllFieldTerm(indexedValueForSearch(value), new BytesRef(name())))) + ); + } else { + // Disable scoring + return new ConstantScoreQuery(super.termQuery(value, context)); + } + } + + @Override + public Query termsQuery(Collection values, SearchExecutionContext context) { + if (indexIntoAllField) { + BytesRef[] bytesRefs = values.stream() + .map(value -> toAllFieldTerm(indexedValueForSearch(value), new BytesRef(name()))) + .toArray(BytesRef[]::new); + return new ConstantScoreQuery(new TermInSetQuery(AllFieldMapper.NAME, bytesRefs)); + } else { + // Disable scoring + return new ConstantScoreQuery(super.termsQuery(values, context)); + } } @Override @@ -368,6 +407,7 @@ private String storedFieldNameForSyntheticSource() { private final NamedAnalyzer indexAnalyzer; private final int positionIncrementGap; private final boolean storeSource; + private final boolean indexIntoAllField; private final FieldType fieldType; private MatchOnlyTextFieldMapper( @@ -377,6 +417,7 @@ private MatchOnlyTextFieldMapper( MultiFields multiFields, CopyTo copyTo, boolean storeSource, + boolean indexIntoAllField, Builder builder ) { super(simpleName, mappedFieldType, multiFields, copyTo, false, null); @@ -388,6 +429,7 @@ private MatchOnlyTextFieldMapper( this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.storeSource = storeSource; + this.indexIntoAllField = indexIntoAllField; } @Override @@ -408,8 +450,14 @@ protected void 
parseCreateField(DocumentParserContext context) throws IOExceptio return; } - Field field = new Field(fieldType().name(), value, fieldType); - context.doc().add(field); + if (indexIntoAllField) { + Field field = new Field(fieldType().name(), value, fieldType); + AllFieldMapper allFieldMapper = (AllFieldMapper) context.getMetadataMapper(AllFieldMapper.NAME); + allFieldMapper.addToAll(context, field); + } else { + Field field = new Field(fieldType().name(), value, fieldType); + context.doc().add(field); + } context.addToFieldNames(fieldType().name()); if (storeSource) { diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index abd2a4c8fa622..fbc08aabff844 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -14,6 +14,9 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHits; import org.apache.lucene.store.Directory; @@ -22,7 +25,9 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Strings; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.mapper.AllFieldMapper; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -259,6 +264,34 @@ public void 
testDocValuesLoadedFromSynthetic() throws IOException { assertScriptDocValues(mapper, "foo", equalTo(List.of("foo"))); } + public void testAllField() throws Exception { + DocumentMapper docMapper = createDocumentMapper(allFieldMapping(true, b -> { + b.startObject("field1"); + b.field("type", "match_only_text"); + b.endObject(); + })); + + ParsedDocument doc = docMapper.parse(source(b -> b.field("field1", "value1"))); + assertThat(doc.rootDoc().getFields(AllFieldMapper.NAME).size(), equalTo(1)); + assertThat(doc.rootDoc().getFields(AllFieldMapper.NAME).get(0).binaryValue(), equalTo(new BytesRef("value1\0field1"))); + + Query query = docMapper.mappers().getFieldType("field1").termQuery("value1", null); + assertThat(query, instanceOf(ConstantScoreQuery.class)); + TermQuery termQuery = (TermQuery) ((ConstantScoreQuery) query).getQuery(); + assertThat(termQuery.getTerm().field(), equalTo(AllFieldMapper.NAME)); + assertThat(termQuery.getTerm().bytes(), equalTo(new BytesRef("value1\0field1"))); + } + + private static XContentBuilder allFieldMapping(boolean enabled, CheckedConsumer propertiesBuilder) + throws IOException { + return topMapping(b -> { + b.startObject(AllFieldMapper.NAME).field("enabled", enabled).endObject(); + b.startObject("properties"); + propertiesBuilder.accept(b); + b.endObject(); + }); + } + @Override protected IngestScriptSupport ingestScriptSupport() { throw new AssumptionViolatedException("not supported"); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AllFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/AllFieldMapper.java index b6c18190e9325..8c25880293f81 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AllFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AllFieldMapper.java @@ -8,6 +8,8 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import 
org.apache.lucene.document.FieldType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; @@ -136,6 +138,7 @@ public void postParse(DocumentParserContext context) throws IOException { for (int i = 0; i < fields.size(); i++) { IndexableField indexableField = fields.get(i); var mappedFieldType = context.mappingLookup().getFieldType(indexableField.name()); + // TODO: make keyword push like match_only_text: if (mappedFieldType != null && "keyword".equals(mappedFieldType.typeName())) { BytesRef value = toAllFieldTerm(indexableField.binaryValue(), new BytesRef(indexableField.name())); if (value.length > MAX_TERM_LENGTH) { @@ -147,6 +150,26 @@ public void postParse(DocumentParserContext context) throws IOException { } + public void addToAll(DocumentParserContext context, IndexableField indexableField) throws IOException { + if (enabled == false) { + return; + } + + // TODO: do we need to use index analyzer from MatchOnlyTextFieldMapper here? + try (TokenStream tokenStream = indexableField.tokenStream(Lucene.STANDARD_ANALYZER, null)) { + CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class); + tokenStream.reset(); + while (tokenStream.incrementToken()) { + BytesRef value = toAllFieldTerm(new BytesRef(termAtt.toString()), new BytesRef(indexableField.name())); + if (value.length > MAX_TERM_LENGTH) { + // TODO + } + context.doc().add(new KeywordFieldMapper.KeywordField(NAME, value, Defaults.FIELD_TYPE)); + } + tokenStream.end(); + } + } + public static BytesRef toAllFieldTerm(BytesRef fieldValueBytes, BytesRef fieldNameBytes) { BytesRefBuilder builder = new BytesRefBuilder(); builder.append(fieldValueBytes);