Merge branch 'elastic:main' into requireAtLeastOneMatch-only-needed-for-aggs
piergm authored Oct 24, 2024
2 parents 2b3cbb1 + 37c7137 commit ad8d4d3
Showing 55 changed files with 1,073 additions and 202 deletions.
1 change: 0 additions & 1 deletion .github/CODEOWNERS
@@ -39,7 +39,6 @@ gradle @elastic/es-delivery
build-conventions @elastic/es-delivery
build-tools @elastic/es-delivery
build-tools-internal @elastic/es-delivery
*.gradle @elastic/es-delivery
.buildkite @elastic/es-delivery
.ci @elastic/es-delivery
.idea @elastic/es-delivery
2 changes: 1 addition & 1 deletion README.asciidoc
@@ -56,8 +56,8 @@ Quickly set up Elasticsearch and Kibana in Docker for local development or testi
- If you're using Microsoft Windows, then install https://learn.microsoft.com/en-us/windows/wsl/install[Windows Subsystem for Linux (WSL)].

==== Trial license
This setup comes with a one-month trial license that includes all Elastic features.

This setup comes with a one-month trial of the Elastic *Platinum* license.
After the trial period, the license reverts to *Free and open - Basic*.
Refer to https://www.elastic.co/subscriptions[Elastic subscriptions] for more information.

6 changes: 6 additions & 0 deletions docs/changelog/114990.yaml
@@ -0,0 +1,6 @@
pr: 114990
summary: Allow for queries on `_tier` to skip shards in the `can_match` phase
area: Search
type: bug
issues:
- 114910
6 changes: 6 additions & 0 deletions docs/changelog/115117.yaml
@@ -0,0 +1,6 @@
pr: 115117
summary: Report JVM stats for all memory pools (97046)
area: Infra/Core
type: bug
issues:
- 97046
29 changes: 29 additions & 0 deletions docs/changelog/115399.yaml
@@ -0,0 +1,29 @@
pr: 115399
summary: Adding breaking change entry for retrievers
area: Search
type: breaking
issues: []
breaking:
title: Reworking RRF retriever to be evaluated during rewrite phase
area: REST API
details: |-
In this release (8.16), we have introduced major changes to the retrievers framework
and how they can be evaluated, focusing mainly on compound retrievers
like `rrf` and `text_similarity_reranker`, which allowed us to support full
composability (i.e. any retriever can be nested under any compound retriever),
as well as supporting additional search features like collapsing, explaining,
aggregations, and highlighting.
  To ensure consistency, and given that this rework is not available until 8.16,
  `rrf` and `text_similarity_reranker` retriever queries will now
  throw an exception in a mixed cluster scenario, where there are nodes
  on both current or later (i.e. >= 8.16) and earlier (i.e. <= 8.15) versions.
As part of the rework, we have also removed the `_rank` property from
the responses of an `rrf` retriever.
impact: |-
- Users will not be able to use the `rrf` and `text_similarity_reranker` retrievers in a mixed cluster scenario
with previous releases (i.e. prior to 8.16), and the request will throw an `IllegalArgumentException`.
  - `_rank` has now been removed from the output of the `rrf` retrievers, so trying to parse the field
  directly will throw an exception.
notable: false
5 changes: 5 additions & 0 deletions docs/changelog/115429.yaml
@@ -0,0 +1,5 @@
pr: 115429
summary: "[otel-data] Add more kubernetes aliases"
area: Data streams
type: bug
issues: []
2 changes: 1 addition & 1 deletion docs/reference/how-to/size-your-shards.asciidoc
@@ -572,7 +572,7 @@ PUT _cluster/settings
}
----

For more information, see <<troubleshooting-shards-capacity-issues,Troubleshooting shards capacity>>.
See this https://www.youtube.com/watch?v=tZKbDegt4-M[fixing "max shards open" video] for an example troubleshooting walkthrough. For more information, see <<troubleshooting-shards-capacity-issues,Troubleshooting shards capacity>>.

[discrete]
[[troubleshooting-max-docs-limit]]
18 changes: 18 additions & 0 deletions docs/reference/inference/inference-apis.asciidoc
@@ -34,6 +34,24 @@ Elastic –, then create an {infer} endpoint by the <<put-inference-api>>.
Now use <<semantic-search-semantic-text, semantic text>> to perform
<<semantic-search, semantic search>> on your data.


[discrete]
[[default-enpoints]]
=== Default {infer} endpoints

Your {es} deployment contains preconfigured {infer} endpoints that make it easier for you to use {infer} when defining `semantic_text` fields or {infer} processors.
The following list contains the default {infer} endpoints listed by `inference_id`:

* `.elser-2-elasticsearch`: uses the {ml-docs}/ml-nlp-elser.html[ELSER] built-in trained model for `sparse_embedding` tasks (recommended for English language texts)
* `.multilingual-e5-small-elasticsearch`: uses the {ml-docs}/ml-nlp-e5.html[E5] built-in trained model for `text_embedding` tasks (recommended for non-English language texts)

Use the `inference_id` of the endpoint in a <<semantic-text,`semantic_text`>> field definition or when creating an <<inference-processor,{infer} processor>>.
The API call will automatically download and deploy the model, which might take a couple of minutes.
Default {infer} endpoints have {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[adaptive allocations] enabled.
For these models, the minimum number of allocations is `0`.
If there is no {infer} activity that uses the endpoint, the number of allocations automatically scales down to `0` after 15 minutes.
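
For example, a `semantic_text` field definition referencing the default ELSER endpoint might look like the following sketch (the index and field names here are illustrative, not part of this changeset):

[source,console]
------------------------------------------------------------
PUT my-semantic-index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "semantic_text",
        "inference_id": ".elser-2-elasticsearch"
      }
    }
  }
}
------------------------------------------------------------
// TEST[skip:TBD]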


include::delete-inference.asciidoc[]
include::get-inference.asciidoc[]
include::post-inference.asciidoc[]
94 changes: 84 additions & 10 deletions docs/reference/inference/service-elasticsearch.asciidoc
@@ -1,12 +1,9 @@
[[infer-service-elasticsearch]]
=== Elasticsearch {infer} service

Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch`
service.
Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch` service.

NOTE: If you use the E5 model through the `elasticsearch` service, the API
request will automatically download and deploy the model if it isn't downloaded
yet.
NOTE: If you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.


[discrete]
@@ -56,6 +53,11 @@ These settings are specific to the `elasticsearch` service.
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation]

`deployment_id`:::
(Optional, string)
The `deployment_id` of an existing trained model deployment.
When `deployment_id` is used the `model_id` is optional.

`enabled`::::
(Optional, Boolean)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-enabled]
@@ -71,7 +73,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-min-number]
`model_id`:::
(Required, string)
The name of the model to use for the {infer} task.
It can be the ID of either a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model already
It can be the ID of either a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model already
{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].

`num_allocations`:::
Expand All @@ -98,15 +100,44 @@ Returns the document instead of only the index. Defaults to `true`.
=====


[discrete]
[[inference-example-elasticsearch-elser]]
==== ELSER via the `elasticsearch` service

The following example shows how to create an {infer} endpoint called `my-elser-model` to perform a `sparse_embedding` task type.

The API request below will automatically download the ELSER model if it isn't already downloaded and then deploy the model.

[source,console]
------------------------------------------------------------
PUT _inference/sparse_embedding/my-elser-model
{
"service": "elasticsearch",
"service_settings": {
"adaptive_allocations": { <1>
"enabled": true,
"min_number_of_allocations": 1,
"max_number_of_allocations": 10
},
"num_threads": 1,
"model_id": ".elser_model_2" <2>
}
}
------------------------------------------------------------
// TEST[skip:TBD]
<1> Adaptive allocations will be enabled with a minimum of 1 and a maximum of 10 allocations.
<2> The `model_id` must be the ID of one of the built-in ELSER models.
Valid values are `.elser_model_2` and `.elser_model_2_linux-x86_64`.
For further details, refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation].


[discrete]
[[inference-example-elasticsearch]]
==== E5 via the `elasticsearch` service

The following example shows how to create an {infer} endpoint called
`my-e5-model` to perform a `text_embedding` task type.
The following example shows how to create an {infer} endpoint called `my-e5-model` to perform a `text_embedding` task type.

The API request below will automatically download the E5 model if it isn't
already downloaded and then deploy the model.
The API request below will automatically download the E5 model if it isn't already downloaded and then deploy the model.

[source,console]
------------------------------------------------------------
@@ -185,3 +216,46 @@ PUT _inference/text_embedding/my-e5-model
}
------------------------------------------------------------
// TEST[skip:TBD]


[discrete]
[[inference-example-existing-deployment]]
==== Using an existing model deployment with the `elasticsearch` service

The following example shows how to use an existing model deployment when creating an {infer} endpoint.

[source,console]
------------------------------------------------------------
PUT _inference/sparse_embedding/use_existing_deployment
{
"service": "elasticsearch",
"service_settings": {
"deployment_id": ".elser_model_2" <1>
}
}
------------------------------------------------------------
// TEST[skip:TBD]
<1> The `deployment_id` of the already existing model deployment.

The API response contains the `model_id` and the thread and allocation settings from the model deployment:

[source,console-result]
------------------------------------------------------------
{
"inference_id": "use_existing_deployment",
"task_type": "sparse_embedding",
"service": "elasticsearch",
"service_settings": {
"num_allocations": 2,
"num_threads": 1,
"model_id": ".elser_model_2",
"deployment_id": ".elser_model_2"
},
"chunking_settings": {
"strategy": "sentence",
"max_chunk_size": 250,
"sentence_overlap": 1
}
}
------------------------------------------------------------
// NOTCONSOLE
3 changes: 2 additions & 1 deletion docs/reference/inference/service-elser.asciidoc
@@ -2,6 +2,7 @@
=== ELSER {infer} service

Creates an {infer} endpoint to perform an {infer} task with the `elser` service.
You can also deploy ELSER by using the <<infer-service-elasticsearch>>.

NOTE: The API request will automatically download and deploy the ELSER model if
it isn't already downloaded.
@@ -128,7 +129,7 @@ If using the Python client, you can set the `timeout` parameter to a higher valu

[discrete]
[[inference-example-elser-adaptive-allocation]]
==== Setting adaptive allocation for the ELSER service
==== Setting adaptive allocations for the ELSER service

NOTE: For more information on how to optimize your ELSER endpoints, refer to {ml-docs}/ml-nlp-elser.html#elser-recommendations[the ELSER recommendations] section in the model documentation.
To learn more about model autoscaling, refer to the {ml-docs}/ml-nlp-auto-scale.html[trained model autoscaling] page.
16 changes: 13 additions & 3 deletions docs/reference/query-dsl/terms-set-query.asciidoc
@@ -159,12 +159,22 @@ GET /job-candidates/_search
`terms`::
+
--
(Required, array of strings) Array of terms you wish to find in the provided
(Required, array) Array of terms you wish to find in the provided
`<field>`. To return a document, a required number of terms must exactly match
the field values, including whitespace and capitalization.

The required number of matching terms is defined in the
`minimum_should_match_field` or `minimum_should_match_script` parameter.
The required number of matching terms is defined in the `minimum_should_match`,
`minimum_should_match_field` or `minimum_should_match_script` parameters. Exactly
one of these parameters must be provided.
--

`minimum_should_match`::
+
--
(Optional) Specification for the number of matching terms required to return
a document.

For valid values, see <<query-dsl-minimum-should-match, `minimum_should_match` parameter>>.
--
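
A minimal `terms_set` query using the new `minimum_should_match` parameter could look like the following sketch (reusing the `job-candidates` example index; the field name is illustrative):

[source,console]
------------------------------------------------------------
GET /job-candidates/_search
{
  "query": {
    "terms_set": {
      "programming_languages": {
        "terms": [ "c++", "java", "php" ],
        "minimum_should_match": 2
      }
    }
  }
}
------------------------------------------------------------
// TEST[skip:TBD]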

`minimum_should_match_field`::
7 changes: 7 additions & 0 deletions docs/reference/query-dsl/text-expansion-query.asciidoc
@@ -7,6 +7,13 @@

deprecated[8.15.0, This query has been replaced by <<query-dsl-sparse-vector-query>>.]

.Deprecation usage note
****
You can continue using `rank_features` fields with `text_expansion` queries in the current version.
However, if you plan to upgrade, we recommend updating mappings to use the `sparse_vector` field type and <<docs-reindex,reindexing your data>>.
This will allow you to take advantage of the new capabilities and improvements available in newer versions.
****
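
The recommended migration can be sketched as follows: create a new index whose field uses the `sparse_vector` type, then reindex into it. The index and field names below are placeholders for illustration only:

[source,console]
------------------------------------------------------------
PUT my-index-sparse
{
  "mappings": {
    "properties": {
      "ml.tokens": {
        "type": "sparse_vector"
      }
    }
  }
}

POST _reindex
{
  "source": { "index": "my-index" },
  "dest": { "index": "my-index-sparse" }
}
------------------------------------------------------------
// TEST[skip:TBD]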

The text expansion query uses a {nlp} model to convert the query text into a list of token-weight pairs which are then used in a query against a
<<sparse-vector,sparse vector>> or <<rank-features,rank features>> field.

4 changes: 2 additions & 2 deletions docs/reference/run-elasticsearch-locally.asciidoc
@@ -20,7 +20,7 @@ Refer to <<elasticsearch-intro-deploy, deployment options>> for a list of produc

Quickly set up {es} and {kib} in Docker for local development or testing, using the https://github.com/elastic/start-local?tab=readme-ov-file#-try-elasticsearch-and-kibana-locally[`start-local` script].

This setup comes with a one-month trial of the Elastic *Platinum* license.
This setup comes with a one-month trial license that includes all Elastic features.
After the trial period, the license reverts to *Free and open - Basic*.
Refer to https://www.elastic.co/subscriptions[Elastic subscriptions] for more information.

@@ -84,4 +84,4 @@ Learn about customizing the setup, logging, and more.
[[local-dev-next-steps]]
=== Next steps

Use our <<quickstart,quick start guides>> to learn the basics of {es}.
Use our <<quickstart,quick start guides>> to learn the basics of {es}.
3 changes: 3 additions & 0 deletions muted-tests.yml
@@ -282,6 +282,9 @@ tests:
- class: org.elasticsearch.xpack.security.FileSettingsRoleMappingsRestartIT
method: testFileSettingsReprocessedOnRestartWithoutVersionChange
issue: https://github.com/elastic/elasticsearch/issues/115450
- class: org.elasticsearch.xpack.restart.MLModelDeploymentFullClusterRestartIT
method: testDeploymentSurvivesRestart {cluster=UPGRADED}
issue: https://github.com/elastic/elasticsearch/issues/115528

# Examples:
#
@@ -11,11 +11,11 @@

import org.apache.lucene.util.SetOnce;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.Version;
import org.elasticsearch.common.hash.MessageDigests;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.env.BuildVersion;
import org.elasticsearch.env.Environment;
import org.elasticsearch.reservedstate.service.ReservedStateVersion;
import org.elasticsearch.xcontent.ConstructingObjectParser;
@@ -130,7 +130,7 @@ public LocallyMountedSecrets(Environment environment) {
throw new IllegalStateException("Error processing secrets file", e);
}
} else {
secrets.set(new LocalFileSecrets(Map.of(), new ReservedStateVersion(-1L, Version.CURRENT)));
secrets.set(new LocalFileSecrets(Map.of(), new ReservedStateVersion(-1L, BuildVersion.current())));
}
this.secretsDir = secretsDirPath.toString();
this.secretsFile = secretsFilePath.toString();
15 changes: 15 additions & 0 deletions server/src/main/java/org/elasticsearch/env/BuildVersion.java
@@ -72,6 +72,16 @@ public static BuildVersion fromVersionId(int versionId) {
return CurrentExtensionHolder.BUILD_EXTENSION.fromVersionId(versionId);
}

/**
* Create a {@link BuildVersion} from a version string.
*
* @param version A string representation of a version
* @return a version representing a build or release of Elasticsearch
*/
public static BuildVersion fromString(String version) {
return CurrentExtensionHolder.BUILD_EXTENSION.fromString(version);
}

/**
* Get the current build version.
*
@@ -110,6 +120,11 @@ public BuildVersion currentBuildVersion() {
public BuildVersion fromVersionId(int versionId) {
return new DefaultBuildVersion(versionId);
}

@Override
public BuildVersion fromString(String version) {
return new DefaultBuildVersion(version);
}
}

}
