From ca48c3762818e79fd644ebaa908288bb234abe05 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 14:13:30 +0100 Subject: [PATCH] Fix layout when aggregating with aliases (#117832) Fix https://github.com/elastic/elasticsearch/issues/117770 Fix https://github.com/elastic/elasticsearch/issues/117784 https://github.com/elastic/elasticsearch/pull/117699 made changes to how we plan aggregations which were supposed to only trigger when a query contained a `CATEGORIZE`, but accidentally changed a code path that seems to only be required for interoperability with pre-8.13 nodes. Because of this, we didn't notice failing tests until the periodic bwc tests ran. The code this PR fixes addresses situations where `Aggregate` plan nodes contained _aliases inside the aggregates_. On `main` and `8.x`, this is effectively an illegal state: since https://github.com/elastic/elasticsearch/pull/104958, aliases in the aggregates become `Eval` nodes before and after the `Aggregate` node. But here, on 8.x, we'll just fix this code path so that it behaves exactly as before https://github.com/elastic/elasticsearch/pull/117699. If this passes the full-bwc test, I plan to forward-port this by removing the obsolete code path on `main`. --- .../qa/testFixtures/src/main/resources/stats.csv-spec | 8 ++++---- .../testFixtures/src/main/resources/version.csv-spec | 2 +- .../planner/AbstractPhysicalOperationProviders.java | 10 +++++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index f61452f13fb53..6e0a55655ee1c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -522,7 +522,7 @@ h:d | languages:i 1.41 | null ; -groupByAlias#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117770] +groupByAlias from employees | rename languages as l | keep l, height | stats m = min(height) by l | sort l; m:d | l:i @@ -951,7 +951,7 @@ c:l 49 ; -countFieldWithGrouping#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784] +countFieldWithGrouping from employees | rename languages as l | where emp_no < 10050 | stats c = count(emp_no) by l | sort l; c:l | l:i @@ -963,7 +963,7 @@ c:l | l:i 10 | null ; -countFieldWithAliasWithGrouping#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784] +countFieldWithAliasWithGrouping from employees | rename languages as l | eval e = emp_no | where emp_no < 10050 | stats c = count(e) by l | sort l; c:l | l:i @@ -982,7 +982,7 @@ c:l 49 ; -countEvalExpWithGrouping#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784] +countEvalExpWithGrouping from employees | rename languages as l | eval e = case(emp_no < 10050, emp_no, null) | stats c = count(e) by l | sort l; c:l | l:i diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/version.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/version.csv-spec index a4f6bd554881c..eb0d6d75a7d07 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/version.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/version.csv-spec @@ -159,7 +159,7 @@ id:i |name:s |version:v |o:v 13 |lllll |null |null ; -countVersion#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784] +countVersion FROM apps | RENAME name AS k | STATS v = COUNT(version) BY k | SORT k; v:l | k:s diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java index 69e2d1c45aa3c..35aba7665ec87 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java @@ -120,10 +120,14 @@ public final PhysicalOperation groupingPhysicalOperation( * - before stats (keep x = a | stats by x) which requires the partial input to use a's channel * - after stats (stats by a | keep x = a) which causes the output layout to refer to the follow-up alias */ + // TODO: This is likely required only for pre-8.14 node compatibility; confirm and remove if possible. + // Since https://github.com/elastic/elasticsearch/pull/104958, it shouldn't be possible to have aliases in the aggregates + // which the groupings refer to. Except for `BY CATEGORIZE(field)`, which remains as alias in the grouping, all aliases + // should've become EVALs before or after the STATS. for (NamedExpression agg : aggregates) { if (agg instanceof Alias a) { if (a.child() instanceof Attribute attr) { - if (groupAttribute.id().equals(attr.id())) { + if (sourceGroupAttribute.id().equals(attr.id())) { groupAttributeLayout.nameIds().add(a.id()); // TODO: investigate whether a break could be used since it shouldn't be possible to have multiple // attributes pointing to the same attribute @@ -133,8 +137,8 @@ public final PhysicalOperation groupingPhysicalOperation( // is in the output form // if the group points to an alias declared in the aggregate, use the alias child as source else if (aggregatorMode.isOutputPartial()) { - if (groupAttribute.semanticEquals(a.toAttribute())) { - groupAttribute = attr; + if (sourceGroupAttribute.semanticEquals(a.toAttribute())) { + sourceGroupAttribute = attr; break; } }