Skip to content

Commit

Permalink
Fix layout when aggregating with aliases (elastic#117832)
Browse files Browse the repository at this point in the history
Fix elastic#117770 Fix
elastic#117784

elastic#117699 made changes to how
we plan aggregations which were supposed to only trigger when a query
contained a `CATEGORIZE`, but accidentally changed a code path that
seems to only be required for interoperability with pre-8.13 nodes.
Because of this, we didn't notice failing tests until the periodic bwc
tests ran.

The code this PR fixes addresses situations where `Aggregate` plan nodes
contained _aliases inside the aggregates_. On `main` and `8.x`, this is
effectively an illegal state: since
elastic#104958, aliases in the
aggregates become `Eval` nodes before and after the `Aggregate` node.

But here, on 8.x, we'll just fix this code path so that it behaves
exactly as before elastic#117699.

If this passes the full-bwc test, I plan to forward-port this by
removing the obsolete code path on `main`.
  • Loading branch information
alex-spies authored Dec 3, 2024
1 parent d5ba0a3 commit ca48c37
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ h:d | languages:i
1.41 | null
;

groupByAlias#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117770]
groupByAlias
from employees | rename languages as l | keep l, height | stats m = min(height) by l | sort l;

m:d | l:i
Expand Down Expand Up @@ -951,7 +951,7 @@ c:l
49
;

countFieldWithGrouping#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784]
countFieldWithGrouping
from employees | rename languages as l | where emp_no < 10050 | stats c = count(emp_no) by l | sort l;

c:l | l:i
Expand All @@ -963,7 +963,7 @@ c:l | l:i
10 | null
;

countFieldWithAliasWithGrouping#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784]
countFieldWithAliasWithGrouping
from employees | rename languages as l | eval e = emp_no | where emp_no < 10050 | stats c = count(e) by l | sort l;

c:l | l:i
Expand All @@ -982,7 +982,7 @@ c:l
49
;

countEvalExpWithGrouping#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784]
countEvalExpWithGrouping
from employees | rename languages as l | eval e = case(emp_no < 10050, emp_no, null) | stats c = count(e) by l | sort l;

c:l | l:i
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ id:i |name:s |version:v |o:v
13 |lllll |null |null
;

countVersion#[skip:-8.13.99,reason:muted, see https://github.com/elastic/elasticsearch/issues/117784]
countVersion
FROM apps | RENAME name AS k | STATS v = COUNT(version) BY k | SORT k;

v:l | k:s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,14 @@ public final PhysicalOperation groupingPhysicalOperation(
* - before stats (keep x = a | stats by x) which requires the partial input to use a's channel
* - after stats (stats by a | keep x = a) which causes the output layout to refer to the follow-up alias
*/
// TODO: This is likely required only for pre-8.14 node compatibility; confirm and remove if possible.
// Since https://github.com/elastic/elasticsearch/pull/104958, it shouldn't be possible to have aliases in the aggregates
// which the groupings refer to. Except for `BY CATEGORIZE(field)`, which remains as alias in the grouping, all aliases
// should've become EVALs before or after the STATS.
for (NamedExpression agg : aggregates) {
if (agg instanceof Alias a) {
if (a.child() instanceof Attribute attr) {
if (groupAttribute.id().equals(attr.id())) {
if (sourceGroupAttribute.id().equals(attr.id())) {
groupAttributeLayout.nameIds().add(a.id());
// TODO: investigate whether a break could be used since it shouldn't be possible to have multiple
// attributes pointing to the same attribute
Expand All @@ -133,8 +137,8 @@ public final PhysicalOperation groupingPhysicalOperation(
// is in the output form
// if the group points to an alias declared in the aggregate, use the alias child as source
else if (aggregatorMode.isOutputPartial()) {
if (groupAttribute.semanticEquals(a.toAttribute())) {
groupAttribute = attr;
if (sourceGroupAttribute.semanticEquals(a.toAttribute())) {
sourceGroupAttribute = attr;
break;
}
}
Expand Down

0 comments on commit ca48c37

Please sign in to comment.