From c148f0632c52ab8710dc0b8890a03873def0fd27 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 15:01:02 +0800 Subject: [PATCH 01/11] fix(optimizer): reduce expr tree depth when merge logical operations Signed-off-by: Bugen Zhao --- .../tests/testdata/output/ch_benchmark.yaml | 10 +++--- .../testdata/output/index_selection.yaml | 20 ++++++------ .../tests/testdata/output/join.yaml | 2 +- .../logical_scan_predicate_eliminate.yaml | 4 +-- .../tests/testdata/output/nexmark.yaml | 14 ++++----- .../tests/testdata/output/nexmark_source.yaml | 12 +++---- .../output/nexmark_temporal_filter.yaml | 4 +-- .../testdata/output/nexmark_watermark.yaml | 18 +++++------ .../tests/testdata/output/range_scan.yaml | 2 +- .../testdata/output/temporal_filter.yaml | 24 +++++++------- .../tests/testdata/output/tpch.yaml | 10 +++--- src/frontend/src/expr/mod.rs | 4 +-- src/frontend/src/expr/utils.rs | 30 ++++++++++-------- .../src/optimizer/plan_node/logical_filter.rs | 31 ++++++++----------- .../src/optimizer/plan_node/logical_join.rs | 2 +- 15 files changed, 93 insertions(+), 94 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml index ce98b8bea75c9..f78fd13ffa17b 100644 --- a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml @@ -2841,20 +2841,20 @@ LogicalProject { exprs: [sum(order_line.ol_amount)] } └─LogicalAgg { aggs: [sum(order_line.ol_amount)] } └─LogicalProject { exprs: [order_line.ol_amount] } - └─LogicalFilter { predicate: (order_line.ol_i_id = item.i_id) AND (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) AND (item.i_price >= 1:Int32::Decimal) AND (item.i_price <= 400000:Int32::Decimal) AND (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) } + └─LogicalFilter { predicate: (order_line.ol_i_id = item.i_id) AND (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) AND (item.i_price >= 1:Int32::Decimal) AND (item.i_price <= 400000:Int32::Decimal) AND ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR ((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)))) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalScan { table: order_line, columns: [order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number, order_line.ol_i_id, order_line.ol_supply_w_id, order_line.ol_delivery_d, order_line.ol_quantity, order_line.ol_amount, order_line.ol_dist_info] } └─LogicalScan { table: item, columns: [item.i_id, item.i_im_id, item.i_name, item.i_price, item.i_data] } optimized_logical_plan_for_batch: |- LogicalAgg { aggs: [sum(order_line.ol_amount)] } - └─LogicalJoin { type: Inner, on: (order_line.ol_i_id = item.i_id) AND (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))), output: [order_line.ol_amount] } + └─LogicalJoin { type: Inner, on: (order_line.ol_i_id = item.i_id) AND ((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR ((Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)))), output: [order_line.ol_amount] } ├─LogicalScan { table: order_line, output_columns: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount], required_columns: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount, order_line.ol_quantity], predicate: (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) } └─LogicalScan { table: item, output_columns: [item.i_id, item.i_data], required_columns: [item.i_id, item.i_data, item.i_price], predicate: (item.i_price >= 1:Int32::Decimal) AND (item.i_price <= 400000:Int32::Decimal) } batch_plan: |- BatchSimpleAgg { aggs: [sum(sum(order_line.ol_amount))] } └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [sum(order_line.ol_amount)] } - └─BatchLookupJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id AND (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) AND (item.i_price >= 1:Decimal) AND (item.i_price <= 400000:Decimal), output: [order_line.ol_amount], lookup table: item } + └─BatchLookupJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id AND ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR ((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)))) AND (item.i_price >= 1:Decimal) AND (item.i_price <= 400000:Decimal), output: [order_line.ol_amount], lookup table: item } └─BatchExchange { order: [], dist: UpstreamHashShard(order_line.ol_i_id) } └─BatchProject { exprs: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount] } └─BatchFilter { predicate: (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) } @@ -2866,7 +2866,7 @@ └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(order_line.ol_amount)] } └─StreamProject { exprs: [order_line.ol_amount, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_i_id, item.i_id] } - └─StreamFilter { predicate: (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) } + └─StreamFilter { predicate: ((Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)) OR ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR (Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)))) } └─StreamHashJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id, output: all } ├─StreamExchange { dist: HashShard(order_line.ol_i_id) } │ └─StreamProject { exprs: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] } @@ -2888,7 +2888,7 @@ Fragment 1 StreamStatelessSimpleAgg { aggs: [sum(order_line.ol_amount)] } └── StreamProject { exprs: [order_line.ol_amount, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_i_id, item.i_id] } - └── StreamFilter { predicate: (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) } + └── StreamFilter { predicate: ((Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)) OR ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR (Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)))) } └── StreamHashJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id, output: all } { tables: [ HashJoinLeft: 1, HashJoinDegreeLeft: 2, HashJoinRight: 3, HashJoinDegreeRight: 4 ] } ├── StreamExchange Hash([1]) from 2 └── StreamExchange Hash([0]) from 3 diff --git a/src/frontend/planner_test/tests/testdata/output/index_selection.yaml b/src/frontend/planner_test/tests/testdata/output/index_selection.yaml index 82c2a5bbf7ec6..5ac9026633afb 100644 --- a/src/frontend/planner_test/tests/testdata/output/index_selection.yaml +++ b/src/frontend/planner_test/tests/testdata/output/index_selection.yaml @@ -306,7 +306,7 @@ select * from t1 where p = 1 or (a = 2 and b = 3 and c = 4) batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR (((t1.a = 2:Int32) AND (t1.b = 3:Decimal)) AND (t1.c = 4:Int32))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + └─BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR ((t1.b = 3:Decimal) AND ((t1.c = 4:Int32) AND (t1.a = 2:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchExchange { order: [], dist: UpstreamHashShard(idx2.t1._row_id) } └─BatchHashAgg { group_key: [idx2.t1._row_id], aggs: [] } └─BatchExchange { order: [], dist: HashShard(idx2.t1._row_id) } @@ -316,7 +316,7 @@ └─BatchExchange { order: [], dist: Single } └─BatchScan { table: idx4, columns: [idx4.t1._row_id], scan_ranges: [idx4.p = Int32(1)], distribution: SomeShard } batch_local_plan: |- - BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR (((t1.a = 2:Int32) AND (t1.b = 3:Decimal)) AND (t1.c = 4:Int32))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR ((t1.b = 3:Decimal) AND ((t1.c = 4:Int32) AND (t1.a = 2:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchHashAgg { group_key: [idx2.t1._row_id], aggs: [] } └─BatchUnion { all: true } ├─BatchExchange { order: [], dist: Single } @@ -332,13 +332,13 @@ select * from t1 where a = 1 or b = 2 or c = 3 or p = 4 or a = 5 batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND (((((t1.a = 1:Int32) OR (t1.b = 2:Decimal)) OR (t1.c = 3:Int32)) OR (t1.p = 4:Int32)) OR (t1.a = 5:Int32)), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + └─BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND (((t1.a = 5:Int32) OR (t1.a = 1:Int32)) OR ((t1.b = 2:Decimal) OR ((t1.c = 3:Int32) OR (t1.p = 4:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchExchange { order: [], dist: UpstreamHashShard(idx1.t1._row_id) } └─BatchHashAgg { group_key: [idx1.t1._row_id], aggs: [] } └─BatchExchange { order: [], dist: HashShard(idx1.t1._row_id) } └─BatchUnion { all: true } ├─BatchExchange { order: [], dist: Single } - │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(5), idx1.a = Int32(1)], distribution: SomeShard } + │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(1), idx1.a = Int32(5)], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } │ └─BatchScan { table: idx2, columns: [idx2.t1._row_id], scan_ranges: [idx2.b = Decimal(Normalized(2))], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } @@ -346,11 +346,11 @@ └─BatchExchange { order: [], dist: Single } └─BatchScan { table: idx4, columns: [idx4.t1._row_id], scan_ranges: [idx4.p = Int32(4)], distribution: SomeShard } batch_local_plan: |- - BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND (((((t1.a = 1:Int32) OR (t1.b = 2:Decimal)) OR (t1.c = 3:Int32)) OR (t1.p = 4:Int32)) OR (t1.a = 5:Int32)), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND (((t1.a = 5:Int32) OR (t1.a = 1:Int32)) OR ((t1.b = 2:Decimal) OR ((t1.c = 3:Int32) OR (t1.p = 4:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchHashAgg { group_key: [idx1.t1._row_id], aggs: [] } └─BatchUnion { all: true } ├─BatchExchange { order: [], dist: Single } - │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(5), idx1.a = Int32(1)], distribution: SomeShard } + │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(1), idx1.a = Int32(5)], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } │ └─BatchScan { table: idx2, columns: [idx2.t1._row_id], scan_ranges: [idx2.b = Decimal(Normalized(2))], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } @@ -475,11 +475,11 @@ select * from t1 where a > 1 or c > 1 or b > 1 batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((t1.a > 1:Int32) OR (t1.c > 1:Int32)) OR (t1.b > 1:Decimal)) } + └─BatchFilter { predicate: ((t1.b > 1:Decimal) OR ((t1.a > 1:Int32) OR (t1.c > 1:Int32))) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } batch_local_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((t1.a > 1:Int32) OR (t1.c > 1:Int32)) OR (t1.b > 1:Decimal)) } + └─BatchFilter { predicate: ((t1.b > 1:Decimal) OR ((t1.a > 1:Int32) OR (t1.c > 1:Int32))) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } - sql: | create table t1 (a int, b numeric, c bigint, p int); @@ -490,11 +490,11 @@ select * from t1 where a between 1 and 8 or b between 1 and 8 or c between 1 and 8; batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: ((((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal))) OR ((t1.c >= 1:Int32) AND (t1.c <= 8:Int32))) } + └─BatchFilter { predicate: (((t1.c >= 1:Int32) AND (t1.c <= 8:Int32)) OR (((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal)))) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } batch_local_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: ((((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal))) OR ((t1.c >= 1:Int32) AND (t1.c <= 8:Int32))) } + └─BatchFilter { predicate: (((t1.c >= 1:Int32) AND (t1.c <= 8:Int32)) OR (((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal)))) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } - sql: | create table t1 (a int, b numeric, c bigint, p int); diff --git a/src/frontend/planner_test/tests/testdata/output/join.yaml b/src/frontend/planner_test/tests/testdata/output/join.yaml index 2db3b8cc3994a..a17bc8fab2d38 100644 --- a/src/frontend/planner_test/tests/testdata/output/join.yaml +++ b/src/frontend/planner_test/tests/testdata/output/join.yaml @@ -204,7 +204,7 @@ StreamMaterialize { columns: [x, i.t._row_id(hidden), i.t._row_id#1(hidden), i.x(hidden), i.t._row_id#2(hidden), i.t._row_id#3(hidden), i.x#1(hidden)], stream_key: [i.t._row_id, i.t._row_id#1, i.x, i.t._row_id#2, i.t._row_id#3, i.x#1], pk_columns: [i.t._row_id, i.t._row_id#1, i.x, i.t._row_id#2, i.t._row_id#3, i.x#1], pk_conflict: NoCheck } └─StreamExchange { dist: HashShard(i.t._row_id, i.t._row_id, i.x, i.t._row_id, i.t._row_id, i.x) } └─StreamProject { exprs: [Coalesce(i.x, i.x) as $expr1, i.t._row_id, i.t._row_id, i.x, i.t._row_id, i.t._row_id, i.x] } - └─StreamFilter { predicate: (((((IsNotNull(i.t._row_id) OR IsNotNull(i.t._row_id)) OR IsNotNull(i.x)) OR IsNotNull(i.t._row_id)) OR IsNotNull(i.t._row_id)) OR IsNotNull(i.x)) } + └─StreamFilter { predicate: ((IsNotNull(i.t._row_id) OR IsNotNull(i.t._row_id)) OR ((IsNotNull(i.x) OR IsNotNull(i.t._row_id)) OR (IsNotNull(i.t._row_id) OR IsNotNull(i.x)))) } └─StreamHashJoin { type: FullOuter, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id, i.t._row_id, i.t._row_id] } ├─StreamShare { id: 4 } │ └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.t._row_id, i.t._row_id] } diff --git a/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml b/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml index 684e4c9144d01..763538b42406a 100644 --- a/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml +++ b/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml @@ -45,9 +45,9 @@ and (c1 > 1 or c2 > 2 or c3 > 3); logical_plan: |- LogicalProject { exprs: [t1.c1, t1.c2, t1.c3] } - └─LogicalFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND (((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) OR (t1.c3 > 3:Int32)) } + └─LogicalFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND ((t1.c3 > 3:Int32) OR ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32))) } └─LogicalScan { table: t1, columns: [t1.c1, t1.c2, t1.c3, t1._row_id] } batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND (((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) OR (t1.c3 > 3:Int32)) } + └─BatchFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND ((t1.c3 > 3:Int32) OR ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32))) } └─BatchScan { table: t1, columns: [t1.c1, t1.c2, t1.c3], distribution: SomeShard } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml index 298653450f659..8f16219a0830c 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml @@ -107,20 +107,20 @@ sql: SELECT auction, price FROM bid WHERE auction = 1007 OR auction = 1020 OR auction = 2001 OR auction = 2019 OR auction = 2087; batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR (bid.auction = 2001:Int32)) OR (bid.auction = 2019:Int32)) OR (bid.auction = 2087:Int32)) } + └─BatchFilter { predicate: (((bid.auction = 2087:Int32) OR (bid.auction = 1007:Int32)) OR ((bid.auction = 1020:Int32) OR ((bid.auction = 2001:Int32) OR (bid.auction = 2019:Int32)))) } └─BatchScan { table: bid, columns: [bid.auction, bid.price], distribution: SomeShard } sink_plan: |- StreamSink { type: append-only, columns: [auction, price, bid._row_id(hidden)] } - └─StreamFilter { predicate: (((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR (bid.auction = 2001:Int32)) OR (bid.auction = 2019:Int32)) OR (bid.auction = 2087:Int32)) } + └─StreamFilter { predicate: (((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR ((bid.auction = 2019:Int32) OR (bid.auction = 2087:Int32)))) } └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], stream_scan_type: ArrangementBackfill, stream_key: [bid._row_id], pk: [_row_id], dist: UpstreamHashShard(bid._row_id) } stream_plan: |- StreamMaterialize { columns: [auction, price, bid._row_id(hidden)], stream_key: [bid._row_id], pk_columns: [bid._row_id], pk_conflict: NoCheck } - └─StreamFilter { predicate: (((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR (bid.auction = 2001:Int32)) OR (bid.auction = 2019:Int32)) OR (bid.auction = 2087:Int32)) } + └─StreamFilter { predicate: (((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR ((bid.auction = 2019:Int32) OR (bid.auction = 2087:Int32)))) } └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], stream_scan_type: ArrangementBackfill, stream_key: [bid._row_id], pk: [_row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction, price, bid._row_id(hidden)], stream_key: [bid._row_id], pk_columns: [bid._row_id], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } - └── StreamFilter { predicate: (((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR (bid.auction = 2001:Int32)) OR (bid.auction = 2019:Int32)) OR (bid.auction = 2087:Int32)) } + └── StreamFilter { predicate: (((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR ((bid.auction = 2019:Int32) OR (bid.auction = 2087:Int32)))) } └── StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], stream_scan_type: ArrangementBackfill, stream_key: [bid._row_id], pk: [_row_id], dist: UpstreamHashShard(bid._row_id) } ├── tables: [ StreamScan: 0 ] ├── Upstream @@ -142,7 +142,7 @@ A.category = 10 and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca'); batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchLookupJoin { type: Inner, predicate: auction.seller = person.id AND (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)), output: [person.name, person.city, person.state, auction.id], lookup table: person } + └─BatchLookupJoin { type: Inner, predicate: auction.seller = person.id AND ((person.state = 'or':Varchar) OR ((person.state = 'id':Varchar) OR (person.state = 'ca':Varchar))), output: [person.name, person.city, person.state, auction.id], lookup table: person } └─BatchExchange { order: [], dist: UpstreamHashShard(auction.seller) } └─BatchProject { exprs: [auction.id, auction.seller] } └─BatchFilter { predicate: (auction.category = 10:Int32) } @@ -156,7 +156,7 @@ │ └─StreamFilter { predicate: (auction.category = 10:Int32) } │ └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], stream_scan_type: ArrangementBackfill, stream_key: [auction.id], pk: [id], dist: UpstreamHashShard(auction.id) } └─StreamExchange { dist: HashShard(person.id) } - └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) } + └─StreamFilter { predicate: ((person.state = 'id':Varchar) OR ((person.state = 'ca':Varchar) OR (person.state = 'or':Varchar))) } └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], stream_scan_type: ArrangementBackfill, stream_key: [person.id], pk: [id], dist: UpstreamHashShard(person.id) } stream_dist_plan: |+ Fragment 0 @@ -179,7 +179,7 @@ └── BatchPlanNode Fragment 3 - StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) } + StreamFilter { predicate: ((person.state = 'id':Varchar) OR ((person.state = 'ca':Varchar) OR (person.state = 'or':Varchar))) } └── StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], stream_scan_type: ArrangementBackfill, stream_key: [person.id], pk: [id], dist: UpstreamHashShard(person.id) } ├── tables: [ StreamScan: 5 ] ├── Upstream diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml index 823fa85459df7..e434af2189670 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml @@ -116,13 +116,13 @@ sql: SELECT auction, price FROM bid WHERE auction = 1007 OR auction = 1020 OR auction = 2001 OR auction = 2019 OR auction = 2087; batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR (auction = 2001:Int32)) OR (auction = 2019:Int32)) OR (auction = 2087:Int32)) } + └─BatchFilter { predicate: (((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR ((auction = 2019:Int32) OR (auction = 2087:Int32)))) } └─BatchProject { exprs: [auction, price] } └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_plan: |- StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } └─StreamProject { exprs: [auction, price, _row_id] } - └─StreamFilter { predicate: (((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR (auction = 2001:Int32)) OR (auction = 2019:Int32)) OR (auction = 2087:Int32)) } + └─StreamFilter { predicate: (((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR ((auction = 2019:Int32) OR (auction = 2087:Int32)))) } └─StreamRowIdGen { row_id_index: 7 } └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_dist_plan: |+ @@ -130,7 +130,7 @@ StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] └── StreamProject { exprs: [auction, price, _row_id] } - └── StreamFilter { predicate: (((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR (auction = 2001:Int32)) OR (auction = 2019:Int32)) OR (auction = 2087:Int32)) } + └── StreamFilter { predicate: (((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR ((auction = 2019:Int32) OR (auction = 2087:Int32)))) } └── StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { tables: [ Source: 0 ] } @@ -156,7 +156,7 @@ │ └─BatchProject { exprs: [id, seller, category] } │ └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } └─BatchExchange { order: [], dist: HashShard(id) } - └─BatchFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } + └─BatchFilter { predicate: ((state = 'ca':Varchar) OR ((state = 'or':Varchar) OR (state = 'id':Varchar))) } └─BatchProject { exprs: [id, name, city, state] } └─BatchSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } stream_plan: |- @@ -168,7 +168,7 @@ │ └─StreamRowIdGen { row_id_index: 10 } │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } └─StreamExchange { dist: HashShard(id) } - └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } + └─StreamFilter { predicate: ((state = 'id':Varchar) OR ((state = 'ca':Varchar) OR (state = 'or':Varchar))) } └─StreamRowIdGen { row_id_index: 8 } └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } stream_dist_plan: |+ @@ -189,7 +189,7 @@ └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { tables: [ Source: 4 ] } Fragment 3 - StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } + StreamFilter { predicate: ((state = 'id':Varchar) OR ((state = 'ca':Varchar) OR (state = 'or':Varchar))) } └── StreamRowIdGen { row_id_index: 8 } └── StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } { tables: [ Source: 5 ] } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml index f77e975780c8a..d17b08e7b00f5 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml @@ -116,7 +116,7 @@ StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, _row_id] } └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, person, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } - ├─StreamFilter { predicate: (((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR (Field(bid, 0:Int32) = 2001:Int32)) OR (Field(bid, 0:Int32) = 2019:Int32)) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } + ├─StreamFilter { predicate: (((Field(bid, 0:Int32) = 1020:Int32) OR (Field(bid, 0:Int32) = 2001:Int32)) OR ((Field(bid, 0:Int32) = 2019:Int32) OR ((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)))) AND (event_type = 2:Int32) } │ └─StreamRowIdGen { row_id_index: 5 } │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } @@ -128,7 +128,7 @@ StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } └── StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, _row_id] } └── StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, person, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { tables: [ DynamicFilterLeft: 0, DynamicFilterRight: 1 ] } - ├── StreamFilter { predicate: (((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR (Field(bid, 0:Int32) = 2001:Int32)) OR (Field(bid, 0:Int32) = 2019:Int32)) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } + ├── StreamFilter { predicate: (((Field(bid, 0:Int32) = 1020:Int32) OR (Field(bid, 0:Int32) = 2001:Int32)) OR ((Field(bid, 0:Int32) = 2019:Int32) OR ((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)))) AND (event_type = 2:Int32) } │ └── StreamRowIdGen { row_id_index: 5 } │ └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } │ └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { tables: [ Source: 2 ] } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml index 6dd731cffffb0..14ec8ccda0dbc 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml @@ -77,13 +77,13 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 2:Int32) as $expr3] } - └─BatchFilter { predicate: (((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR (Field(bid, 0:Int32) = 2001:Int32)) OR (Field(bid, 0:Int32) = 2019:Int32)) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } + └─BatchFilter { predicate: (((Field(bid, 0:Int32) = 2019:Int32) OR (Field(bid, 0:Int32) = 2087:Int32)) OR ((Field(bid, 0:Int32) = 1007:Int32) OR ((Field(bid, 0:Int32) = 1020:Int32) OR (Field(bid, 0:Int32) = 2001:Int32)))) AND (event_type = 2:Int32) } └─BatchProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_plan: |- StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 2:Int32) as $expr3, _row_id] } - └─StreamFilter { predicate: (((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR (Field(bid, 0:Int32) = 2001:Int32)) OR (Field(bid, 0:Int32) = 2019:Int32)) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } + └─StreamFilter { predicate: (((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)) OR ((Field(bid, 0:Int32) = 1020:Int32) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32)))) AND (event_type = 2:Int32) } └─StreamRowIdGen { row_id_index: 5 } └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -92,7 +92,7 @@ Fragment 0 StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } └── StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 2:Int32) as $expr3, _row_id] } - └── StreamFilter { predicate: (((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR (Field(bid, 0:Int32) = 2001:Int32)) OR (Field(bid, 0:Int32) = 2019:Int32)) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } + └── StreamFilter { predicate: (((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)) OR ((Field(bid, 0:Int32) = 1020:Int32) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32)))) AND (event_type = 2:Int32) } └── StreamRowIdGen { row_id_index: 5 } └── StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } { tables: [ WatermarkFilter: 0 ] } └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -127,7 +127,7 @@ │ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } └─BatchExchange { order: [], dist: HashShard($expr3) } └─BatchProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, Field(person, 4:Int32) as $expr5, Field(person, 5:Int32) as $expr6] } - └─BatchFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } + └─BatchFilter { predicate: ((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32) } └─BatchProject { exprs: [event_type, person] } └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_plan: |- @@ -139,17 +139,17 @@ │ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } │ └─StreamShare { id: 6 } │ └─StreamProject { exprs: [event_type, person, auction, _row_id] } - │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } + │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'ca':Varchar) OR ((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'id':Varchar) OR ((Field(person, 5:Int32) = 'ca':Varchar) OR (Field(person, 5:Int32) = 'or':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32))) } │ └─StreamRowIdGen { row_id_index: 5 } │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } └─StreamExchange { dist: HashShard($expr4) } └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] } - └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } + └─StreamFilter { predicate: ((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32) } └─StreamShare { id: 6 } └─StreamProject { exprs: [event_type, person, auction, _row_id] } - └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } + └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'ca':Varchar) OR ((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'id':Varchar) OR ((Field(person, 5:Int32) = 'ca':Varchar) OR (Field(person, 5:Int32) = 'or':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32))) } └─StreamRowIdGen { row_id_index: 5 } └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -173,7 +173,7 @@ Fragment 3 StreamProject { exprs: [event_type, person, auction, _row_id] } - └── StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } + └── StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'ca':Varchar) OR ((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'id':Varchar) OR ((Field(person, 5:Int32) = 'ca':Varchar) OR (Field(person, 5:Int32) = 'or':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32))) } └── StreamRowIdGen { row_id_index: 5 } └── StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } { tables: [ WatermarkFilter: 4 ] } └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -181,7 +181,7 @@ Fragment 4 StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] } - └── StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } + └── StreamFilter { predicate: ((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32) } └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 1 ], read pk prefix len hint: 1 } diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index 95a4315481ef9..9add551b2d288 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -417,7 +417,7 @@ SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)) or (user_id != 3); batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((orders_count_by_user.user_id = 1:Int32) OR ((orders_count_by_user.user_id = 2:Int32) AND In(orders_count_by_user.date, 1111:Int32, 2222:Int32))) OR (orders_count_by_user.user_id <> 3:Int32)) } + └─BatchFilter { predicate: (((orders_count_by_user.user_id = 2:Int32) AND In(orders_count_by_user.date, 1111:Int32, 2222:Int32)) OR ((orders_count_by_user.user_id <> 3:Int32) OR (orders_count_by_user.user_id = 1:Int32))) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } - name: When any arm of or clause is not equal type, we can't convert it to scan range yet. before: diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml index 7bbd43ce3c35c..e0d1751bf44e6 100644 --- a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml @@ -395,17 +395,17 @@ │ └─StreamProject { exprs: [t1.ts, t1._row_id, 0:Int32] } │ └─StreamDynamicFilter { predicate: ($expr1 > now), output_watermarks: [$expr1], output: [t1.ts, $expr1, t1._row_id], cleaned_by_watermark: true } │ ├─StreamProject { exprs: [t1.ts, AddWithTimeZone(t1.ts, '01:00:00':Interval, 'UTC':Varchar) as $expr1, t1._row_id] } - │ │ └─StreamFilter { predicate: Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts)) } + │ │ └─StreamFilter { predicate: Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: (((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) OR IsNull(t1.ts)) } + │ │ └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))))) AND ((Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } │ │ └─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamNow { output: [now] } └─StreamExchange { dist: HashShard(t1._row_id, 1:Int32) } └─StreamProject { exprs: [t1.ts, t1._row_id, 1:Int32] } - └─StreamFilter { predicate: ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR IsNull(t1.ts)) } + └─StreamFilter { predicate: (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) } └─StreamShare { id: 2 } - └─StreamFilter { predicate: (((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) OR IsNull(t1.ts)) } + └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))))) AND ((Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } └─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } - name: Many Temporal filter with or predicate sql: | @@ -423,18 +423,18 @@ │ │ ├─StreamExchange { dist: HashShard(t._row_id, 0:Int32) } │ │ │ └─StreamProject { exprs: [t.t, t.a, t._row_id, 0:Int32], output_watermarks: [t.t] } │ │ │ └─StreamDynamicFilter { predicate: (t.t > $expr1), output_watermarks: [t.t], output: [t.t, t.a, t._row_id], cleaned_by_watermark: true } - │ │ │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not(IsNull(t.t)) AND Not((t.a < 1:Int32)) } + │ │ │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not((t.a < 1:Int32)) AND Not(IsNull(t.t)) } │ │ │ │ └─StreamShare { id: 2 } - │ │ │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } + │ │ │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } │ │ │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ │ │ └─StreamExchange { dist: Broadcast } │ │ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } │ │ │ └─StreamNow { output: [now] } │ │ └─StreamExchange { dist: HashShard(t._row_id, 1:Int32) } │ │ └─StreamProject { exprs: [t.t, t.a, t._row_id, 1:Int32] } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (IsNull(t.t) OR (t.a < 1:Int32)) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR IsNull(t.t)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } @@ -447,16 +447,16 @@ ├─StreamExchange { dist: HashShard(t._row_id, 0:Int32) } │ └─StreamProject { exprs: [t.t, t.a, t._row_id, 0:Int32], output_watermarks: [t.t] } │ └─StreamDynamicFilter { predicate: (t.t > $expr1), output_watermarks: [t.t], output: [t.t, t.a, t._row_id], cleaned_by_watermark: true } - │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not(IsNull(t.t)) AND Not((t.a < 1:Int32)) } + │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not((t.a < 1:Int32)) AND Not(IsNull(t.t)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } │ └─StreamNow { output: [now] } └─StreamExchange { dist: HashShard(t._row_id, 1:Int32) } └─StreamProject { exprs: [t.t, t.a, t._row_id, 1:Int32] } - └─StreamFilter { predicate: IsNotNull(t.a) AND (IsNull(t.t) OR (t.a < 1:Int32)) } + └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR IsNull(t.t)) } └─StreamShare { id: 2 } - └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } + └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/tpch.yaml b/src/frontend/planner_test/tests/testdata/output/tpch.yaml index dddddff210409..5f5cfa0749432 100644 --- a/src/frontend/planner_test/tests/testdata/output/tpch.yaml +++ b/src/frontend/planner_test/tests/testdata/output/tpch.yaml @@ -3650,14 +3650,14 @@ LogicalProject { exprs: [sum($expr1)] } └─LogicalAgg { aggs: [sum($expr1)] } └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32::Decimal - lineitem.l_discount)) as $expr1] } - └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (part.p_size >= 1:Int32) AND In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) AND (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32::Decimal)) AND (lineitem.l_quantity <= 11:Int32::Decimal)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32::Decimal)) AND (lineitem.l_quantity <= 40:Int32::Decimal)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32::Decimal)) AND (lineitem.l_quantity <= 20:Int32::Decimal)) AND (part.p_size <= 15:Int32))) } + └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (part.p_size >= 1:Int32) AND In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) AND ((((lineitem.l_quantity >= 10:Int32::Decimal) AND (lineitem.l_quantity <= 20:Int32::Decimal)) AND ((part.p_size <= 15:Int32) AND ((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)))) OR ((((lineitem.l_quantity >= 1:Int32::Decimal) AND (lineitem.l_quantity <= 11:Int32::Decimal)) AND ((part.p_size <= 5:Int32) AND ((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)))) OR (((lineitem.l_quantity >= 30:Int32::Decimal) AND (lineitem.l_quantity <= 40:Int32::Decimal)) AND ((part.p_size <= 10:Int32) AND ((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)))))) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_name, part.p_mfgr, part.p_brand, part.p_type, part.p_size, part.p_container, part.p_retailprice, part.p_comment] } optimized_logical_plan_for_batch: |- LogicalAgg { aggs: [sum($expr1)] } └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32::Decimal - lineitem.l_discount)) as $expr1] } - └─LogicalJoin { type: Inner, on: (part.p_partkey = lineitem.l_partkey) AND (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32::Decimal)) AND (lineitem.l_quantity <= 11:Int32::Decimal)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32::Decimal)) AND (lineitem.l_quantity <= 40:Int32::Decimal)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32::Decimal)) AND (lineitem.l_quantity <= 20:Int32::Decimal)) AND (part.p_size <= 15:Int32))), output: [lineitem.l_extendedprice, lineitem.l_discount] } + └─LogicalJoin { type: Inner, on: (part.p_partkey = lineitem.l_partkey) AND (((In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar) AND (lineitem.l_quantity >= 30:Int32::Decimal)) AND ((lineitem.l_quantity <= 40:Int32::Decimal) AND ((part.p_size <= 10:Int32) AND (part.p_brand = 'Brand#24':Varchar)))) OR (((In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar) AND (lineitem.l_quantity >= 10:Int32::Decimal)) AND ((lineitem.l_quantity <= 20:Int32::Decimal) AND ((part.p_size <= 15:Int32) AND (part.p_brand = 'Brand#32':Varchar)))) OR ((In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar) AND (lineitem.l_quantity >= 1:Int32::Decimal)) AND ((lineitem.l_quantity <= 11:Int32::Decimal) AND ((part.p_size <= 5:Int32) AND (part.p_brand = 'Brand#52':Varchar)))))), output: [lineitem.l_extendedprice, lineitem.l_discount] } ├─LogicalScan { table: lineitem, output_columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipinstruct, lineitem.l_shipmode], predicate: In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_brand, part.p_size, part.p_container], predicate: (part.p_size >= 1:Int32) } batch_plan: |- @@ -3665,7 +3665,7 @@ └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [sum($expr1)] } └─BatchProject { exprs: [(lineitem.l_extendedprice * (1:Decimal - lineitem.l_discount)) as $expr1] } - └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey AND (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Decimal)) AND (lineitem.l_quantity <= 11:Decimal)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Decimal)) AND (lineitem.l_quantity <= 40:Decimal)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Decimal)) AND (lineitem.l_quantity <= 20:Decimal)) AND (part.p_size <= 15:Int32))) AND (part.p_size >= 1:Int32), output: [lineitem.l_extendedprice, lineitem.l_discount], lookup table: part } + └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey AND ((((part.p_size <= 5:Int32) AND (part.p_brand = 'Brand#52':Varchar)) AND (In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar) AND ((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal)))) OR ((((part.p_size <= 10:Int32) AND (part.p_brand = 'Brand#24':Varchar)) AND (In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar) AND ((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal)))) OR (((part.p_size <= 15:Int32) AND (part.p_brand = 'Brand#32':Varchar)) AND (In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar) AND ((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal)))))) AND (part.p_size >= 1:Int32), output: [lineitem.l_extendedprice, lineitem.l_discount], lookup table: part } └─BatchExchange { order: [], dist: UpstreamHashShard(lineitem.l_partkey) } └─BatchProject { exprs: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount] } └─BatchFilter { predicate: In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) } @@ -3677,7 +3677,7 @@ └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1)] } └─StreamProject { exprs: [(lineitem.l_extendedprice * (1:Decimal - lineitem.l_discount)) as $expr1, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } - └─StreamFilter { predicate: (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Decimal)) AND (lineitem.l_quantity <= 11:Decimal)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Decimal)) AND (lineitem.l_quantity <= 40:Decimal)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Decimal)) AND (lineitem.l_quantity <= 20:Decimal)) AND (part.p_size <= 15:Int32))) } + └─StreamFilter { predicate: ((((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal)) AND ((part.p_size <= 10:Int32) AND ((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)))) OR ((((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal)) AND ((part.p_size <= 15:Int32) AND ((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)))) OR (((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal)) AND ((part.p_size <= 5:Int32) AND ((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)))))) } └─StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: all } ├─StreamExchange { dist: HashShard(lineitem.l_partkey) } │ └─StreamProject { exprs: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } @@ -3697,7 +3697,7 @@ Fragment 1 StreamStatelessSimpleAgg { aggs: [sum($expr1)] } └── StreamProject { exprs: [(lineitem.l_extendedprice * (1:Decimal - lineitem.l_discount)) as $expr1, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } - └── StreamFilter { predicate: (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Decimal)) AND (lineitem.l_quantity <= 11:Decimal)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Decimal)) AND (lineitem.l_quantity <= 40:Decimal)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Decimal)) AND (lineitem.l_quantity <= 20:Decimal)) AND (part.p_size <= 15:Int32))) } + └── StreamFilter { predicate: ((((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal)) AND ((part.p_size <= 10:Int32) AND ((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)))) OR ((((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal)) AND ((part.p_size <= 15:Int32) AND ((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)))) OR (((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal)) AND ((part.p_size <= 5:Int32) AND ((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)))))) } └── StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: all } { tables: [ HashJoinLeft: 1, HashJoinDegreeLeft: 2, HashJoinRight: 3, HashJoinDegreeRight: 4 ] } ├── StreamExchange Hash([0]) from 2 └── StreamExchange Hash([0]) from 3 diff --git a/src/frontend/src/expr/mod.rs b/src/frontend/src/expr/mod.rs index d14d99766bcc4..c90b5f563a2c7 100644 --- a/src/frontend/src/expr/mod.rs +++ b/src/frontend/src/expr/mod.rs @@ -1040,8 +1040,8 @@ impl ExprImpl { impl From for ExprImpl { fn from(c: Condition) -> Self { - merge_expr_by_binary( - c.conjunctions.into_iter(), + merge_expr_by_logical_binary( + c.conjunctions, ExprType::And, ExprImpl::literal_bool(true), ) diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index 54d0521b3f8ef..3060d8890d5e4 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::VecDeque; + use fixedbitset::FixedBitSet; use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_pb::expr::expr_node::Type; @@ -31,19 +33,21 @@ fn split_expr_by(expr: ExprImpl, op: ExprType, rets: &mut Vec) { } } -pub fn merge_expr_by_binary(mut exprs: I, op: ExprType, identity_elem: ExprImpl) -> ExprImpl +/// Merge the given expressions by the given logical operation. +/// +/// The `op` must be commutative and associative, typically `And` or `Or`. +pub fn merge_expr_by_logical_binary(exprs: I, op: ExprType, identity_elem: ExprImpl) -> ExprImpl where - I: Iterator, + I: IntoIterator, { - if let Some(e) = exprs.next() { - let mut ret = e; - for expr in exprs { - ret = FunctionCall::new(op, vec![ret, expr]).unwrap().into(); - } - ret - } else { - identity_elem + let mut exprs: VecDeque<_> = exprs.into_iter().collect(); + while exprs.len() > 1 { + let lhs = exprs.pop_front().unwrap(); + let rhs = exprs.pop_front().unwrap(); + let new_expr = FunctionCall::new(op, vec![lhs, rhs]).unwrap().into(); + exprs.push_back(new_expr); } + exprs.pop_front().unwrap_or(identity_elem) } /// Transform a bool expression to Conjunctive form. e.g. given expression is @@ -393,10 +397,10 @@ pub fn factorization_expr(expr: ExprImpl) -> Vec { disjunction.retain(|factor| !greatest_common_divider.contains(factor)); } // now disjunctions == [[A, B], [B], [E]] - let remaining = merge_expr_by_binary( + let remaining = merge_expr_by_logical_binary( disjunctions.into_iter().map(|conjunction| { - merge_expr_by_binary( - conjunction.into_iter(), + merge_expr_by_logical_binary( + conjunction, ExprType::And, ExprImpl::literal_bool(true), ) diff --git a/src/frontend/src/optimizer/plan_node/logical_filter.rs b/src/frontend/src/optimizer/plan_node/logical_filter.rs index 4ea9adf7aacac..8a425d9cd349f 100644 --- a/src/frontend/src/optimizer/plan_node/logical_filter.rs +++ b/src/frontend/src/optimizer/plan_node/logical_filter.rs @@ -25,7 +25,8 @@ use super::{ }; use crate::error::Result; use crate::expr::{ - assert_input_ref, ExprImpl, ExprRewriter, ExprType, ExprVisitor, FunctionCall, InputRef, + assert_input_ref, merge_expr_by_logical_binary, ExprImpl, ExprRewriter, ExprType, ExprVisitor, + FunctionCall, InputRef, }; use crate::optimizer::plan_node::expr_visitable::ExprVisitable; use crate::optimizer::plan_node::{ @@ -64,27 +65,21 @@ impl LogicalFilter { } } - /// Create a `LogicalFilter` to filter the rows with all keys are null. - pub fn filter_if_keys_all_null(input: PlanRef, key: &[usize]) -> PlanRef { + /// Create a `LogicalFilter` to filter out rows where all keys are null. + pub fn filter_out_all_null_keys(input: PlanRef, key: &[usize]) -> PlanRef { let schema = input.schema(); - let cond = key.iter().fold(ExprImpl::literal_bool(false), |expr, i| { - ExprImpl::FunctionCall( + let cond = merge_expr_by_logical_binary( + key.iter().unique().map(|&i| { FunctionCall::new_unchecked( - ExprType::Or, - vec![ - expr, - FunctionCall::new_unchecked( - ExprType::IsNotNull, - vec![InputRef::new(*i, schema.fields()[*i].data_type.clone()).into()], - DataType::Boolean, - ) - .into(), - ], + ExprType::IsNotNull, + vec![InputRef::new(i, schema.fields()[i].data_type.clone()).into()], DataType::Boolean, ) - .into(), - ) - }); + .into() + }), + ExprType::Or, + ExprImpl::literal_bool(false), + ); LogicalFilter::create_with_expr(input, cond) } diff --git a/src/frontend/src/optimizer/plan_node/logical_join.rs b/src/frontend/src/optimizer/plan_node/logical_join.rs index e9dac0de38b5a..a8a832407ba68 100644 --- a/src/frontend/src/optimizer/plan_node/logical_join.rs +++ b/src/frontend/src/optimizer/plan_node/logical_join.rs @@ -1451,7 +1451,7 @@ impl ToStream for LogicalJoin { ) .collect_vec(); let plan: PlanRef = join_with_pk.into(); - LogicalFilter::filter_if_keys_all_null(plan, &left_right_stream_keys) + LogicalFilter::filter_out_all_null_keys(plan, &left_right_stream_keys) } else { join_with_pk.into() }; From 30032c7d78138c79d4aaec057a9b2cb5e6d7cdb9 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 15:10:59 +0800 Subject: [PATCH 02/11] refactor api Signed-off-by: Bugen Zhao --- src/frontend/src/expr/mod.rs | 20 ++++++++++++---- src/frontend/src/expr/utils.rs | 18 +++++---------- .../src/optimizer/plan_node/logical_filter.rs | 23 ++++++++----------- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/frontend/src/expr/mod.rs b/src/frontend/src/expr/mod.rs index c90b5f563a2c7..9dd5f7be1d53d 100644 --- a/src/frontend/src/expr/mod.rs +++ b/src/frontend/src/expr/mod.rs @@ -206,6 +206,20 @@ impl ExprImpl { .into() } + /// Create a new expression by merging the given expressions by `And`. + /// + /// If `exprs` is empty, return a literal `true`. + pub fn and(exprs: impl IntoIterator) -> Self { + merge_expr_by_logical(exprs, ExprType::And, ExprImpl::literal_bool(true)) + } + + /// Create a new expression by merging the given expressions by `Or`. + /// + /// If `exprs` is empty, return a literal `false`. + pub fn or(exprs: impl IntoIterator) -> Self { + merge_expr_by_logical(exprs, ExprType::Or, ExprImpl::literal_bool(false)) + } + /// Collect all `InputRef`s' indexes in the expression. /// /// # Panics @@ -1040,11 +1054,7 @@ impl ExprImpl { impl From for ExprImpl { fn from(c: Condition) -> Self { - merge_expr_by_logical_binary( - c.conjunctions, - ExprType::And, - ExprImpl::literal_bool(true), - ) + ExprImpl::and(c.conjunctions) } } diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index 3060d8890d5e4..b44c92f2005b6 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -33,10 +33,10 @@ fn split_expr_by(expr: ExprImpl, op: ExprType, rets: &mut Vec) { } } -/// Merge the given expressions by the given logical operation. +/// Merge the given expressions by the a logical operation. /// /// The `op` must be commutative and associative, typically `And` or `Or`. -pub fn merge_expr_by_logical_binary(exprs: I, op: ExprType, identity_elem: ExprImpl) -> ExprImpl +pub(super) fn merge_expr_by_logical(exprs: I, op: ExprType, identity_elem: ExprImpl) -> ExprImpl where I: IntoIterator, { @@ -397,16 +397,10 @@ pub fn factorization_expr(expr: ExprImpl) -> Vec { disjunction.retain(|factor| !greatest_common_divider.contains(factor)); } // now disjunctions == [[A, B], [B], [E]] - let remaining = merge_expr_by_logical_binary( - disjunctions.into_iter().map(|conjunction| { - merge_expr_by_logical_binary( - conjunction, - ExprType::And, - ExprImpl::literal_bool(true), - ) - }), - ExprType::Or, - ExprImpl::literal_bool(false), + let remaining = ExprImpl::or( + disjunctions + .into_iter() + .map(|conjunction| ExprImpl::and(conjunction)), ); // now remaining is (A & B) | (B) | (E) // the result is C & D & ((A & B) | (B) | (E)) diff --git a/src/frontend/src/optimizer/plan_node/logical_filter.rs b/src/frontend/src/optimizer/plan_node/logical_filter.rs index 8a425d9cd349f..04cc2cb12a689 100644 --- a/src/frontend/src/optimizer/plan_node/logical_filter.rs +++ b/src/frontend/src/optimizer/plan_node/logical_filter.rs @@ -25,8 +25,7 @@ use super::{ }; use crate::error::Result; use crate::expr::{ - assert_input_ref, merge_expr_by_logical_binary, ExprImpl, ExprRewriter, ExprType, ExprVisitor, - FunctionCall, InputRef, + assert_input_ref, ExprImpl, ExprRewriter, ExprType, ExprVisitor, FunctionCall, InputRef, }; use crate::optimizer::plan_node::expr_visitable::ExprVisitable; use crate::optimizer::plan_node::{ @@ -68,18 +67,14 @@ impl LogicalFilter { /// Create a `LogicalFilter` to filter out rows where all keys are null. pub fn filter_out_all_null_keys(input: PlanRef, key: &[usize]) -> PlanRef { let schema = input.schema(); - let cond = merge_expr_by_logical_binary( - key.iter().unique().map(|&i| { - FunctionCall::new_unchecked( - ExprType::IsNotNull, - vec![InputRef::new(i, schema.fields()[i].data_type.clone()).into()], - DataType::Boolean, - ) - .into() - }), - ExprType::Or, - ExprImpl::literal_bool(false), - ); + let cond = ExprImpl::or(key.iter().unique().map(|&i| { + FunctionCall::new_unchecked( + ExprType::IsNotNull, + vec![InputRef::new(i, schema.fields()[i].data_type.clone()).into()], + DataType::Boolean, + ) + .into() + })); LogicalFilter::create_with_expr(input, cond) } From b2ea565d57f3e06a0abb940dade25e1969abb2a4 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 15:24:42 +0800 Subject: [PATCH 03/11] apply to more Signed-off-by: Bugen Zhao --- .../testdata/output/temporal_filter.yaml | 18 +++++++-------- .../rule/stream/split_now_or_rule.rs | 22 +++++-------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml index e0d1751bf44e6..5e4e002459018 100644 --- a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml @@ -395,9 +395,9 @@ │ └─StreamProject { exprs: [t1.ts, t1._row_id, 0:Int32] } │ └─StreamDynamicFilter { predicate: ($expr1 > now), output_watermarks: [$expr1], output: [t1.ts, $expr1, t1._row_id], cleaned_by_watermark: true } │ ├─StreamProject { exprs: [t1.ts, AddWithTimeZone(t1.ts, '01:00:00':Interval, 'UTC':Varchar) as $expr1, t1._row_id] } - │ │ └─StreamFilter { predicate: Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) } + │ │ └─StreamFilter { predicate: Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))))) AND ((Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } + │ │ └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))))) AND ((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } │ │ └─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamNow { output: [now] } @@ -405,7 +405,7 @@ └─StreamProject { exprs: [t1.ts, t1._row_id, 1:Int32] } └─StreamFilter { predicate: (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) } └─StreamShare { id: 2 } - └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))))) AND ((Not(IsNull(t1.ts)) AND Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } + └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))))) AND ((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } └─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } - name: Many Temporal filter with or predicate sql: | @@ -423,9 +423,9 @@ │ │ ├─StreamExchange { dist: HashShard(t._row_id, 0:Int32) } │ │ │ └─StreamProject { exprs: [t.t, t.a, t._row_id, 0:Int32], output_watermarks: [t.t] } │ │ │ └─StreamDynamicFilter { predicate: (t.t > $expr1), output_watermarks: [t.t], output: [t.t, t.a, t._row_id], cleaned_by_watermark: true } - │ │ │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not((t.a < 1:Int32)) AND Not(IsNull(t.t)) } + │ │ │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not(IsNull(t.t)) AND Not((t.a < 1:Int32)) } │ │ │ │ └─StreamShare { id: 2 } - │ │ │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + │ │ │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } │ │ │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ │ │ └─StreamExchange { dist: Broadcast } │ │ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } @@ -434,7 +434,7 @@ │ │ └─StreamProject { exprs: [t.t, t.a, t._row_id, 1:Int32] } │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR IsNull(t.t)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } @@ -447,9 +447,9 @@ ├─StreamExchange { dist: HashShard(t._row_id, 0:Int32) } │ └─StreamProject { exprs: [t.t, t.a, t._row_id, 0:Int32], output_watermarks: [t.t] } │ └─StreamDynamicFilter { predicate: (t.t > $expr1), output_watermarks: [t.t], output: [t.t, t.a, t._row_id], cleaned_by_watermark: true } - │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not((t.a < 1:Int32)) AND Not(IsNull(t.t)) } + │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not(IsNull(t.t)) AND Not((t.a < 1:Int32)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } @@ -458,5 +458,5 @@ └─StreamProject { exprs: [t.t, t.a, t._row_id, 1:Int32] } └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR IsNull(t.t)) } └─StreamShare { id: 2 } - └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not(IsNull(t.t)) AND Not((t.a < 1:Int32))))) AND ((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/src/optimizer/rule/stream/split_now_or_rule.rs b/src/frontend/src/optimizer/rule/stream/split_now_or_rule.rs index 36c88211848a1..ea63119980f90 100644 --- a/src/frontend/src/optimizer/rule/stream/split_now_or_rule.rs +++ b/src/frontend/src/optimizer/rule/stream/split_now_or_rule.rs @@ -57,7 +57,7 @@ impl Rule for SplitNowOrRule { return None; } - let (mut now, others): (Vec, Vec) = + let (now, others): (Vec, Vec) = disjunctions.into_iter().partition(|x| x.count_nows() != 0); // Only support now in one arm of disjunctions @@ -70,22 +70,10 @@ impl Rule for SplitNowOrRule { // + A & !B & !C ... &!Z // + B | C ... | Z - let mut arm1 = now.pop().unwrap(); - for pred in &others { - let not_pred: ExprImpl = - FunctionCall::new_unchecked(ExprType::Not, vec![pred.clone()], DataType::Boolean) - .into(); - arm1 = - FunctionCall::new_unchecked(ExprType::And, vec![arm1, not_pred], DataType::Boolean) - .into(); - } - - let arm2 = others - .into_iter() - .reduce(|a, b| { - FunctionCall::new_unchecked(ExprType::Or, vec![a, b], DataType::Boolean).into() - }) - .unwrap(); + let arm1 = ExprImpl::and(now.into_iter().chain(others.iter().map(|pred| { + FunctionCall::new_unchecked(ExprType::Not, vec![pred.clone()], DataType::Boolean).into() + }))); + let arm2 = ExprImpl::or(others); let share = LogicalShare::create(input); let filter1 = LogicalFilter::create_with_expr(share.clone(), arm1); From 2bbe2c96bd9da7687bfafbcf4cfd3a77db5e41bc Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 15:31:19 +0800 Subject: [PATCH 04/11] try leveled Signed-off-by: Bugen Zhao --- .../tests/testdata/output/ch_benchmark.yaml | 10 +++---- .../testdata/output/index_selection.yaml | 20 +++++++------- .../tests/testdata/output/join.yaml | 2 +- .../logical_scan_predicate_eliminate.yaml | 4 +-- .../tests/testdata/output/nexmark.yaml | 14 +++++----- .../tests/testdata/output/nexmark_source.yaml | 12 ++++----- .../output/nexmark_temporal_filter.yaml | 4 +-- .../testdata/output/nexmark_watermark.yaml | 18 ++++++------- .../tests/testdata/output/range_scan.yaml | 2 +- .../testdata/output/temporal_filter.yaml | 18 ++++++------- .../tests/testdata/output/tpch.yaml | 10 +++---- src/frontend/src/expr/utils.rs | 27 +++++++++++-------- 12 files changed, 73 insertions(+), 68 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml index f78fd13ffa17b..ce98b8bea75c9 100644 --- a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml @@ -2841,20 +2841,20 @@ LogicalProject { exprs: [sum(order_line.ol_amount)] } └─LogicalAgg { aggs: [sum(order_line.ol_amount)] } └─LogicalProject { exprs: [order_line.ol_amount] } - └─LogicalFilter { predicate: (order_line.ol_i_id = item.i_id) AND (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) AND (item.i_price >= 1:Int32::Decimal) AND (item.i_price <= 400000:Int32::Decimal) AND ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR ((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)))) } + └─LogicalFilter { predicate: (order_line.ol_i_id = item.i_id) AND (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) AND (item.i_price >= 1:Int32::Decimal) AND (item.i_price <= 400000:Int32::Decimal) AND (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalScan { table: order_line, columns: [order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number, order_line.ol_i_id, order_line.ol_supply_w_id, order_line.ol_delivery_d, order_line.ol_quantity, order_line.ol_amount, order_line.ol_dist_info] } └─LogicalScan { table: item, columns: [item.i_id, item.i_im_id, item.i_name, item.i_price, item.i_data] } optimized_logical_plan_for_batch: |- LogicalAgg { aggs: [sum(order_line.ol_amount)] } - └─LogicalJoin { type: Inner, on: (order_line.ol_i_id = item.i_id) AND ((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR ((Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)))), output: [order_line.ol_amount] } + └─LogicalJoin { type: Inner, on: (order_line.ol_i_id = item.i_id) AND (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))), output: [order_line.ol_amount] } ├─LogicalScan { table: order_line, output_columns: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount], required_columns: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount, order_line.ol_quantity], predicate: (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) } └─LogicalScan { table: item, output_columns: [item.i_id, item.i_data], required_columns: [item.i_id, item.i_data, item.i_price], predicate: (item.i_price >= 1:Int32::Decimal) AND (item.i_price <= 400000:Int32::Decimal) } batch_plan: |- BatchSimpleAgg { aggs: [sum(sum(order_line.ol_amount))] } └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [sum(order_line.ol_amount)] } - └─BatchLookupJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id AND ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR ((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)))) AND (item.i_price >= 1:Decimal) AND (item.i_price <= 400000:Decimal), output: [order_line.ol_amount], lookup table: item } + └─BatchLookupJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id AND (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) AND (item.i_price >= 1:Decimal) AND (item.i_price <= 400000:Decimal), output: [order_line.ol_amount], lookup table: item } └─BatchExchange { order: [], dist: UpstreamHashShard(order_line.ol_i_id) } └─BatchProject { exprs: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount] } └─BatchFilter { predicate: (order_line.ol_quantity >= 1:Int32) AND (order_line.ol_quantity <= 10:Int32) } @@ -2866,7 +2866,7 @@ └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(order_line.ol_amount)] } └─StreamProject { exprs: [order_line.ol_amount, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_i_id, item.i_id] } - └─StreamFilter { predicate: ((Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)) OR ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR (Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)))) } + └─StreamFilter { predicate: (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) } └─StreamHashJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id, output: all } ├─StreamExchange { dist: HashShard(order_line.ol_i_id) } │ └─StreamProject { exprs: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] } @@ -2888,7 +2888,7 @@ Fragment 1 StreamStatelessSimpleAgg { aggs: [sum(order_line.ol_amount)] } └── StreamProject { exprs: [order_line.ol_amount, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_i_id, item.i_id] } - └── StreamFilter { predicate: ((Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32)) OR ((Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32)) OR (Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)))) } + └── StreamFilter { predicate: (((Like(item.i_data, '%a':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 3:Int32)) OR (Like(item.i_data, '%b':Varchar) AND In(order_line.ol_w_id, 1:Int32, 2:Int32, 4:Int32))) OR (Like(item.i_data, '%c':Varchar) AND In(order_line.ol_w_id, 1:Int32, 5:Int32, 3:Int32))) } └── StreamHashJoin { type: Inner, predicate: order_line.ol_i_id = item.i_id, output: all } { tables: [ HashJoinLeft: 1, HashJoinDegreeLeft: 2, HashJoinRight: 3, HashJoinDegreeRight: 4 ] } ├── StreamExchange Hash([1]) from 2 └── StreamExchange Hash([0]) from 3 diff --git a/src/frontend/planner_test/tests/testdata/output/index_selection.yaml b/src/frontend/planner_test/tests/testdata/output/index_selection.yaml index 5ac9026633afb..67a89e0878f9b 100644 --- a/src/frontend/planner_test/tests/testdata/output/index_selection.yaml +++ b/src/frontend/planner_test/tests/testdata/output/index_selection.yaml @@ -306,7 +306,7 @@ select * from t1 where p = 1 or (a = 2 and b = 3 and c = 4) batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR ((t1.b = 3:Decimal) AND ((t1.c = 4:Int32) AND (t1.a = 2:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + └─BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR (((t1.a = 2:Int32) AND (t1.b = 3:Decimal)) AND (t1.c = 4:Int32))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchExchange { order: [], dist: UpstreamHashShard(idx2.t1._row_id) } └─BatchHashAgg { group_key: [idx2.t1._row_id], aggs: [] } └─BatchExchange { order: [], dist: HashShard(idx2.t1._row_id) } @@ -316,7 +316,7 @@ └─BatchExchange { order: [], dist: Single } └─BatchScan { table: idx4, columns: [idx4.t1._row_id], scan_ranges: [idx4.p = Int32(1)], distribution: SomeShard } batch_local_plan: |- - BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR ((t1.b = 3:Decimal) AND ((t1.c = 4:Int32) AND (t1.a = 2:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + BatchLookupJoin { type: Inner, predicate: idx2.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((t1.p = 1:Int32) OR (((t1.a = 2:Int32) AND (t1.b = 3:Decimal)) AND (t1.c = 4:Int32))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchHashAgg { group_key: [idx2.t1._row_id], aggs: [] } └─BatchUnion { all: true } ├─BatchExchange { order: [], dist: Single } @@ -332,13 +332,13 @@ select * from t1 where a = 1 or b = 2 or c = 3 or p = 4 or a = 5 batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND (((t1.a = 5:Int32) OR (t1.a = 1:Int32)) OR ((t1.b = 2:Decimal) OR ((t1.c = 3:Int32) OR (t1.p = 4:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + └─BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((((t1.a = 1:Int32) OR (t1.b = 2:Decimal)) OR ((t1.c = 3:Int32) OR (t1.p = 4:Int32))) OR (t1.a = 5:Int32)), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchExchange { order: [], dist: UpstreamHashShard(idx1.t1._row_id) } └─BatchHashAgg { group_key: [idx1.t1._row_id], aggs: [] } └─BatchExchange { order: [], dist: HashShard(idx1.t1._row_id) } └─BatchUnion { all: true } ├─BatchExchange { order: [], dist: Single } - │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(1), idx1.a = Int32(5)], distribution: SomeShard } + │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(5), idx1.a = Int32(1)], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } │ └─BatchScan { table: idx2, columns: [idx2.t1._row_id], scan_ranges: [idx2.b = Decimal(Normalized(2))], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } @@ -346,11 +346,11 @@ └─BatchExchange { order: [], dist: Single } └─BatchScan { table: idx4, columns: [idx4.t1._row_id], scan_ranges: [idx4.p = Int32(4)], distribution: SomeShard } batch_local_plan: |- - BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND (((t1.a = 5:Int32) OR (t1.a = 1:Int32)) OR ((t1.b = 2:Decimal) OR ((t1.c = 3:Int32) OR (t1.p = 4:Int32)))), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } + BatchLookupJoin { type: Inner, predicate: idx1.t1._row_id IS NOT DISTINCT FROM t1._row_id AND ((((t1.a = 1:Int32) OR (t1.b = 2:Decimal)) OR ((t1.c = 3:Int32) OR (t1.p = 4:Int32))) OR (t1.a = 5:Int32)), output: [t1.a, t1.b, t1.c, t1.p], lookup table: t1 } └─BatchHashAgg { group_key: [idx1.t1._row_id], aggs: [] } └─BatchUnion { all: true } ├─BatchExchange { order: [], dist: Single } - │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(1), idx1.a = Int32(5)], distribution: SomeShard } + │ └─BatchScan { table: idx1, columns: [idx1.t1._row_id], scan_ranges: [idx1.a = Int32(5), idx1.a = Int32(1)], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } │ └─BatchScan { table: idx2, columns: [idx2.t1._row_id], scan_ranges: [idx2.b = Decimal(Normalized(2))], distribution: SomeShard } ├─BatchExchange { order: [], dist: Single } @@ -475,11 +475,11 @@ select * from t1 where a > 1 or c > 1 or b > 1 batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: ((t1.b > 1:Decimal) OR ((t1.a > 1:Int32) OR (t1.c > 1:Int32))) } + └─BatchFilter { predicate: (((t1.a > 1:Int32) OR (t1.c > 1:Int32)) OR (t1.b > 1:Decimal)) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } batch_local_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: ((t1.b > 1:Decimal) OR ((t1.a > 1:Int32) OR (t1.c > 1:Int32))) } + └─BatchFilter { predicate: (((t1.a > 1:Int32) OR (t1.c > 1:Int32)) OR (t1.b > 1:Decimal)) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } - sql: | create table t1 (a int, b numeric, c bigint, p int); @@ -490,11 +490,11 @@ select * from t1 where a between 1 and 8 or b between 1 and 8 or c between 1 and 8; batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((t1.c >= 1:Int32) AND (t1.c <= 8:Int32)) OR (((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal)))) } + └─BatchFilter { predicate: ((((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal))) OR ((t1.c >= 1:Int32) AND (t1.c <= 8:Int32))) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } batch_local_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((t1.c >= 1:Int32) AND (t1.c <= 8:Int32)) OR (((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal)))) } + └─BatchFilter { predicate: ((((t1.a >= 1:Int32) AND (t1.a <= 8:Int32)) OR ((t1.b >= 1:Decimal) AND (t1.b <= 8:Decimal))) OR ((t1.c >= 1:Int32) AND (t1.c <= 8:Int32))) } └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c, t1.p], distribution: SomeShard } - sql: | create table t1 (a int, b numeric, c bigint, p int); diff --git a/src/frontend/planner_test/tests/testdata/output/join.yaml b/src/frontend/planner_test/tests/testdata/output/join.yaml index a17bc8fab2d38..a1617d04e7d2e 100644 --- a/src/frontend/planner_test/tests/testdata/output/join.yaml +++ b/src/frontend/planner_test/tests/testdata/output/join.yaml @@ -204,7 +204,7 @@ StreamMaterialize { columns: [x, i.t._row_id(hidden), i.t._row_id#1(hidden), i.x(hidden), i.t._row_id#2(hidden), i.t._row_id#3(hidden), i.x#1(hidden)], stream_key: [i.t._row_id, i.t._row_id#1, i.x, i.t._row_id#2, i.t._row_id#3, i.x#1], pk_columns: [i.t._row_id, i.t._row_id#1, i.x, i.t._row_id#2, i.t._row_id#3, i.x#1], pk_conflict: NoCheck } └─StreamExchange { dist: HashShard(i.t._row_id, i.t._row_id, i.x, i.t._row_id, i.t._row_id, i.x) } └─StreamProject { exprs: [Coalesce(i.x, i.x) as $expr1, i.t._row_id, i.t._row_id, i.x, i.t._row_id, i.t._row_id, i.x] } - └─StreamFilter { predicate: ((IsNotNull(i.t._row_id) OR IsNotNull(i.t._row_id)) OR ((IsNotNull(i.x) OR IsNotNull(i.t._row_id)) OR (IsNotNull(i.t._row_id) OR IsNotNull(i.x)))) } + └─StreamFilter { predicate: (((IsNotNull(i.t._row_id) OR IsNotNull(i.t._row_id)) OR (IsNotNull(i.x) OR IsNotNull(i.t._row_id))) OR (IsNotNull(i.t._row_id) OR IsNotNull(i.x))) } └─StreamHashJoin { type: FullOuter, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id, i.t._row_id, i.t._row_id] } ├─StreamShare { id: 4 } │ └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.t._row_id, i.t._row_id] } diff --git a/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml b/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml index 763538b42406a..684e4c9144d01 100644 --- a/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml +++ b/src/frontend/planner_test/tests/testdata/output/logical_scan_predicate_eliminate.yaml @@ -45,9 +45,9 @@ and (c1 > 1 or c2 > 2 or c3 > 3); logical_plan: |- LogicalProject { exprs: [t1.c1, t1.c2, t1.c3] } - └─LogicalFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND ((t1.c3 > 3:Int32) OR ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32))) } + └─LogicalFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND (((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) OR (t1.c3 > 3:Int32)) } └─LogicalScan { table: t1, columns: [t1.c1, t1.c2, t1.c3, t1._row_id] } batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND ((t1.c3 > 3:Int32) OR ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32))) } + └─BatchFilter { predicate: Not((t1.c1 > 1:Int32)) AND Not((t1.c2 > 2:Int32)) AND Not((t1.c3 > 3:Int32)) AND ((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) AND ((t1.c2 > 2:Int32) OR (t1.c3 > 3:Int32)) AND ((t1.c3 > 3:Int32) OR (t1.c1 > 1:Int32)) AND (((t1.c1 > 1:Int32) OR (t1.c2 > 2:Int32)) OR (t1.c3 > 3:Int32)) } └─BatchScan { table: t1, columns: [t1.c1, t1.c2, t1.c3], distribution: SomeShard } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml index 8f16219a0830c..d6b90da0a8c1a 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml @@ -107,20 +107,20 @@ sql: SELECT auction, price FROM bid WHERE auction = 1007 OR auction = 1020 OR auction = 2001 OR auction = 2019 OR auction = 2087; batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((bid.auction = 2087:Int32) OR (bid.auction = 1007:Int32)) OR ((bid.auction = 1020:Int32) OR ((bid.auction = 2001:Int32) OR (bid.auction = 2019:Int32)))) } + └─BatchFilter { predicate: ((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR (bid.auction = 2019:Int32))) OR (bid.auction = 2087:Int32)) } └─BatchScan { table: bid, columns: [bid.auction, bid.price], distribution: SomeShard } sink_plan: |- StreamSink { type: append-only, columns: [auction, price, bid._row_id(hidden)] } - └─StreamFilter { predicate: (((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR ((bid.auction = 2019:Int32) OR (bid.auction = 2087:Int32)))) } + └─StreamFilter { predicate: ((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR (bid.auction = 2019:Int32))) OR (bid.auction = 2087:Int32)) } └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], stream_scan_type: ArrangementBackfill, stream_key: [bid._row_id], pk: [_row_id], dist: UpstreamHashShard(bid._row_id) } stream_plan: |- StreamMaterialize { columns: [auction, price, bid._row_id(hidden)], stream_key: [bid._row_id], pk_columns: [bid._row_id], pk_conflict: NoCheck } - └─StreamFilter { predicate: (((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR ((bid.auction = 2019:Int32) OR (bid.auction = 2087:Int32)))) } + └─StreamFilter { predicate: ((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR (bid.auction = 2019:Int32))) OR (bid.auction = 2087:Int32)) } └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], stream_scan_type: ArrangementBackfill, stream_key: [bid._row_id], pk: [_row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction, price, bid._row_id(hidden)], stream_key: [bid._row_id], pk_columns: [bid._row_id], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } - └── StreamFilter { predicate: (((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR ((bid.auction = 2019:Int32) OR (bid.auction = 2087:Int32)))) } + └── StreamFilter { predicate: ((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR ((bid.auction = 2001:Int32) OR (bid.auction = 2019:Int32))) OR (bid.auction = 2087:Int32)) } └── StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], stream_scan_type: ArrangementBackfill, stream_key: [bid._row_id], pk: [_row_id], dist: UpstreamHashShard(bid._row_id) } ├── tables: [ StreamScan: 0 ] ├── Upstream @@ -142,7 +142,7 @@ A.category = 10 and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca'); batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchLookupJoin { type: Inner, predicate: auction.seller = person.id AND ((person.state = 'or':Varchar) OR ((person.state = 'id':Varchar) OR (person.state = 'ca':Varchar))), output: [person.name, person.city, person.state, auction.id], lookup table: person } + └─BatchLookupJoin { type: Inner, predicate: auction.seller = person.id AND (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)), output: [person.name, person.city, person.state, auction.id], lookup table: person } └─BatchExchange { order: [], dist: UpstreamHashShard(auction.seller) } └─BatchProject { exprs: [auction.id, auction.seller] } └─BatchFilter { predicate: (auction.category = 10:Int32) } @@ -156,7 +156,7 @@ │ └─StreamFilter { predicate: (auction.category = 10:Int32) } │ └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], stream_scan_type: ArrangementBackfill, stream_key: [auction.id], pk: [id], dist: UpstreamHashShard(auction.id) } └─StreamExchange { dist: HashShard(person.id) } - └─StreamFilter { predicate: ((person.state = 'id':Varchar) OR ((person.state = 'ca':Varchar) OR (person.state = 'or':Varchar))) } + └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) } └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], stream_scan_type: ArrangementBackfill, stream_key: [person.id], pk: [id], dist: UpstreamHashShard(person.id) } stream_dist_plan: |+ Fragment 0 @@ -179,7 +179,7 @@ └── BatchPlanNode Fragment 3 - StreamFilter { predicate: ((person.state = 'id':Varchar) OR ((person.state = 'ca':Varchar) OR (person.state = 'or':Varchar))) } + StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) } └── StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], stream_scan_type: ArrangementBackfill, stream_key: [person.id], pk: [id], dist: UpstreamHashShard(person.id) } ├── tables: [ StreamScan: 5 ] ├── Upstream diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml index e434af2189670..35713c9682a35 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml @@ -116,13 +116,13 @@ sql: SELECT auction, price FROM bid WHERE auction = 1007 OR auction = 1020 OR auction = 2001 OR auction = 2019 OR auction = 2087; batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR ((auction = 2019:Int32) OR (auction = 2087:Int32)))) } + └─BatchFilter { predicate: ((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR (auction = 2019:Int32))) OR (auction = 2087:Int32)) } └─BatchProject { exprs: [auction, price] } └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_plan: |- StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } └─StreamProject { exprs: [auction, price, _row_id] } - └─StreamFilter { predicate: (((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR ((auction = 2019:Int32) OR (auction = 2087:Int32)))) } + └─StreamFilter { predicate: ((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR (auction = 2019:Int32))) OR (auction = 2087:Int32)) } └─StreamRowIdGen { row_id_index: 7 } └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_dist_plan: |+ @@ -130,7 +130,7 @@ StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] └── StreamProject { exprs: [auction, price, _row_id] } - └── StreamFilter { predicate: (((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR ((auction = 2019:Int32) OR (auction = 2087:Int32)))) } + └── StreamFilter { predicate: ((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR ((auction = 2001:Int32) OR (auction = 2019:Int32))) OR (auction = 2087:Int32)) } └── StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { tables: [ Source: 0 ] } @@ -156,7 +156,7 @@ │ └─BatchProject { exprs: [id, seller, category] } │ └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } └─BatchExchange { order: [], dist: HashShard(id) } - └─BatchFilter { predicate: ((state = 'ca':Varchar) OR ((state = 'or':Varchar) OR (state = 'id':Varchar))) } + └─BatchFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } └─BatchProject { exprs: [id, name, city, state] } └─BatchSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } stream_plan: |- @@ -168,7 +168,7 @@ │ └─StreamRowIdGen { row_id_index: 10 } │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } └─StreamExchange { dist: HashShard(id) } - └─StreamFilter { predicate: ((state = 'id':Varchar) OR ((state = 'ca':Varchar) OR (state = 'or':Varchar))) } + └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } └─StreamRowIdGen { row_id_index: 8 } └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } stream_dist_plan: |+ @@ -189,7 +189,7 @@ └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { tables: [ Source: 4 ] } Fragment 3 - StreamFilter { predicate: ((state = 'id':Varchar) OR ((state = 'ca':Varchar) OR (state = 'or':Varchar))) } + StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } └── StreamRowIdGen { row_id_index: 8 } └── StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } { tables: [ Source: 5 ] } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml index d17b08e7b00f5..d5d948e5b507c 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml @@ -116,7 +116,7 @@ StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, _row_id] } └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, person, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } - ├─StreamFilter { predicate: (((Field(bid, 0:Int32) = 1020:Int32) OR (Field(bid, 0:Int32) = 2001:Int32)) OR ((Field(bid, 0:Int32) = 2019:Int32) OR ((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)))) AND (event_type = 2:Int32) } + ├─StreamFilter { predicate: ((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32))) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } │ └─StreamRowIdGen { row_id_index: 5 } │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } @@ -128,7 +128,7 @@ StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } └── StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, _row_id] } └── StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, person, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { tables: [ DynamicFilterLeft: 0, DynamicFilterRight: 1 ] } - ├── StreamFilter { predicate: (((Field(bid, 0:Int32) = 1020:Int32) OR (Field(bid, 0:Int32) = 2001:Int32)) OR ((Field(bid, 0:Int32) = 2019:Int32) OR ((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)))) AND (event_type = 2:Int32) } + ├── StreamFilter { predicate: ((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32))) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } │ └── StreamRowIdGen { row_id_index: 5 } │ └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } │ └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { tables: [ Source: 2 ] } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml index 14ec8ccda0dbc..f065ba33c252d 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml @@ -77,13 +77,13 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 2:Int32) as $expr3] } - └─BatchFilter { predicate: (((Field(bid, 0:Int32) = 2019:Int32) OR (Field(bid, 0:Int32) = 2087:Int32)) OR ((Field(bid, 0:Int32) = 1007:Int32) OR ((Field(bid, 0:Int32) = 1020:Int32) OR (Field(bid, 0:Int32) = 2001:Int32)))) AND (event_type = 2:Int32) } + └─BatchFilter { predicate: ((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32))) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } └─BatchProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_plan: |- StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 2:Int32) as $expr3, _row_id] } - └─StreamFilter { predicate: (((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)) OR ((Field(bid, 0:Int32) = 1020:Int32) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32)))) AND (event_type = 2:Int32) } + └─StreamFilter { predicate: ((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32))) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } └─StreamRowIdGen { row_id_index: 5 } └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -92,7 +92,7 @@ Fragment 0 StreamMaterialize { columns: [auction, price, _row_id(hidden)], stream_key: [_row_id], pk_columns: [_row_id], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } └── StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 2:Int32) as $expr3, _row_id] } - └── StreamFilter { predicate: (((Field(bid, 0:Int32) = 2087:Int32) OR (Field(bid, 0:Int32) = 1007:Int32)) OR ((Field(bid, 0:Int32) = 1020:Int32) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32)))) AND (event_type = 2:Int32) } + └── StreamFilter { predicate: ((((Field(bid, 0:Int32) = 1007:Int32) OR (Field(bid, 0:Int32) = 1020:Int32)) OR ((Field(bid, 0:Int32) = 2001:Int32) OR (Field(bid, 0:Int32) = 2019:Int32))) OR (Field(bid, 0:Int32) = 2087:Int32)) AND (event_type = 2:Int32) } └── StreamRowIdGen { row_id_index: 5 } └── StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } { tables: [ WatermarkFilter: 0 ] } └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -127,7 +127,7 @@ │ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } └─BatchExchange { order: [], dist: HashShard($expr3) } └─BatchProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, Field(person, 4:Int32) as $expr5, Field(person, 5:Int32) as $expr6] } - └─BatchFilter { predicate: ((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32) } + └─BatchFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } └─BatchProject { exprs: [event_type, person] } └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_plan: |- @@ -139,17 +139,17 @@ │ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } │ └─StreamShare { id: 6 } │ └─StreamProject { exprs: [event_type, person, auction, _row_id] } - │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'ca':Varchar) OR ((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'id':Varchar) OR ((Field(person, 5:Int32) = 'ca':Varchar) OR (Field(person, 5:Int32) = 'or':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32))) } + │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } │ └─StreamRowIdGen { row_id_index: 5 } │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } └─StreamExchange { dist: HashShard($expr4) } └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] } - └─StreamFilter { predicate: ((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32) } + └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } └─StreamShare { id: 6 } └─StreamProject { exprs: [event_type, person, auction, _row_id] } - └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'ca':Varchar) OR ((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'id':Varchar) OR ((Field(person, 5:Int32) = 'ca':Varchar) OR (Field(person, 5:Int32) = 'or':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32))) } + └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } └─StreamRowIdGen { row_id_index: 5 } └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -173,7 +173,7 @@ Fragment 3 StreamProject { exprs: [event_type, person, auction, _row_id] } - └── StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'ca':Varchar) OR ((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'id':Varchar) OR ((Field(person, 5:Int32) = 'ca':Varchar) OR (Field(person, 5:Int32) = 'or':Varchar))) AND (event_type = 0:Int32))) AND (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR (((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32))) } + └── StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } └── StreamRowIdGen { row_id_index: 5 } └── StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } { tables: [ WatermarkFilter: 4 ] } └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } @@ -181,7 +181,7 @@ Fragment 4 StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] } - └── StreamFilter { predicate: ((Field(person, 5:Int32) = 'or':Varchar) OR ((Field(person, 5:Int32) = 'id':Varchar) OR (Field(person, 5:Int32) = 'ca':Varchar))) AND (event_type = 0:Int32) } + └── StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 1 ], read pk prefix len hint: 1 } diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index 9add551b2d288..95a4315481ef9 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -417,7 +417,7 @@ SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)) or (user_id != 3); batch_plan: |- BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (((orders_count_by_user.user_id = 2:Int32) AND In(orders_count_by_user.date, 1111:Int32, 2222:Int32)) OR ((orders_count_by_user.user_id <> 3:Int32) OR (orders_count_by_user.user_id = 1:Int32))) } + └─BatchFilter { predicate: (((orders_count_by_user.user_id = 1:Int32) OR ((orders_count_by_user.user_id = 2:Int32) AND In(orders_count_by_user.date, 1111:Int32, 2222:Int32))) OR (orders_count_by_user.user_id <> 3:Int32)) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } - name: When any arm of or clause is not equal type, we can't convert it to scan range yet. before: diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml index 5e4e002459018..7bbd43ce3c35c 100644 --- a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml @@ -397,15 +397,15 @@ │ ├─StreamProject { exprs: [t1.ts, AddWithTimeZone(t1.ts, '01:00:00':Interval, 'UTC':Varchar) as $expr1, t1._row_id] } │ │ └─StreamFilter { predicate: Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))))) AND ((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } + │ │ └─StreamFilter { predicate: (((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) OR IsNull(t1.ts)) } │ │ └─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamNow { output: [now] } └─StreamExchange { dist: HashShard(t1._row_id, 1:Int32) } └─StreamProject { exprs: [t1.ts, t1._row_id, 1:Int32] } - └─StreamFilter { predicate: (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) } + └─StreamFilter { predicate: ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR IsNull(t1.ts)) } └─StreamShare { id: 2 } - └─StreamFilter { predicate: (IsNull(t1.ts) OR ((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz) OR (Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))))) AND ((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (IsNull(t1.ts) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz))) } + └─StreamFilter { predicate: (((Not((t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) AND Not(IsNull(t1.ts))) OR (t1.ts > '2023-12-18 00:00:00+00:00':Timestamptz)) OR IsNull(t1.ts)) } └─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } - name: Many Temporal filter with or predicate sql: | @@ -425,16 +425,16 @@ │ │ │ └─StreamDynamicFilter { predicate: (t.t > $expr1), output_watermarks: [t.t], output: [t.t, t.a, t._row_id], cleaned_by_watermark: true } │ │ │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not(IsNull(t.t)) AND Not((t.a < 1:Int32)) } │ │ │ │ └─StreamShare { id: 2 } - │ │ │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + │ │ │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } │ │ │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ │ │ └─StreamExchange { dist: Broadcast } │ │ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } │ │ │ └─StreamNow { output: [now] } │ │ └─StreamExchange { dist: HashShard(t._row_id, 1:Int32) } │ │ └─StreamProject { exprs: [t.t, t.a, t._row_id, 1:Int32] } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR IsNull(t.t)) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (IsNull(t.t) OR (t.a < 1:Int32)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } @@ -449,14 +449,14 @@ │ └─StreamDynamicFilter { predicate: (t.t > $expr1), output_watermarks: [t.t], output: [t.t, t.a, t._row_id], cleaned_by_watermark: true } │ ├─StreamFilter { predicate: IsNotNull(t.a) AND Not(IsNull(t.t)) AND Not((t.a < 1:Int32)) } │ │ └─StreamShare { id: 2 } - │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + │ │ └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } │ │ └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } │ └─StreamNow { output: [now] } └─StreamExchange { dist: HashShard(t._row_id, 1:Int32) } └─StreamProject { exprs: [t.t, t.a, t._row_id, 1:Int32] } - └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR IsNull(t.t)) } + └─StreamFilter { predicate: IsNotNull(t.a) AND (IsNull(t.t) OR (t.a < 1:Int32)) } └─StreamShare { id: 2 } - └─StreamFilter { predicate: IsNotNull(t.a) AND ((t.a < 1:Int32) OR (IsNull(t.t) OR (Not((t.a < 1:Int32)) AND Not(IsNull(t.t))))) AND ((Not((t.a < 1:Int32)) AND Not(IsNull(t.t))) OR ((t.a < 1:Int32) OR IsNull(t.t))) } + └─StreamFilter { predicate: IsNotNull(t.a) AND (((Not(IsNull(t.t)) AND Not((t.a < 1:Int32))) OR IsNull(t.t)) OR (t.a < 1:Int32)) } └─StreamTableScan { table: t, columns: [t.t, t.a, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/tpch.yaml b/src/frontend/planner_test/tests/testdata/output/tpch.yaml index 5f5cfa0749432..3c43faa8d2494 100644 --- a/src/frontend/planner_test/tests/testdata/output/tpch.yaml +++ b/src/frontend/planner_test/tests/testdata/output/tpch.yaml @@ -3650,14 +3650,14 @@ LogicalProject { exprs: [sum($expr1)] } └─LogicalAgg { aggs: [sum($expr1)] } └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32::Decimal - lineitem.l_discount)) as $expr1] } - └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (part.p_size >= 1:Int32) AND In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) AND ((((lineitem.l_quantity >= 10:Int32::Decimal) AND (lineitem.l_quantity <= 20:Int32::Decimal)) AND ((part.p_size <= 15:Int32) AND ((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)))) OR ((((lineitem.l_quantity >= 1:Int32::Decimal) AND (lineitem.l_quantity <= 11:Int32::Decimal)) AND ((part.p_size <= 5:Int32) AND ((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)))) OR (((lineitem.l_quantity >= 30:Int32::Decimal) AND (lineitem.l_quantity <= 40:Int32::Decimal)) AND ((part.p_size <= 10:Int32) AND ((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)))))) } + └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (part.p_size >= 1:Int32) AND In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) AND ((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND ((lineitem.l_quantity >= 1:Int32::Decimal) AND (lineitem.l_quantity <= 11:Int32::Decimal))) AND (part.p_size <= 5:Int32)) OR ((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND ((lineitem.l_quantity >= 30:Int32::Decimal) AND (lineitem.l_quantity <= 40:Int32::Decimal))) AND (part.p_size <= 10:Int32))) OR ((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND ((lineitem.l_quantity >= 10:Int32::Decimal) AND (lineitem.l_quantity <= 20:Int32::Decimal))) AND (part.p_size <= 15:Int32))) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_name, part.p_mfgr, part.p_brand, part.p_type, part.p_size, part.p_container, part.p_retailprice, part.p_comment] } optimized_logical_plan_for_batch: |- LogicalAgg { aggs: [sum($expr1)] } └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32::Decimal - lineitem.l_discount)) as $expr1] } - └─LogicalJoin { type: Inner, on: (part.p_partkey = lineitem.l_partkey) AND (((In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar) AND (lineitem.l_quantity >= 30:Int32::Decimal)) AND ((lineitem.l_quantity <= 40:Int32::Decimal) AND ((part.p_size <= 10:Int32) AND (part.p_brand = 'Brand#24':Varchar)))) OR (((In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar) AND (lineitem.l_quantity >= 10:Int32::Decimal)) AND ((lineitem.l_quantity <= 20:Int32::Decimal) AND ((part.p_size <= 15:Int32) AND (part.p_brand = 'Brand#32':Varchar)))) OR ((In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar) AND (lineitem.l_quantity >= 1:Int32::Decimal)) AND ((lineitem.l_quantity <= 11:Int32::Decimal) AND ((part.p_size <= 5:Int32) AND (part.p_brand = 'Brand#52':Varchar)))))), output: [lineitem.l_extendedprice, lineitem.l_discount] } + └─LogicalJoin { type: Inner, on: (part.p_partkey = lineitem.l_partkey) AND ((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND ((lineitem.l_quantity >= 1:Int32::Decimal) AND (lineitem.l_quantity <= 11:Int32::Decimal))) AND (part.p_size <= 5:Int32)) OR ((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND ((lineitem.l_quantity >= 30:Int32::Decimal) AND (lineitem.l_quantity <= 40:Int32::Decimal))) AND (part.p_size <= 10:Int32))) OR ((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND ((lineitem.l_quantity >= 10:Int32::Decimal) AND (lineitem.l_quantity <= 20:Int32::Decimal))) AND (part.p_size <= 15:Int32))), output: [lineitem.l_extendedprice, lineitem.l_discount] } ├─LogicalScan { table: lineitem, output_columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipinstruct, lineitem.l_shipmode], predicate: In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_brand, part.p_size, part.p_container], predicate: (part.p_size >= 1:Int32) } batch_plan: |- @@ -3665,7 +3665,7 @@ └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [sum($expr1)] } └─BatchProject { exprs: [(lineitem.l_extendedprice * (1:Decimal - lineitem.l_discount)) as $expr1] } - └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey AND ((((part.p_size <= 5:Int32) AND (part.p_brand = 'Brand#52':Varchar)) AND (In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar) AND ((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal)))) OR ((((part.p_size <= 10:Int32) AND (part.p_brand = 'Brand#24':Varchar)) AND (In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar) AND ((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal)))) OR (((part.p_size <= 15:Int32) AND (part.p_brand = 'Brand#32':Varchar)) AND (In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar) AND ((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal)))))) AND (part.p_size >= 1:Int32), output: [lineitem.l_extendedprice, lineitem.l_discount], lookup table: part } + └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey AND ((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND ((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal))) AND (part.p_size <= 5:Int32)) OR ((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND ((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal))) AND (part.p_size <= 10:Int32))) OR ((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND ((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal))) AND (part.p_size <= 15:Int32))) AND (part.p_size >= 1:Int32), output: [lineitem.l_extendedprice, lineitem.l_discount], lookup table: part } └─BatchExchange { order: [], dist: UpstreamHashShard(lineitem.l_partkey) } └─BatchProject { exprs: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount] } └─BatchFilter { predicate: In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) } @@ -3677,7 +3677,7 @@ └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1)] } └─StreamProject { exprs: [(lineitem.l_extendedprice * (1:Decimal - lineitem.l_discount)) as $expr1, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } - └─StreamFilter { predicate: ((((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal)) AND ((part.p_size <= 10:Int32) AND ((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)))) OR ((((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal)) AND ((part.p_size <= 15:Int32) AND ((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)))) OR (((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal)) AND ((part.p_size <= 5:Int32) AND ((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)))))) } + └─StreamFilter { predicate: ((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND ((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal))) AND (part.p_size <= 5:Int32)) OR ((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND ((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal))) AND (part.p_size <= 10:Int32))) OR ((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND ((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal))) AND (part.p_size <= 15:Int32))) } └─StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: all } ├─StreamExchange { dist: HashShard(lineitem.l_partkey) } │ └─StreamProject { exprs: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } @@ -3697,7 +3697,7 @@ Fragment 1 StreamStatelessSimpleAgg { aggs: [sum($expr1)] } └── StreamProject { exprs: [(lineitem.l_extendedprice * (1:Decimal - lineitem.l_discount)) as $expr1, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } - └── StreamFilter { predicate: ((((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal)) AND ((part.p_size <= 10:Int32) AND ((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)))) OR ((((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal)) AND ((part.p_size <= 15:Int32) AND ((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)))) OR (((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal)) AND ((part.p_size <= 5:Int32) AND ((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)))))) } + └── StreamFilter { predicate: ((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND ((lineitem.l_quantity >= 1:Decimal) AND (lineitem.l_quantity <= 11:Decimal))) AND (part.p_size <= 5:Int32)) OR ((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND ((lineitem.l_quantity >= 30:Decimal) AND (lineitem.l_quantity <= 40:Decimal))) AND (part.p_size <= 10:Int32))) OR ((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND ((lineitem.l_quantity >= 10:Decimal) AND (lineitem.l_quantity <= 20:Decimal))) AND (part.p_size <= 15:Int32))) } └── StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: all } { tables: [ HashJoinLeft: 1, HashJoinDegreeLeft: 2, HashJoinRight: 3, HashJoinDegreeRight: 4 ] } ├── StreamExchange Hash([0]) from 2 └── StreamExchange Hash([0]) from 3 diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index b44c92f2005b6..91452f76caa4b 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -40,14 +40,23 @@ pub(super) fn merge_expr_by_logical(exprs: I, op: ExprType, identity_elem: Ex where I: IntoIterator, { - let mut exprs: VecDeque<_> = exprs.into_iter().collect(); + let mut exprs: VecDeque<_> = exprs.into_iter().map(|e| (0usize, e)).collect(); + while exprs.len() > 1 { - let lhs = exprs.pop_front().unwrap(); - let rhs = exprs.pop_front().unwrap(); - let new_expr = FunctionCall::new(op, vec![lhs, rhs]).unwrap().into(); - exprs.push_back(new_expr); + let (level, lhs) = exprs.pop_front().unwrap(); + let rhs_level = exprs.front().unwrap().0; + + // If there's one element left in the current level, move it to the end of the next level. + if level < rhs_level { + exprs.push_back((level, lhs)); + } else { + let rhs = exprs.pop_front().unwrap().1; + let new_expr = FunctionCall::new(op, vec![lhs, rhs]).unwrap().into(); + exprs.push_back((level + 1, new_expr)); + } } - exprs.pop_front().unwrap_or(identity_elem) + + exprs.pop_front().map(|(_, e)| e).unwrap_or(identity_elem) } /// Transform a bool expression to Conjunctive form. e.g. given expression is @@ -397,11 +406,7 @@ pub fn factorization_expr(expr: ExprImpl) -> Vec { disjunction.retain(|factor| !greatest_common_divider.contains(factor)); } // now disjunctions == [[A, B], [B], [E]] - let remaining = ExprImpl::or( - disjunctions - .into_iter() - .map(|conjunction| ExprImpl::and(conjunction)), - ); + let remaining = ExprImpl::or(disjunctions.into_iter().map(ExprImpl::and)); // now remaining is (A & B) | (B) | (E) // the result is C & D & ((A & B) | (B) | (E)) greatest_common_divider From e0427986ce50afa7a5864eea90413acf40686596 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 16:44:56 +0800 Subject: [PATCH 05/11] add e2e test for customer case Signed-off-by: Bugen Zhao --- .../bug_fixes/stack_overflow_17342.slt | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 e2e_test/streaming/bug_fixes/stack_overflow_17342.slt diff --git a/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt b/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt new file mode 100644 index 0000000000000..227ee8295f3cb --- /dev/null +++ b/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt @@ -0,0 +1,119 @@ +statement ok +SET streaming_parallelism TO 1; + +statement ok +CREATE TABLE t (v int); + +# This query used to overflow the stack during optimization as it generated a left-deep tree +# of `OR xx IS NOT NULL` expression in the filter after each full outer join. +statement ok +CREATE MATERIALIZED VIEW mv AS +SELECT + count(*) +FROM + t +FULL OUTER JOIN t t1 USING (v) +FULL OUTER JOIN t t2 USING (v) +FULL OUTER JOIN t t3 USING (v) +FULL OUTER JOIN t t4 USING (v) +FULL OUTER JOIN t t5 USING (v) +FULL OUTER JOIN t t6 USING (v) +FULL OUTER JOIN t t7 USING (v) +FULL OUTER JOIN t t8 USING (v) +FULL OUTER JOIN t t9 USING (v) +FULL OUTER JOIN t t10 USING (v) +FULL OUTER JOIN t t11 USING (v) +FULL OUTER JOIN t t12 USING (v) +FULL OUTER JOIN t t13 USING (v) +FULL OUTER JOIN t t14 USING (v) +FULL OUTER JOIN t t15 USING (v) +FULL OUTER JOIN t t16 USING (v) +FULL OUTER JOIN t t17 USING (v) +FULL OUTER JOIN t t18 USING (v) +FULL OUTER JOIN t t19 USING (v) +FULL OUTER JOIN t t20 USING (v) +FULL OUTER JOIN t t21 USING (v) +FULL OUTER JOIN t t22 USING (v) +FULL OUTER JOIN t t23 USING (v) +FULL OUTER JOIN t t24 USING (v) +FULL OUTER JOIN t t25 USING (v) +FULL OUTER JOIN t t26 USING (v) +FULL OUTER JOIN t t27 USING (v) +FULL OUTER JOIN t t28 USING (v) +FULL OUTER JOIN t t29 USING (v) +FULL OUTER JOIN t t30 USING (v) +FULL OUTER JOIN t t31 USING (v) +FULL OUTER JOIN t t32 USING (v) +FULL OUTER JOIN t t33 USING (v) +FULL OUTER JOIN t t34 USING (v) +FULL OUTER JOIN t t35 USING (v) +FULL OUTER JOIN t t36 USING (v) +FULL OUTER JOIN t t37 USING (v) +FULL OUTER JOIN t t38 USING (v) +FULL OUTER JOIN t t39 USING (v) +FULL OUTER JOIN t t40 USING (v) +FULL OUTER JOIN t t41 USING (v) +FULL OUTER JOIN t t42 USING (v) +FULL OUTER JOIN t t43 USING (v) +FULL OUTER JOIN t t44 USING (v) +FULL OUTER JOIN t t45 USING (v) +FULL OUTER JOIN t t46 USING (v) +FULL OUTER JOIN t t47 USING (v) +FULL OUTER JOIN t t48 USING (v) +FULL OUTER JOIN t t49 USING (v) +FULL OUTER JOIN t t50 USING (v) +FULL OUTER JOIN t t51 USING (v) +FULL OUTER JOIN t t52 USING (v) +FULL OUTER JOIN t t53 USING (v) +FULL OUTER JOIN t t54 USING (v) +FULL OUTER JOIN t t55 USING (v) +FULL OUTER JOIN t t56 USING (v) +FULL OUTER JOIN t t57 USING (v) +FULL OUTER JOIN t t58 USING (v) +FULL OUTER JOIN t t59 USING (v) +FULL OUTER JOIN t t60 USING (v) +FULL OUTER JOIN t t61 USING (v) +FULL OUTER JOIN t t62 USING (v) +FULL OUTER JOIN t t63 USING (v) +FULL OUTER JOIN t t64 USING (v) +FULL OUTER JOIN t t65 USING (v) +FULL OUTER JOIN t t66 USING (v) +FULL OUTER JOIN t t67 USING (v) +FULL OUTER JOIN t t68 USING (v) +FULL OUTER JOIN t t69 USING (v) +FULL OUTER JOIN t t70 USING (v) +FULL OUTER JOIN t t71 USING (v) +FULL OUTER JOIN t t72 USING (v) +FULL OUTER JOIN t t73 USING (v) +FULL OUTER JOIN t t74 USING (v) +FULL OUTER JOIN t t75 USING (v) +FULL OUTER JOIN t t76 USING (v) +FULL OUTER JOIN t t77 USING (v) +FULL OUTER JOIN t t78 USING (v) +FULL OUTER JOIN t t79 USING (v) +FULL OUTER JOIN t t80 USING (v) +FULL OUTER JOIN t t81 USING (v) +FULL OUTER JOIN t t82 USING (v) +FULL OUTER JOIN t t83 USING (v) +FULL OUTER JOIN t t84 USING (v) +FULL OUTER JOIN t t85 USING (v) +FULL OUTER JOIN t t86 USING (v) +FULL OUTER JOIN t t87 USING (v) +FULL OUTER JOIN t t88 USING (v) +FULL OUTER JOIN t t89 USING (v) +FULL OUTER JOIN t t90 USING (v) +FULL OUTER JOIN t t91 USING (v) +FULL OUTER JOIN t t92 USING (v) +FULL OUTER JOIN t t93 USING (v) +FULL OUTER JOIN t t94 USING (v) +FULL OUTER JOIN t t95 USING (v) +FULL OUTER JOIN t t96 USING (v) +FULL OUTER JOIN t t97 USING (v) +FULL OUTER JOIN t t98 USING (v) +; + +statement ok +DROP MATERIALIZED VIEW mv CASCADE; + +statement ok +SET streaming_parallelism TO DEFAULT; From 447d3c0e8ec7f2d09cae4191471914c7038e5f5e Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 16:51:15 +0800 Subject: [PATCH 06/11] apply to null safe eq Signed-off-by: Bugen Zhao --- .../rule/intersect_to_semi_join_rule.rs | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/frontend/src/optimizer/rule/intersect_to_semi_join_rule.rs b/src/frontend/src/optimizer/rule/intersect_to_semi_join_rule.rs index 29ccbc066ec36..1d7d385aec627 100644 --- a/src/frontend/src/optimizer/rule/intersect_to_semi_join_rule.rs +++ b/src/frontend/src/optimizer/rule/intersect_to_semi_join_rule.rs @@ -50,14 +50,14 @@ impl Rule for IntersectToSemiJoinRule { impl IntersectToSemiJoinRule { pub(crate) fn gen_null_safe_equal(left: PlanRef, right: PlanRef) -> ExprImpl { - (left + let arms = (left .schema() .fields() .iter() .zip_eq_debug(right.schema().fields()) .enumerate()) - .fold(None, |expr, (i, (left_field, right_field))| { - let equal = ExprImpl::FunctionCall(Box::new(FunctionCall::new_unchecked( + .map(|(i, (left_field, right_field))| { + ExprImpl::FunctionCall(Box::new(FunctionCall::new_unchecked( ExprType::IsNotDistinctFrom, vec![ ExprImpl::InputRef(Box::new(InputRef::new(i, left_field.data_type()))), @@ -67,16 +67,9 @@ impl IntersectToSemiJoinRule { ))), ], Boolean, - ))); - - match expr { - None => Some(equal), - Some(expr) => Some(ExprImpl::FunctionCall(Box::new( - FunctionCall::new_unchecked(ExprType::And, vec![expr, equal], Boolean), - ))), - } - }) - .unwrap() + ))) + }); + ExprImpl::and(arms) } } From ce8d29e0cda92cfe4350dd2026841269ee46354f Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 16:58:29 +0800 Subject: [PATCH 07/11] Update stack_overflow_17342.slt Co-authored-by: Dylan --- e2e_test/streaming/bug_fixes/stack_overflow_17342.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt b/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt index 227ee8295f3cb..43f5ce877d762 100644 --- a/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt +++ b/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt @@ -113,7 +113,7 @@ FULL OUTER JOIN t t98 USING (v) ; statement ok -DROP MATERIALIZED VIEW mv CASCADE; +DROP TABLE t CASCADE; statement ok SET streaming_parallelism TO DEFAULT; From 7f293c4cece506f3a61a5c5aabe6dcb257bc06ab Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 19 Jun 2024 17:30:37 +0800 Subject: [PATCH 08/11] skip in madsim Signed-off-by: Bugen Zhao --- e2e_test/streaming/bug_fixes/stack_overflow_17342.slt | 1 + src/tests/simulation/src/slt.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt b/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt index 43f5ce877d762..01197a299736f 100644 --- a/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt +++ b/e2e_test/streaming/bug_fixes/stack_overflow_17342.slt @@ -6,6 +6,7 @@ CREATE TABLE t (v int); # This query used to overflow the stack during optimization as it generated a left-deep tree # of `OR xx IS NOT NULL` expression in the filter after each full outer join. +skipif madsim statement ok CREATE MATERIALIZED VIEW mv AS SELECT diff --git a/src/tests/simulation/src/slt.rs b/src/tests/simulation/src/slt.rs index 89e05b974e275..e2fdfb6b54bb8 100644 --- a/src/tests/simulation/src/slt.rs +++ b/src/tests/simulation/src/slt.rs @@ -208,6 +208,7 @@ pub async fn run_slt_task( // use a session per file let mut tester = sqllogictest::Runner::new(|| RisingWave::connect("frontend".into(), "dev".into())); + tester.add_label("madsim"); let file = file.unwrap(); let path = file.as_path(); @@ -461,6 +462,8 @@ pub async fn run_slt_task( pub async fn run_parallel_slt_task(glob: &str, jobs: usize) -> Result<(), ParallelTestError> { let mut tester = sqllogictest::Runner::new(|| RisingWave::connect("frontend".into(), "dev".into())); + tester.add_label("madsim"); + tester .run_parallel_async( glob, From 782975f79e588017d004131237f27e72cc11de60 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Thu, 20 Jun 2024 11:33:59 +0800 Subject: [PATCH 09/11] Update src/frontend/src/expr/utils.rs Co-authored-by: xxchan --- src/frontend/src/expr/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index 91452f76caa4b..8ab2ae0e16d7f 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -33,7 +33,7 @@ fn split_expr_by(expr: ExprImpl, op: ExprType, rets: &mut Vec) { } } -/// Merge the given expressions by the a logical operation. +/// Merge the given expressions by the logical operation. /// /// The `op` must be commutative and associative, typically `And` or `Or`. pub(super) fn merge_expr_by_logical(exprs: I, op: ExprType, identity_elem: ExprImpl) -> ExprImpl From b51a40c18d1fb8261bd9d643413a0aaf14d9c0a6 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Thu, 20 Jun 2024 12:58:32 +0800 Subject: [PATCH 10/11] bump sqllogictest Signed-off-by: Bugen Zhao --- Cargo.lock | 3 +-- src/tests/simulation/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69e17e94bce33..77af2f96d6da7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13366,8 +13366,7 @@ dependencies = [ [[package]] name = "sqllogictest" version = "0.20.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f20de090b0fde4dcd53b330e7ad2772140304eb34f5e9a99c8963fc4c052f149" +source = "git+https://github.com/risinglightdb/sqllogictest-rs?rev=91ee11fd9e3c20f62fc54034dc36abff9c92c934#91ee11fd9e3c20f62fc54034dc36abff9c92c934" dependencies = [ "async-trait", "educe", diff --git a/src/tests/simulation/Cargo.toml b/src/tests/simulation/Cargo.toml index 5fbfc0e19f6fd..4d63de46759a4 100644 --- a/src/tests/simulation/Cargo.toml +++ b/src/tests/simulation/Cargo.toml @@ -50,7 +50,7 @@ risingwave_sqlsmith = { workspace = true } serde = "1.0.188" serde_derive = "1.0.188" serde_json = "1.0.107" -sqllogictest = "0.20" +sqllogictest = { git = "https://github.com/risinglightdb/sqllogictest-rs", rev = "91ee11fd9e3c20f62fc54034dc36abff9c92c934" } tempfile = "3" tikv-jemallocator = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio" } From 14b2f9c76ad20b7c644dfbf118f35205b9fe089d Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Thu, 20 Jun 2024 17:46:24 +0800 Subject: [PATCH 11/11] use released version sqllogictest Signed-off-by: Bugen Zhao --- Cargo.lock | 5 +++-- src/tests/simulation/Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 77af2f96d6da7..d92a59c8dcfaf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13365,8 +13365,9 @@ dependencies = [ [[package]] name = "sqllogictest" -version = "0.20.4" -source = "git+https://github.com/risinglightdb/sqllogictest-rs?rev=91ee11fd9e3c20f62fc54034dc36abff9c92c934#91ee11fd9e3c20f62fc54034dc36abff9c92c934" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab17edbf3a80a891d1a9650c0ceca0c0e8931d8e087a22d04d7645aed5fbb86a" dependencies = [ "async-trait", "educe", diff --git a/src/tests/simulation/Cargo.toml b/src/tests/simulation/Cargo.toml index 4d63de46759a4..b08af43583eed 100644 --- a/src/tests/simulation/Cargo.toml +++ b/src/tests/simulation/Cargo.toml @@ -50,7 +50,7 @@ risingwave_sqlsmith = { workspace = true } serde = "1.0.188" serde_derive = "1.0.188" serde_json = "1.0.107" -sqllogictest = { git = "https://github.com/risinglightdb/sqllogictest-rs", rev = "91ee11fd9e3c20f62fc54034dc36abff9c92c934" } +sqllogictest = "0.20.5" tempfile = "3" tikv-jemallocator = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio" }