From 41956d0cce54980a22fc7ae75e0628cb7c7ce842 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:18:21 +0800 Subject: [PATCH] fix(query): cume_dist() always return unbounded frame (#16242) * fix(query): cume_dist() always return unbounded frame * fix test --- .../sql/src/planner/semantic/type_check.rs | 7 ++ .../query/window_function/expr_in_window.test | 116 +++++++++--------- .../window_function/window_ignore_nulls.test | 22 ++-- .../window_function/window_wisconsin.test | 2 +- .../window_wisconsin_block_size_1.test | 18 +++ 5 files changed, 95 insertions(+), 70 deletions(-) create mode 100644 tests/sqllogictests/suites/query/window_function/window_wisconsin_block_size_1.test diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 3a2839aa5c81..c8bde2adf2f2 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -1388,6 +1388,13 @@ impl<'a> TypeChecker<'a> { } }); } + WindowFuncType::CumeDist => { + return Ok(WindowFuncFrame { + units: WindowFuncFrameUnits::Range, + start_bound: WindowFuncFrameBound::Preceding(None), + end_bound: WindowFuncFrameBound::Following(None), + }); + } _ => {} } if let Some(frame) = window_frame { diff --git a/tests/sqllogictests/suites/query/window_function/expr_in_window.test b/tests/sqllogictests/suites/query/window_function/expr_in_window.test index 0759ff65ed51..4879aa802795 100644 --- a/tests/sqllogictests/suites/query/window_function/expr_in_window.test +++ b/tests/sqllogictests/suites/query/window_function/expr_in_window.test @@ -14,7 +14,7 @@ statement ok INSERT INTO empsalary VALUES ('develop', 10, 5200, '2007-08-01'), ('sales', 1, 5000, '2006-10-01'), ('personnel', 5, 3500, '2007-12-10'), ('sales', 4, 4800, '2007-08-08'), ('personnel', 2, 3900, '2006-12-23'), ('develop', 7, 4200, '2008-01-01'), ('develop', 9, 4500, '2008-01-01'), ('sales', 3, 4800, '2007-08-01'), ('develop', 8, 6000, '2006-10-01'), ('develop', 11, 5200, '2007-08-15') statement ok -CREATE TABLE `empsalary2` ( `depname` VARCHAR null, `empno` BIGINT null, `salary` INT null, `enroll_date` DATE null) +CREATE or replace TABLE `empsalary2` ( `depname` VARCHAR null, `empno` BIGINT null, `salary` INT null, `enroll_date` DATE null) statement ok INSERT INTO empsalary2 SELECT * FROM empsalary @@ -48,7 +48,7 @@ SELECT enroll_date, empno, salary, sum(salary + 1) OVER (PARTITION BY enroll_dat 2008-01-01 9 4500 8702 query IIIRT -SELECT empno, empno % 2, salary, sum(salary + 1) OVER (PARTITION BY empno % 2 ORDER BY enroll_date + 1) as W, enroll_date FROM empsalary order by W +SELECT empno, empno % 2, salary, sum(salary + 1) OVER (PARTITION BY empno % 2 ORDER BY enroll_date + 1) as W, enroll_date FROM empsalary order by W, empno ---- 1 1 5000 5001 2006-10-01 8 0 6000 6001 2006-10-01 @@ -62,7 +62,7 @@ SELECT empno, empno % 2, salary, sum(salary + 1) OVER (PARTITION BY empno % 2 OR 9 1 4500 27206 2008-01-01 query TII -select depname, sum(sum(salary) + 1) over (partition by 1 order by sum(salary)), sum(salary) from empsalary group by depname; +select depname, sum(sum(salary) + 1) over (partition by 1 order by sum(salary)) as w, sum(salary) from empsalary group by depname order by w; ---- personnel 7401 7400 sales 22002 14600 @@ -70,97 +70,97 @@ develop 47103 25100 # lag ignore default value query II -SELECT salary, lag(salary + 1, 2) OVER (ORDER BY enroll_date) FROM empsalary +SELECT salary, lag(salary + 1, 2) OVER (ORDER BY enroll_date) as w FROM empsalary order by salary, w ---- -5000 NULL -6000 NULL +3500 4801 3900 5001 -5200 6001 +4200 5201 +4500 3501 4800 3901 4800 5201 +5000 NULL 5200 4801 -3500 4801 -4200 5201 -4500 3501 +5200 6001 +6000 NULL # lag with constant default value query II -SELECT salary, lag(salary + 1, 2, 888) OVER (ORDER BY enroll_date) FROM empsalary +SELECT salary, lag(salary + 1, 2, 888) OVER (ORDER BY enroll_date) as w FROM empsalary order by salary, w ---- -5000 888 -6000 888 +3500 4801 3900 5001 -5200 6001 +4200 5201 +4500 3501 4800 3901 4800 5201 +5000 888 5200 4801 -3500 4801 -4200 5201 -4500 3501 +5200 6001 +6000 888 # lag with column default value query II -SELECT salary, lag(salary + 1, 2, salary + 2) OVER (ORDER BY enroll_date) FROM empsalary +SELECT salary, lag(salary + 1, 2, salary + 2) OVER (ORDER BY enroll_date) as w FROM empsalary order by salary, w ---- -5000 5002 -6000 6002 +3500 4801 3900 5001 -5200 6001 +4200 5201 +4500 3501 4800 3901 4800 5201 +5000 5002 5200 4801 -3500 4801 -4200 5201 -4500 3501 +5200 6001 +6000 6002 # lead ignore default value query II -SELECT salary, lead(salary + 1, 2) OVER (ORDER BY enroll_date) FROM empsalary +SELECT salary, lead(salary + 1, 2) OVER (ORDER BY enroll_date) as w FROM empsalary order by salary, w ---- -5000 3901 -6000 5201 -3900 4801 -5200 4801 -4800 5201 -4800 3501 -5200 4201 3500 4501 +3900 4801 4200 NULL 4500 NULL +4800 3501 +4800 5201 +5000 3901 +5200 4201 +5200 4801 +6000 5201 # lead with constant default value query II -SELECT salary, lead(salary + 1, 2, 888) OVER (ORDER BY enroll_date) FROM empsalary +SELECT salary, lead(salary + 1, 2, 888) OVER (ORDER BY enroll_date) as w FROM empsalary order by salary, w ---- -5000 3901 -6000 5201 -3900 4801 -5200 4801 -4800 5201 -4800 3501 -5200 4201 3500 4501 +3900 4801 4200 888 4500 888 +4800 3501 +4800 5201 +5000 3901 +5200 4201 +5200 4801 +6000 5201 # lead with column default value query II -SELECT salary, lead(salary + 1, 2, salary + 2) OVER (ORDER BY enroll_date) FROM empsalary +SELECT salary, lead(salary + 1, 2, salary + 2) OVER (ORDER BY enroll_date) w FROM empsalary order by salary, w ---- -5000 3901 -6000 5201 -3900 4801 -5200 4801 -4800 5201 -4800 3501 -5200 4201 3500 4501 +3900 4801 4200 4202 4500 4502 +4800 3501 +4800 5201 +5000 3901 +5200 4201 +5200 4801 +6000 5201 # lead with nullable column query II -SELECT salary, lead(salary + 1, 2) OVER (ORDER BY enroll_date) c FROM empsalary2 ORDER BY salary desc +SELECT salary, lead(salary + 1, 2) OVER (ORDER BY enroll_date) c FROM empsalary2 ORDER BY salary desc, c desc ---- 6000 5201 5200 4801 @@ -174,7 +174,7 @@ SELECT salary, lead(salary + 1, 2) OVER (ORDER BY enroll_date) c FROM empsalary2 3500 4501 query II -SELECT salary, lead(salary + 1, 2, salary + 2) OVER (ORDER BY enroll_date) c FROM empsalary2 ORDER BY salary desc +SELECT salary, lead(salary + 1, 2, salary + 2) OVER (ORDER BY enroll_date) c FROM empsalary2 ORDER BY salary desc, c desc ---- 6000 5201 5200 4801 @@ -189,13 +189,13 @@ SELECT salary, lead(salary + 1, 2, salary + 2) OVER (ORDER BY enroll_date) c FRO # first_value query TII -SELECT enroll_date, salary, first_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary +SELECT enroll_date, salary, first_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary order by enroll_date, salary ---- 2006-10-01 5000 5001 2006-10-01 6000 5001 2006-12-23 3900 5001 -2007-08-01 5200 5001 2007-08-01 4800 5001 +2007-08-01 5200 5001 2007-08-08 4800 5001 2007-08-15 5200 5001 2007-12-10 3500 5001 @@ -203,13 +203,13 @@ SELECT enroll_date, salary, first_value(salary + 1) OVER (ORDER BY enroll_date) 2008-01-01 4500 5001 query TII -SELECT enroll_date, salary, first_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary2 +SELECT enroll_date, salary, first_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary2 order by enroll_date, salary ---- 2006-10-01 5000 5001 2006-10-01 6000 5001 2006-12-23 3900 5001 -2007-08-01 5200 5001 2007-08-01 4800 5001 +2007-08-01 5200 5001 2007-08-08 4800 5001 2007-08-15 5200 5001 2007-12-10 3500 5001 @@ -218,13 +218,13 @@ SELECT enroll_date, salary, first_value(salary + 1) OVER (ORDER BY enroll_date) # last_value query TII -SELECT enroll_date, salary, last_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary +SELECT enroll_date, salary, last_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary order by enroll_date, salary ---- 2006-10-01 5000 6001 2006-10-01 6000 6001 2006-12-23 3900 3901 -2007-08-01 5200 4801 2007-08-01 4800 4801 +2007-08-01 5200 4801 2007-08-08 4800 4801 2007-08-15 5200 5201 2007-12-10 3500 3501 @@ -233,13 +233,13 @@ SELECT enroll_date, salary, last_value(salary + 1) OVER (ORDER BY enroll_date) F # last_value query TII -SELECT enroll_date, salary, last_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary2 +SELECT enroll_date, salary, last_value(salary + 1) OVER (ORDER BY enroll_date) FROM empsalary2 order by enroll_date, salary ---- 2006-10-01 5000 6001 2006-10-01 6000 6001 2006-12-23 3900 3901 -2007-08-01 5200 4801 2007-08-01 4800 4801 +2007-08-01 5200 4801 2007-08-08 4800 4801 2007-08-15 5200 5201 2007-12-10 3500 3501 diff --git a/tests/sqllogictests/suites/query/window_function/window_ignore_nulls.test b/tests/sqllogictests/suites/query/window_function/window_ignore_nulls.test index 23c604a98967..727ac86be890 100644 --- a/tests/sqllogictests/suites/query/window_function/window_ignore_nulls.test +++ b/tests/sqllogictests/suites/query/window_function/window_ignore_nulls.test @@ -2,7 +2,7 @@ statement ok set max_block_size = 1; statement ok -CREATE or replace TABLE issue2549 AS SELECT * FROM (VALUES +CREATE or replace TABLE default.issue2549 AS SELECT * FROM (VALUES (0, 1, 614), (1, 1, null), (2, 1, null), @@ -20,7 +20,7 @@ SELECT ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id -FROM issue2549 order by 1,2,3,4 +FROM default.issue2549 order by 1,2,3,4 ---- 0 1 614 NULL 1 1 NULL 614 @@ -38,7 +38,7 @@ SELECT ORDER BY id ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING ) AS last_order_id -FROM issue2549 order by 1,2,3,4 +FROM default.issue2549 order by 1,2,3,4 ---- 0 1 614 614 1 1 NULL 614 @@ -56,7 +56,7 @@ SELECT ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id -FROM issue2549 order by 1,2,3,4 +FROM default.issue2549 order by 1,2,3,4 ---- 0 1 614 NULL 1 1 NULL NULL @@ -69,7 +69,7 @@ statement ok unset max_block_size; statement ok -CREATE or replace TABLE issue2549 AS SELECT * FROM (VALUES +CREATE or replace TABLE default.issue2549 AS SELECT * FROM (VALUES (0, 1, 614), (1, 1, null), (2, 1, null), @@ -87,7 +87,7 @@ SELECT ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id -FROM issue2549 order by 1,2,3,4 +FROM default.issue2549 order by 1,2,3,4 ---- 0 1 614 NULL 1 1 NULL 614 @@ -105,7 +105,7 @@ SELECT ORDER BY id ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING ) AS last_order_id -FROM issue2549 order by 1,2,3,4 +FROM default.issue2549 order by 1,2,3,4 ---- 0 1 614 614 1 1 NULL 614 @@ -123,7 +123,7 @@ SELECT ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id -FROM issue2549 order by 1,2,3,4 +FROM default.issue2549 order by 1,2,3,4 ---- 0 1 614 NULL 1 1 NULL NULL @@ -132,10 +132,10 @@ FROM issue2549 order by 1,2,3,4 4 1 2027 639 statement error 1065 -SELECT id, user_id, order_id, sum (order_id) IGNORE NULLS over ( PARTITION BY user_id ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id FROM issue2549 +SELECT id, user_id, order_id, sum (order_id) IGNORE NULLS over ( PARTITION BY user_id ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id FROM default.issue2549 statement error 1065 -SELECT id, user_id, order_id, lag(order_id, 1) IGNORE NULLS over ( PARTITION BY user_id ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id FROM issue2549 +SELECT id, user_id, order_id, lag(order_id, 1) IGNORE NULLS over ( PARTITION BY user_id ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING ) AS last_order_id FROM default.issue2549 statement ok -drop TABLE issue2549 +drop TABLE default.issue2549 diff --git a/tests/sqllogictests/suites/query/window_function/window_wisconsin.test b/tests/sqllogictests/suites/query/window_function/window_wisconsin.test index ca69ce76a0d2..4c314a1983f5 100644 --- a/tests/sqllogictests/suites/query/window_function/window_wisconsin.test +++ b/tests/sqllogictests/suites/query/window_function/window_wisconsin.test @@ -145,7 +145,7 @@ SELECT ntile(5) OVER (ORDER BY ten, four) nn FROM tenk1 ORDER BY ten, four, nn # cume_dist query I -SELECT CAST(cume_dist() OVER (PARTITION BY four ORDER BY ten RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)*10 as INT) FROM tenk1 WHERE unique2 < 10 order by four, ten +SELECT CAST(cume_dist() OVER (PARTITION BY four ORDER BY ten)*10 as INT) FROM tenk1 WHERE unique2 < 10 order by four, ten ---- 7 7 diff --git a/tests/sqllogictests/suites/query/window_function/window_wisconsin_block_size_1.test b/tests/sqllogictests/suites/query/window_function/window_wisconsin_block_size_1.test new file mode 100644 index 000000000000..88f187f7e284 --- /dev/null +++ b/tests/sqllogictests/suites/query/window_function/window_wisconsin_block_size_1.test @@ -0,0 +1,18 @@ +statement ok +set max_block_size = 1; + +include ./expr_in_window.test +include ./named_window_basic.test +include ./window_basic.test +include ./window_bound.test +include ./window_ignore_nulls.test +include ./window_in_expr.test +include ./window_ntile.test +include ./window_partition_spill.test +include ./window_qualify.test +include ./window_range.test +include ./window_subquery.test +include ./window_wisconsin.test + +statement ok +unset global max_block_size;