From 3431eabc3ef3c4f6431ba39546707c94a1d5b375 Mon Sep 17 00:00:00 2001 From: Li0k Date: Tue, 24 Dec 2024 21:03:53 +0800 Subject: [PATCH] feat(frontend): Supports cut OR condition and push down to storage (#19812) --- src/common/src/util/scan_range.rs | 605 ++++++++++++++++++ .../tests/testdata/input/range_scan.yaml | 347 ++++++---- .../tests/testdata/output/range_scan.yaml | 133 ++++ src/frontend/src/utils/condition.rs | 171 ++++- 4 files changed, 1124 insertions(+), 132 deletions(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index b45db071be3cc..5c56550eed279 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -12,12 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::ops::{Bound, RangeBounds}; +use itertools::Itertools; use paste::paste; use risingwave_pb::batch_plan::scan_range::Bound as PbBound; use risingwave_pb::batch_plan::ScanRange as PbScanRange; +use super::sort_util::{cmp_rows, OrderType}; use crate::hash::table_distribution::TableDistribution; use crate::hash::VirtualNode; use crate::types::{Datum, ScalarImpl}; @@ -98,6 +101,147 @@ impl ScanRange { range: full_range(), } } + + pub fn convert_to_range(&self) -> (Bound>, Bound>) { + fn handle_bound(eq_conds: &Vec, bound: &Bound>) -> Bound> { + match bound { + Bound::Included(literal) => { + let mut prefix = eq_conds.clone(); + prefix.extend_from_slice(literal); + Bound::Included(prefix) + } + Bound::Excluded(literal) => { + let mut prefix = eq_conds.clone(); + prefix.extend_from_slice(literal); + Bound::Excluded(prefix) + } + Bound::Unbounded => { + if eq_conds.is_empty() { + Bound::Unbounded + } else { + Bound::Included(eq_conds.clone()) + } + } + } + } + + let new_left = handle_bound(&self.eq_conds, &self.range.0); + let new_right = handle_bound(&self.eq_conds, &self.range.1); + (new_left, new_right) + } + + pub fn is_overlap(left: &ScanRange, right: &ScanRange, order_types: &[OrderType]) -> bool { + let range_left = left.convert_to_range(); + let range_right = right.convert_to_range(); + Self::range_overlap_check(range_left, range_right, order_types) + } + + fn range_overlap_check( + left: (Bound>, Bound>), + right: (Bound>, Bound>), + order_types: &[OrderType], + ) -> bool { + let (left_start, left_end) = &left; + let (right_start, right_end) = &right; + + let left_start_vec = match &left_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_start_vec = match &right_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + if left_start_vec.is_empty() && right_start_vec.is_empty() { + return true; + } + + let order_types = if order_types.iter().all(|o| o.is_ascending()) { + order_types + } else { + // reverse order types to ascending + &order_types + .iter() + .cloned() + .map(|o| if o.is_descending() { o.reverse() } else { o }) + .collect_vec() + }; + + // Unbounded is always less than any other bound + if left_start_vec.is_empty() { + // pass + } else if right_start_vec.is_empty() { + return Self::range_overlap_check(right, left, order_types); + } else { + assert!(!left_start_vec.is_empty()); + assert!(!right_start_vec.is_empty()); + let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + let cmp_start = cmp_rows( + &left_start_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); + + let right_start_before_left_start = cmp_start.is_gt(); + + if right_start_before_left_start { + return Self::range_overlap_check(right, left, order_types); + } + + if cmp_start == Ordering::Equal + && let (Bound::Included(_), Bound::Included(_)) = (left_start, right_start) + { + return true; + } + } + + let left_end_vec = match &left_end { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_end_vec = match &right_end { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + if left_end_vec.is_empty() && right_end_vec.is_empty() { + return true; + } + + if left_end_vec.is_empty() { + true + } else { + // cmp left_end and right_start + assert!(!left_end_vec.is_empty()); + assert!(!right_start_vec.is_empty()); + + let cmp_column_len = left_end_vec.len().min(right_start_vec.len()); + let cmp_end = cmp_rows( + &left_end_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); + + match cmp_end { + Ordering::Equal => { + if let (Bound::Included(_), Bound::Included(_)) = (left_end, right_start) { + return true; + } + } + + Ordering::Greater => { + return true; + } + + Ordering::Less => { + return false; + } + } + + false + } + } } pub const fn full_range() -> (Bound, Bound) { @@ -221,4 +365,465 @@ mod tests { assert_eq!(scan_range.try_compute_vnode(&dist), Some(vnode)); } + + #[test] + fn test_convert_to_range() { + { + // test empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(1))]), + Bound::Included(vec![Some(ScalarImpl::from(2))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Included(vec![Some(ScalarImpl::from(2))])); + } + + { + // test exclude bound with empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Excluded(vec![Some(ScalarImpl::from(1))]), + Bound::Excluded(vec![Some(ScalarImpl::from(2))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!(left, Bound::Excluded(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Excluded(vec![Some(ScalarImpl::from(2))])); + } + + { + // test include bound with empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(1))]), + Bound::Unbounded, + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Unbounded); + } + + { + // test exclude bound with non-empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Excluded(vec![Some(ScalarImpl::from(2))]), + Bound::Excluded(vec![Some(ScalarImpl::from(3))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!( + left, + Bound::Excluded(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!( + right, + Bound::Excluded(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(3))]) + ); + } + + { + // test include bound with non-empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(2))]), + Bound::Included(vec![Some(ScalarImpl::from(3))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!( + left, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!( + right, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(3))]) + ); + } + + { + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(2))]), + Bound::Unbounded, + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!( + left, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!(right, Bound::Included(vec![Some(ScalarImpl::from(1))])); + } + } + + #[test] + fn test_range_overlap_check() { + let order_types = vec![OrderType::ascending()]; + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3)), Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7)), Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(2))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Encluded) + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Encluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + assert!(!ScanRange::range_overlap_check( + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(10))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + (Bound::Unbounded, Bound::Unbounded,), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + (Bound::Unbounded, Bound::Unbounded), + (Bound::Unbounded, Bound::Unbounded), + &order_types + )); + + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + ), + &order_types + )); + } } diff --git a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml index 52863fa5aaf77..374da681d0987 100644 --- a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml @@ -6,182 +6,184 @@ date INTEGER); CREATE MATERIALIZED VIEW orders_count_by_user AS SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date; + CREATE MATERIALIZED VIEW orders_count_by_user_desc AS + SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date ORDER BY user_id DESC; expected_outputs: [] - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id < 43 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 + 1 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1/0 expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 2147483647 + 1 expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 'a' expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 'a' expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = '42' expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id IS NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date = 1111 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 42 AND date = 1111 AND 2>1 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE date > 1111 AND user_id = 42 AND 5<6 AND date <= 6666 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42, 43) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42+1, 44-1) expected_outputs: - - batch_plan + - batch_plan - name: If the IN list has a larger type than the column, the InputRef is casted. Currently this case is not converted to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42.0, 43.0) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in ('42', '43') expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in ('42', '43.0') expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (2147483648, 2147483649) AND date = 6666 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) expected_outputs: - - batch_plan + - batch_plan - name: test duplicate value in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 2222) expected_outputs: - - batch_plan + - batch_plan - name: test NULL in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, NULL) expected_outputs: - - batch_plan + - batch_plan - name: test NULL in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (NULL) expected_outputs: - - batch_plan + - batch_plan - name: test multiple in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date in (4444, 3333) expected_outputs: - - batch_plan + - batch_plan - name: test eq & in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date = 3333 expected_outputs: - - batch_plan + - batch_plan - name: test eq & in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date = 4444 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (2147483648, 2147483649) AND date in (2222, 3333) expected_outputs: - - batch_plan + - batch_plan - id: create_table_and_mv_ordered sql: | CREATE TABLE orders ( @@ -194,93 +196,93 @@ ORDER BY orders_count; expected_outputs: [] - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE user_id = 42 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE user_id > 42 AND orders_count = 10 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count = 10 expected_outputs: - - batch_plan + - batch_plan - name: merge mutiple upper bound before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count < 10 and orders_count < 30 expected_outputs: - - batch_plan + - batch_plan - name: merge include and exclude upper bound of same value before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count < 10 and orders_count <= 10 expected_outputs: - - batch_plan + - batch_plan - name: merge mutiple lower bound before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count > 30 expected_outputs: - - batch_plan + - batch_plan - name: merge include and exclude lower bound of same value before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count >= 10 expected_outputs: - - batch_plan + - batch_plan - name: invalid range before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count < 5 expected_outputs: - - batch_plan + - batch_plan - name: merge cmp and eq condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 20 and orders_count < 30 and orders_count = 25 expected_outputs: - - batch_plan + - batch_plan - name: invalid range of merging cmp and eq condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 20 and orders_count < 30 and orders_count = 35 expected_outputs: - - batch_plan + - batch_plan - name: merge cmp and const-in condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count in (10,20,30,40) and orders_count <30 expected_outputs: - - batch_plan + - batch_plan - name: invalid range of merging cmp and const-in condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count in (10,20,30,40) and orders_count > 50 expected_outputs: - - batch_plan + - batch_plan - name: merge null and cmp condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count is null and orders_count < 30 expected_outputs: - - batch_plan + - batch_plan - id: create_small sql: | CREATE TABLE t(x smallint); @@ -289,142 +291,241 @@ expected_outputs: [] - name: When the constant with larger type is out of the range of the column's type, we can convert it as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x = 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the upper bound of the column's type, we can convert < as true condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the upper bound of the column's type, we can convert > as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x > 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the lower bound of the column's type, we can convert < as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < -60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the lower bound of the column's type, we can convert > as true condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x > -60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is in range of the column's type, we can convert it. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < 3::bigint and x > 1::bigint; expected_outputs: - - batch_plan + - batch_plan - name: Can't push down the in-compatitble numeric type before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x = 3.4; expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or user_id = 2; expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date = 2222); expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)); expected_outputs: - - batch_plan + - batch_plan - name: When one arm of or clause contains other conditions, we can't convert it to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)) or (user_id != 3); expected_outputs: - - batch_plan + - batch_plan - name: When any arm of or clause is not equal type, we can't convert it to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 1 or user_id < 10 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or user_id is null expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 and user_id is null expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id is null and date = 1111) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id = 2 and date is null) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id is null and date is null) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id is null or (user_id is null and date is null) expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR); create index k1 on sbtest1(k); select count(k) from sbtest1 where k between 0 and 5; expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR); create index k1 on sbtest1(k); select count(k) from sbtest1 where k between 0 and 500; expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR, primary key(id)); create index k1 on sbtest1(k); select count(k) from sbtest1 where id between 0 and 5; expected_outputs: - - batch_plan + - batch_plan - sql: | create table t (k int primary key, v int); select v from t where k = 2147483648; -- out of range of int32 expected_outputs: - - logical_plan - - batch_plan + - logical_plan + - batch_plan +- name: When OR clauses contain non-overlapping conditions,, we can pushdown serveral scan_range. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id >= 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id >= 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + expected_outputs: + - batch_plan +- name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + expected_outputs: + - batch_plan +- name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id > 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id > 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id != 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id != 10); + expected_outputs: + - batch_plan diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index ed8b4b863fe64..e175558038c81 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -7,6 +7,8 @@ date INTEGER); CREATE MATERIALIZED VIEW orders_count_by_user AS SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date; + CREATE MATERIALIZED VIEW orders_count_by_user_desc AS + SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date ORDER BY user_id DESC; - before: - create_table_and_mv sql: | @@ -503,3 +505,134 @@ └─LogicalFilter { predicate: (t.k = 2147483648:Int64) } └─LogicalScan { table: t, columns: [t.k, t.v, t._rw_timestamp] } batch_plan: 'BatchValues { rows: [] }' +- name: When OR clauses contain non-overlapping conditions,, we can pushdown serveral scan_range. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 20:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id >= 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id >= 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id >= Int64(10)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id >= 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id >= 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id >= Int64(10)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) OR (orders_count_by_user.user_id = 15:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 20:Int32)) OR (orders_count_by_user_desc.user_id = 15:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id = Int64(15), orders_count_by_user_desc.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id < 30:Int32))) OR ((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20) AND orders_count_by_user.user_id < Int64(30), orders_count_by_user.user_id >= Int64(30) AND orders_count_by_user.user_id < Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user_desc.user_id < 10:Int32) AND (orders_count_by_user_desc.user_id > 1:Int32)) OR ((orders_count_by_user_desc.user_id > 20:Int32) AND (orders_count_by_user_desc.user_id < 30:Int32))) OR ((orders_count_by_user_desc.user_id >= 30:Int32) AND (orders_count_by_user_desc.user_id < 40:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id > Int64(1) AND orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id > Int64(20) AND orders_count_by_user_desc.user_id < Int64(30), orders_count_by_user_desc.user_id >= Int64(30) AND orders_count_by_user_desc.user_id < Int64(40)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id <= 30:Int32))) OR (((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32)) OR (orders_count_by_user.user_id = 15:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id >= Int64(20) AND orders_count_by_user.user_id <= Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user_desc.user_id < 10:Int32) AND (orders_count_by_user_desc.user_id > 1:Int32)) OR ((orders_count_by_user_desc.user_id > 20:Int32) AND (orders_count_by_user_desc.user_id <= 30:Int32))) OR (((orders_count_by_user_desc.user_id >= 30:Int32) AND (orders_count_by_user_desc.user_id < 40:Int32)) OR (orders_count_by_user_desc.user_id = 15:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id > Int64(1) AND orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id = Int64(15), orders_count_by_user_desc.user_id >= Int64(20) AND orders_count_by_user_desc.user_id <= Int64(40)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 30:Int32)) OR ((orders_count_by_user.user_id > 5:Int32) AND (orders_count_by_user.user_id < 15:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id <= Int64(15), orders_count_by_user.user_id > Int64(30)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 30:Int32)) OR ((orders_count_by_user_desc.user_id > 5:Int32) AND (orders_count_by_user_desc.user_id < 15:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id <= Int64(15), orders_count_by_user_desc.user_id > Int64(30)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id > 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id > 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id > 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 20:Int32) OR (orders_count_by_user_desc.user_id > 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id != 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id <> 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id != 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 20:Int32) OR (orders_count_by_user_desc.user_id <> 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index c3f8327f39c2e..fb5fd07c1fc07 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::collections::{BTreeMap, HashSet}; use std::fmt::{self, Debug}; use std::ops::Bound; @@ -24,6 +25,7 @@ use risingwave_common::catalog::{Schema, TableDesc}; use risingwave_common::types::{DataType, DefaultOrd, ScalarImpl}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::scan_range::{is_full_range, ScanRange}; +use risingwave_common::util::sort_util::{cmp_rows, OrderType}; use crate::error::Result; use crate::expr::{ @@ -299,7 +301,7 @@ impl Condition { table_desc: Rc, max_split_range_gap: u64, disjunctions: Vec, - ) -> Result, Self)>> { + ) -> Result, bool)>> { let disjunctions_result: Result, Self)>> = disjunctions .into_iter() .map(|x| { @@ -352,9 +354,154 @@ impl Condition { } } - Ok(Some((non_overlap_scan_ranges, Condition::true_cond()))) + Ok(Some((non_overlap_scan_ranges, false))) } else { - Ok(None) + let mut scan_ranges = vec![]; + for (scan_ranges_chunk, _) in disjunctions_result { + if scan_ranges_chunk.is_empty() { + // full scan range + return Ok(None); + } + + scan_ranges.extend(scan_ranges_chunk); + } + + let order_types = table_desc + .pk + .iter() + .cloned() + .map(|x| { + if x.order_type.is_descending() { + x.order_type.reverse() + } else { + x.order_type + } + }) + .collect_vec(); + scan_ranges.sort_by(|left, right| { + let (left_start, _left_end) = &left.convert_to_range(); + let (right_start, _right_end) = &right.convert_to_range(); + + let left_start_vec = match &left_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_start_vec = match &right_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + if left_start_vec.is_empty() && right_start_vec.is_empty() { + return Ordering::Less; + } + + if left_start_vec.is_empty() { + return Ordering::Less; + } + + if right_start_vec.is_empty() { + return Ordering::Greater; + } + + let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + cmp_rows( + &left_start_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ) + }); + + if scan_ranges.is_empty() { + return Ok(None); + } + + if scan_ranges.len() == 1 { + return Ok(Some((scan_ranges, true))); + } + + let mut output_scan_ranges: Vec = vec![]; + output_scan_ranges.push(scan_ranges[0].clone()); + let mut idx = 1; + loop { + if idx >= scan_ranges.len() { + break; + } + + let scan_range_left = output_scan_ranges.last_mut().unwrap(); + let scan_range_right = &scan_ranges[idx]; + + if scan_range_left.eq_conds == scan_range_right.eq_conds { + // range merge + + if !ScanRange::is_overlap(scan_range_left, scan_range_right, &order_types) { + // not merge + output_scan_ranges.push(scan_range_right.clone()); + idx += 1; + continue; + } + + // merge range + fn merge_bound( + left_scan_range: &Bound>>, + right_scan_range: &Bound>>, + order_types: &[OrderType], + left_bound: bool, + ) -> Bound>> { + let left_scan_range = match left_scan_range { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + Bound::Unbounded => return Bound::Unbounded, + }; + + let right_scan_range = match right_scan_range { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + Bound::Unbounded => return Bound::Unbounded, + }; + + let cmp_len = left_scan_range.len().min(right_scan_range.len()); + + let cmp = cmp_rows( + &left_scan_range[..cmp_len], + &right_scan_range[..cmp_len], + &order_types[..cmp_len], + ); + + let bound = { + if (cmp.is_le() && left_bound) || (cmp.is_ge() && !left_bound) { + left_scan_range.to_vec() + } else { + right_scan_range.to_vec() + } + }; + + // Included Bound just for convenience, the correctness will be guaranteed by the upper level filter. + Bound::Included(bound) + } + + scan_range_left.range.0 = merge_bound( + &scan_range_left.range.0, + &scan_range_right.range.0, + &order_types, + true, + ); + + scan_range_left.range.1 = merge_bound( + &scan_range_left.range.1, + &scan_range_right.range.1, + &order_types, + false, + ); + + if scan_range_left.is_full_table_scan() { + return Ok(None); + } + } else { + output_scan_ranges.push(scan_range_right.clone()); + } + + idx += 1; + } + + Ok(Some((output_scan_ranges, true))) } } @@ -497,12 +644,18 @@ impl Condition { // It's an OR. if self.conjunctions.len() == 1 { if let Some(disjunctions) = self.conjunctions[0].as_or_disjunctions() { - if let Some((scan_ranges, other_condition)) = Self::disjunctions_to_scan_ranges( - table_desc, - max_split_range_gap, - disjunctions, - )? { - return Ok((scan_ranges, other_condition)); + if let Some((scan_ranges, maintaining_condition)) = + Self::disjunctions_to_scan_ranges( + table_desc, + max_split_range_gap, + disjunctions, + )? + { + if maintaining_condition { + return Ok((scan_ranges, self)); + } else { + return Ok((scan_ranges, Condition::true_cond())); + } } else { return Ok((vec![], self)); }