Merge remote-tracking branch 'origin/main' into wrj/correlated-subquery

Signed-off-by: Runji Wang <[email protected]>
risinglightdb · Apr 12, 2024 · 6e52760 · 6e52760
2 parents 98bba4c + 85f50ed
commit 6e52760
Show file tree

Hide file tree

Showing 9 changed files with 313 additions and 101 deletions.
diff --git a/src/binder/table.rs b/src/binder/table.rs
@@ -55,7 +55,7 @@ impl Binder {
     /// Returns a `Scan` plan of table or a plan of subquery.
     ///
     /// # Example
-    /// - `bind_table_factor(t)` => `(scan $1 (list $1.1 $1.2 $1.3) null)`
+    /// - `bind_table_factor(t)` => `(scan $1 (list $1.1 $1.2 $1.3) true)`
     /// - `bind_table_factor(select 1)` => `(values (1))`
     fn bind_table_factor(&mut self, table: TableFactor) -> Result {
         match table {
@@ -142,7 +142,7 @@ impl Binder {
     /// This function defines the table name so that it can be referred later.
     ///
     /// # Example
-    /// - `bind_table_def(t)` => `(scan $1 (list $1.1 $1.2) null)`
+    /// - `bind_table_def(t)` => `(scan $1 (list $1.1 $1.2) true)`
     pub(super) fn bind_table_def(
         &mut self,
         name: &ObjectName,
@@ -195,8 +195,8 @@ impl Binder {
         // return a Scan node
         let table = self.egraph.add(Node::Table(ref_id));
         let cols = self.egraph.add(Node::List(ids.into()));
-        let null = self.egraph.add(Node::null());
-        let scan = self.egraph.add(Node::Scan([table, cols, null]));
+        let true_ = self.egraph.add(Node::true_());
+        let scan = self.egraph.add(Node::Scan([table, cols, true_]));
         Ok(scan)
     }
 

diff --git a/src/executor/mod.rs b/src/executor/mod.rs
@@ -214,9 +214,22 @@ impl<S: Storage> Builder<S> {
                     .collect_vec();
                 // analyze range filter
                 let filter = {
+                    use std::ops::Bound;
                     let mut egraph = egg::EGraph::new(ExprAnalysis::default());
                     let root = egraph.add_expr(&self.recexpr(filter));
-                    egraph[root].data.range.clone().map(|(_, r)| r)
+                    let expr: Option<crate::storage::KeyRange> =
+                        egraph[root].data.range.clone().map(|(_, r)| r);
+                    if matches!(
+                        expr,
+                        Some(crate::storage::KeyRange {
+                            start: Bound::Unbounded,
+                            end: Bound::Unbounded
+                        })
+                    ) {
+                        None
+                    } else {
+                        expr
+                    }
                 };
 
                 if let Some(subscriber) = self.views.get(&table_id) {

diff --git a/src/planner/rules/expr.rs b/src/planner/rules/expr.rs
@@ -73,6 +73,18 @@ pub fn rules() -> Vec<Rewrite> { vec![
     rw!("and-null";  "(and null ?a)"    => "null"),
     rw!("and-comm";  "(and ?a ?b)"      => "(and ?b ?a)"),
     rw!("and-assoc"; "(and ?a (and ?b ?c))" => "(and (and ?a ?b) ?c)"),
+
+    rw!("and-gt-gt-fold"; "(and (>  ?x ?a) (>  ?x ?b))" => "(>  ?x ?a)" if is_greater_than_or_equal("?a", "?b")),
+    rw!("and-ge-ge-fold"; "(and (>= ?x ?a) (>= ?x ?b))" => "(>= ?x ?a)" if is_greater_than_or_equal("?a", "?b")),
+    rw!("and-gt-ge-fold"; "(and (>  ?x ?a) (>= ?x ?b))" => "(>  ?x ?a)" if is_greater_than_or_equal("?a", "?b")),
+    rw!("and-ge-gt-fold"; "(and (>= ?x ?a) (>  ?x ?b))" => "(>= ?x ?a)" if is_greater_than("?a", "?b")),
+
+    rw!("and-lt-lt-fold"; "(and (<  ?x ?a) (<  ?x ?b))" => "(<  ?x ?a)" if is_less_than_or_equal("?a", "?b")),
+    rw!("and-le-le-fold"; "(and (<= ?x ?a) (<= ?x ?b))" => "(<= ?x ?a)" if is_less_than_or_equal("?a", "?b")),
+    rw!("and-lt-le-fold"; "(and (<  ?x ?a) (<= ?x ?b))" => "(<  ?x ?a)" if is_less_than_or_equal("?a", "?b")),
+    rw!("and-le-lt-fold"; "(and (<= ?x ?a) (<  ?x ?b))" => "(<= ?x ?a)" if is_less_than("?a", "?b")),
+
+    rw!("and-gt-lt-conflict"; "(and (> ?x ?a) (< ?x ?b))" => "false" if is_greater_than_or_equal("?a", "?b")),
 
     rw!("or-false";  "(or false ?a)" => "?a"),
     rw!("or-true";   "(or true ?a)"  => "true"),
@@ -172,6 +184,41 @@ fn is_not_zero(var: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
     value_is(var, |v| !v.is_zero())
 }
 
+/// Returns a rewrite condition that holds when `var1` and `var2` are both bound to
+/// constants of the same `DataValue` variant and `var1.ge(var2)` is true.
+fn is_greater_than_or_equal(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
+    value_cmp(var1, var2, |lhs, rhs| lhs.ge(rhs))
+}
+
+/// Returns a rewrite condition that holds when `var1` and `var2` are both bound to
+/// constants of the same `DataValue` variant and `var1.gt(var2)` is true.
+fn is_greater_than(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
+    value_cmp(var1, var2, |lhs, rhs| lhs.gt(rhs))
+}
+
+/// Returns a rewrite condition that holds when `var1` and `var2` are both bound to
+/// constants of the same `DataValue` variant and `var1.le(var2)` is true.
+fn is_less_than_or_equal(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
+    value_cmp(var1, var2, |lhs, rhs| lhs.le(rhs))
+}
+
+/// Returns a rewrite condition that holds when `var1` and `var2` are both bound to
+/// constants of the same `DataValue` variant and `var1.lt(var2)` is true.
+fn is_less_than(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
+    value_cmp(var1, var2, |lhs, rhs| lhs.lt(rhs))
+}
+
+/// Builds a rewrite condition that compares the constants bound to two pattern variables.
+///
+/// The returned closure looks up the e-classes substituted for `v1` and `v2` and reads
+/// their `constant` analysis data. It evaluates to `true` only when both are known
+/// constants, both are the same `DataValue` variant, and `f` accepts the pair (passed in
+/// `v1`, `v2` order). In every other case the condition is `false`, so the guarded
+/// rewrite simply does not fire.
+fn value_cmp(
+    v1: &str,
+    v2: &str,
+    f: impl Fn(&DataValue, &DataValue) -> bool,
+) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
+    let lhs_var = var(v1);
+    let rhs_var = var(v2);
+
+    move |egraph, _, subst| {
+        // Bail out unless both sides have been folded to a constant.
+        let (Some(lhs), Some(rhs)) = (
+            &egraph[subst[lhs_var]].data.constant,
+            &egraph[subst[rhs_var]].data.constant,
+        ) else {
+            return false;
+        };
+        // Only values of the same variant are handed to the comparison.
+        std::mem::discriminant(lhs) == std::mem::discriminant(rhs) && f(lhs, rhs)
+    }
+}
+
 fn value_is(v: &str, f: impl Fn(&DataValue) -> bool) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
     let v = var(v);
     move |egraph, _, subst| {
@@ -217,4 +264,58 @@ mod tests {
         rules(),
         "(isnull 1)" => "false",
     }
+
+    // Two strict lower bounds on `a`: only the tighter one, `a > 2`, is expected to remain.
+    egg::test_fn! {
+        constant_gt_gt_fold,
+        rules(),
+        "(and (> a 1) (> a 2))" => "(> a 2)",
+    }
+
+    // Two inclusive lower bounds on `a`: only the tighter one, `a >= 2`, is expected to remain.
+    egg::test_fn! {
+        constant_ge_ge_fold,
+        rules(),
+        "(and (>= a 1) (>= a 2))" => "(>= a 2)",
+    }
+
+    // Strict and inclusive lower bounds with the same constant: the strict bound `a > 2` wins.
+    egg::test_fn! {
+        constant_gt_ge_fold,
+        rules(),
+        "(and (> a 2) (>= a 2))" => "(> a 2)",
+    }
+
+    // Inclusive bound with the larger constant is expected to win over a looser strict bound.
+    // The operands are written in the opposite order of the `and-ge-gt-fold` pattern, so this
+    // also exercises `and-comm`.
+    egg::test_fn! {
+        constant_ge_gt_fold,
+        rules(),
+        "(and (> a 1) (>= a 2))" => "(>= a 2)",
+    }
+
+    // Two strict upper bounds on `a`: only the tighter one, `a < 1`, is expected to remain.
+    egg::test_fn! {
+        constant_lt_lt_fold,
+        rules(),
+        "(and (< a 1) (< a 2))" => "(< a 1)",
+    }
+
+    // Two inclusive upper bounds on `a`: only the tighter one, `a <= 1`, is expected to remain.
+    egg::test_fn! {
+        constant_le_le_fold,
+        rules(),
+        "(and (<= a 1) (<= a 2))" => "(<= a 1)",
+    }
+
+    // Strict and inclusive upper bounds with the same constant: the strict bound `a < 2` wins.
+    egg::test_fn! {
+        constant_lt_le_fold,
+        rules(),
+        "(and (< a 2) (<= a 2))" => "(< a 2)",
+    }
+
+    // Inclusive bound with the smaller constant is expected to win over a looser strict bound.
+    // The operands are written in the opposite order of the `and-le-lt-fold` pattern, so this
+    // also exercises `and-comm`.
+    egg::test_fn! {
+        constant_le_lt_fold,
+        rules(),
+        "(and (< a 2) (<= a 1))" => "(<= a 1)",
+    }
+
+    // Contradictory bounds (`a > 2` and `a < 2`) are expected to collapse to `false`.
+    egg::test_fn! {
+        constant_gt_lt_conflict,
+        rules(),
+        "(and (> a 2) (< a 2))" => "false",
+    }
 }
diff --git a/src/planner/rules/range.rs b/src/planner/rules/range.rs
@@ -106,11 +106,11 @@ fn is_primary_key_range(expr: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool
         let Some((column, _)) = &egraph[subst[var]].data.range else {
             return false;
         };
-        egraph
-            .analysis
-            .catalog
-            .get_column(column)
-            .unwrap()
-            .is_primary()
+        if let Some(col) = egraph.analysis.catalog.get_column(column) {
+            col.is_primary()
+        } else {
+            // The column is missing from the catalog (e.g. the catalog is not initialized in
+            // test cases), so treat it as non-primary instead of panicking.
+            false
+        }
     }
 }
diff --git a/tests/planner_test/count.planner.sql b/tests/planner_test/count.planner.sql
@@ -8,7 +8,7 @@ Projection
 ├── cost: 1.13
 ├── rows: 1
 └── Agg { aggs: [ rowcount ], cost: 1.11, rows: 1 }
-    └── Scan { table: t, list: [], filter: null, cost: 0, rows: 1 }
+    └── Scan { table: t, list: [], filter: true, cost: 0, rows: 1 }
 */
 
 -- count(*) with projection
@@ -24,6 +24,6 @@ Projection
 ├── cost: 1.33
 ├── rows: 1
 └── Agg { aggs: [ rowcount ], cost: 1.11, rows: 1 }
-    └── Scan { table: t, list: [], filter: null, cost: 0, rows: 1 }
+    └── Scan { table: t, list: [], filter: true, cost: 0, rows: 1 }
 */
 
diff --git a/tests/planner_test/extract-common-predicate.planner.sql b/tests/planner_test/extract-common-predicate.planner.sql
@@ -6,6 +6,6 @@ Filter
 ├── cond: and { lhs: = { lhs: a, rhs: 1 }, rhs: or { lhs: = { lhs: b, rhs: 2 }, rhs: = { lhs: c, rhs: 3 } } }
 ├── cost: 4.955
 ├── rows: 0.375
-└── Scan { table: t, list: [ a, b, c ], filter: null, cost: 3, rows: 1 }
+└── Scan { table: t, list: [ a, b, c ], filter: true, cost: 3, rows: 1 }
 */
 
diff --git a/tests/planner_test/storage-pushdown.planner.sql b/tests/planner_test/storage-pushdown.planner.sql
@@ -0,0 +1,51 @@
+-- use merge join for primary key joins
+explain select * from t1 join t2 on a = c;
+
+/*
+Join { type: inner, on: = { lhs: c, rhs: a }, cost: 0, rows: 0 }
+├── Scan { table: t1, list: [ a, b ], filter: true, cost: 0, rows: 0 }
+└── Scan { table: t2, list: [ c, d ], filter: true, cost: 0, rows: 0 }
+*/
+
+-- use storage order by instead of sorting by primary key
+explain select * from t1 order by a;
+
+/*
+Scan { table: t1, list: [ a, b ], filter: true, cost: 0, rows: 0 }
+*/
+
+-- use storage filter for primary key
+explain select * from t1 where a = 1;
+
+/*
+Scan { table: t1, list: [ a, b ], filter: = { lhs: a, rhs: 1 }, cost: 10, rows: 5 }
+*/
+
+-- use storage filter for a combination of primary key and other keys
+explain select * from t1 where a > 1 and a < 3 and b > 1;
+
+/*
+Filter { cond: > { lhs: b, rhs: 1 }, cost: 16.05, rows: 2.5 }
+└── Scan
+    ├── table: t1
+    ├── list: [ a, b ]
+    ├── filter: and { lhs: > { lhs: 3, rhs: a }, rhs: > { lhs: a, rhs: 1 } }
+    ├── cost: 10
+    └── rows: 5
+*/
+
+-- use storage filter for a combination of primary key (always false) and other keys
+explain select * from t1 where a > 1 and a < 0 and b > 1;
+
+/*
+Scan { table: t1, list: [ a, b ], filter: false, cost: 10, rows: 5 }
+*/
+
+-- use storage filter for a combination of primary key (could be eliminated) and other keys
+explain select * from t1 where a > 1 and a > 3 and b > 1;
+
+/*
+Filter { cond: and { lhs: > { lhs: a, rhs: 3 }, rhs: > { lhs: b, rhs: 1 } }, cost: 15.1, rows: 1.25 }
+└── Scan { table: t1, list: [ a, b ], filter: > { lhs: a, rhs: 1 }, cost: 10, rows: 5 }
+*/
+
diff --git a/tests/planner_test/storage-pushdown.yml b/tests/planner_test/storage-pushdown.yml
@@ -0,0 +1,47 @@
+- sql: |
+    explain select * from t1 join t2 on a = c;
+  desc: use merge join for primary key joins
+  before:
+    - create table t1(a int primary key, b int);
+      create table t2(c int primary key, d int);
+  tasks:
+    - print
+- sql: |
+    explain select * from t1 order by a;
+  desc: use storage order by instead of sorting by primary key
+  before:
+    - create table t1(a int primary key, b int);
+  tasks:
+    - print
+- sql: |
+    explain select * from t1 where a = 1;
+  desc: use storage filter for primary key
+  before:
+    - create table t1(a int primary key, b int);
+      insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+  tasks:
+    - print
+- sql: |
+    explain select * from t1 where a > 1 and a < 3 and b > 1;
+  desc: use storage filter for a combination of primary key and other keys
+  before:
+    - create table t1(a int primary key, b int);
+      insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+  tasks:
+    - print
+- sql: |
+    explain select * from t1 where a > 1 and a < 0 and b > 1;
+  desc: use storage filter for a combination of primary key (always false) and other keys
+  before:
+    - create table t1(a int primary key, b int);
+      insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+  tasks:
+    - print
+- sql: |
+    explain select * from t1 where a > 1 and a > 3 and b > 1;
+  desc: use storage filter for a combination of primary key (could be eliminated) and other keys
+  before:
+    - create table t1(a int primary key, b int);
+      insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+  tasks:
+    - print