Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into wrj/correlated-subquery
Browse files Browse the repository at this point in the history
Signed-off-by: Runji Wang <[email protected]>
  • Loading branch information
wangrunji0408 committed Apr 12, 2024
2 parents 98bba4c + 85f50ed commit 6e52760
Show file tree
Hide file tree
Showing 9 changed files with 313 additions and 101 deletions.
8 changes: 4 additions & 4 deletions src/binder/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl Binder {
/// Returns a `Scan` plan of table or a plan of subquery.
///
/// # Example
/// - `bind_table_factor(t)` => `(scan $1 (list $1.1 $1.2 $1.3) null)`
/// - `bind_table_factor(t)` => `(scan $1 (list $1.1 $1.2 $1.3) true)`
/// - `bind_table_factor(select 1)` => `(values (1))`
fn bind_table_factor(&mut self, table: TableFactor) -> Result {
match table {
Expand Down Expand Up @@ -142,7 +142,7 @@ impl Binder {
/// This function defines the table name so that it can be referred later.
///
/// # Example
/// - `bind_table_def(t)` => `(scan $1 (list $1.1 $1.2) null)`
/// - `bind_table_def(t)` => `(scan $1 (list $1.1 $1.2) true)`
pub(super) fn bind_table_def(
&mut self,
name: &ObjectName,
Expand Down Expand Up @@ -195,8 +195,8 @@ impl Binder {
// return a Scan node
let table = self.egraph.add(Node::Table(ref_id));
let cols = self.egraph.add(Node::List(ids.into()));
let null = self.egraph.add(Node::null());
let scan = self.egraph.add(Node::Scan([table, cols, null]));
let true_ = self.egraph.add(Node::true_());
let scan = self.egraph.add(Node::Scan([table, cols, true_]));
Ok(scan)
}

Expand Down
15 changes: 14 additions & 1 deletion src/executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,22 @@ impl<S: Storage> Builder<S> {
.collect_vec();
// analyze range filter
let filter = {
use std::ops::Bound;
let mut egraph = egg::EGraph::new(ExprAnalysis::default());
let root = egraph.add_expr(&self.recexpr(filter));
egraph[root].data.range.clone().map(|(_, r)| r)
let expr: Option<crate::storage::KeyRange> =
egraph[root].data.range.clone().map(|(_, r)| r);
if matches!(
expr,
Some(crate::storage::KeyRange {
start: Bound::Unbounded,
end: Bound::Unbounded
})
) {
None
} else {
expr
}
};

if let Some(subscriber) = self.views.get(&table_id) {
Expand Down
101 changes: 101 additions & 0 deletions src/planner/rules/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ pub fn rules() -> Vec<Rewrite> { vec![
rw!("and-null"; "(and null ?a)" => "null"),
rw!("and-comm"; "(and ?a ?b)" => "(and ?b ?a)"),
rw!("and-assoc"; "(and ?a (and ?b ?c))" => "(and (and ?a ?b) ?c)"),

rw!("and-gt-gt-fold"; "(and (> ?x ?a) (> ?x ?b))" => "(> ?x ?a)" if is_greater_than_or_equal("?a", "?b")),
rw!("and-ge-ge-fold"; "(and (>= ?x ?a) (>= ?x ?b))" => "(>= ?x ?a)" if is_greater_than_or_equal("?a", "?b")),
rw!("and-gt-ge-fold"; "(and (> ?x ?a) (>= ?x ?b))" => "(> ?x ?a)" if is_greater_than_or_equal("?a", "?b")),
rw!("and-ge-gt-fold"; "(and (>= ?x ?a) (> ?x ?b))" => "(>= ?x ?a)" if is_greater_than("?a", "?b")),

rw!("and-lt-lt-fold"; "(and (< ?x ?a) (< ?x ?b))" => "(< ?x ?a)" if is_less_than_or_equal("?a", "?b")),
rw!("and-le-le-fold"; "(and (<= ?x ?a) (<= ?x ?b))" => "(<= ?x ?a)" if is_less_than_or_equal("?a", "?b")),
rw!("and-lt-le-fold"; "(and (< ?x ?a) (<= ?x ?b))" => "(< ?x ?a)" if is_less_than_or_equal("?a", "?b")),
rw!("and-le-lt-fold"; "(and (<= ?x ?a) (< ?x ?b))" => "(<= ?x ?a)" if is_less_than("?a", "?b")),

rw!("and-gt-lt-conflict"; "(and (> ?x ?a) (< ?x ?b))" => "false" if is_greater_than_or_equal("?a", "?b")),

rw!("or-false"; "(or false ?a)" => "?a"),
rw!("or-true"; "(or true ?a)" => "true"),
Expand Down Expand Up @@ -172,6 +184,41 @@ fn is_not_zero(var: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
value_is(var, |v| !v.is_zero())
}

/// Rewrite condition that holds when the constant bound to `var1` is greater than or
/// equal to the constant bound to `var2`.
///
/// Delegates to `value_cmp`, so the condition is `false` unless both variables resolve
/// to constants of the same variant.
fn is_greater_than_or_equal(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
    value_cmp(var1, var2, |lhs, rhs| lhs >= rhs)
}

/// Rewrite condition that holds when the constant bound to `var1` is strictly greater
/// than the constant bound to `var2`.
///
/// Delegates to `value_cmp`, so the condition is `false` unless both variables resolve
/// to constants of the same variant.
fn is_greater_than(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
    value_cmp(var1, var2, |lhs, rhs| lhs > rhs)
}

/// Rewrite condition that holds when the constant bound to `var1` is less than or
/// equal to the constant bound to `var2`.
///
/// Delegates to `value_cmp`, so the condition is `false` unless both variables resolve
/// to constants of the same variant.
fn is_less_than_or_equal(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
    value_cmp(var1, var2, |lhs, rhs| lhs <= rhs)
}

/// Rewrite condition that holds when the constant bound to `var1` is strictly less
/// than the constant bound to `var2`.
///
/// Delegates to `value_cmp`, so the condition is `false` unless both variables resolve
/// to constants of the same variant.
fn is_less_than(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
    value_cmp(var1, var2, |lhs, rhs| lhs < rhs)
}

/// Builds a rewrite condition that compares the constants bound to two pattern variables.
///
/// The returned closure evaluates to `true` only when:
/// - both `v1` and `v2` resolve to e-classes whose analysis data carries a constant,
/// - the two constants are the same variant (per `std::mem::discriminant`), and
/// - `f` returns `true` for them, with `v1`'s constant passed first.
///
/// In every other case the closure evaluates to `false`.
fn value_cmp(
    v1: &str,
    v2: &str,
    f: impl Fn(&DataValue, &DataValue) -> bool,
) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
    let lhs_var = var(v1);
    let rhs_var = var(v2);

    move |egraph, _, subst| {
        // Bail out unless both sides are known constants.
        let (Some(lhs), Some(rhs)) = (
            &egraph[subst[lhs_var]].data.constant,
            &egraph[subst[rhs_var]].data.constant,
        ) else {
            return false;
        };
        // Constants of different variants are never treated as comparable.
        std::mem::discriminant(lhs) == std::mem::discriminant(rhs) && f(lhs, rhs)
    }
}

fn value_is(v: &str, f: impl Fn(&DataValue) -> bool) -> impl Fn(&mut EGraph, Id, &Subst) -> bool {
let v = var(v);
move |egraph, _, subst| {
Expand Down Expand Up @@ -217,4 +264,58 @@ mod tests {
rules(),
"(isnull 1)" => "false",
}

// Two strict lower bounds on the same expression: only the tighter one (`a > 2`) remains.
egg::test_fn! {
constant_gt_gt_fold,
rules(),
"(and (> a 1) (> a 2))" => "(> a 2)",
}

// Two inclusive lower bounds: only the tighter one (`a >= 2`) remains.
egg::test_fn! {
constant_ge_ge_fold,
rules(),
"(and (>= a 1) (>= a 2))" => "(>= a 2)",
}

// Strict and inclusive lower bounds on the same constant: the strict bound wins.
egg::test_fn! {
constant_gt_ge_fold,
rules(),
"(and (> a 2) (>= a 2))" => "(> a 2)",
}

// Inclusive bound with the larger constant subsumes the strict bound with the smaller one.
// NOTE(review): the operands appear in the opposite order from the `and-ge-gt-fold`
// pattern, so this presumably relies on `and-comm` as well — confirm.
egg::test_fn! {
constant_ge_gt_fold,
rules(),
"(and (> a 1) (>= a 2))" => "(>= a 2)",
}

// Two strict upper bounds: only the tighter one (`a < 1`) remains.
egg::test_fn! {
constant_lt_lt_fold,
rules(),
"(and (< a 1) (< a 2))" => "(< a 1)",
}

// Two inclusive upper bounds: only the tighter one (`a <= 1`) remains.
egg::test_fn! {
constant_le_le_fold,
rules(),
"(and (<= a 1) (<= a 2))" => "(<= a 1)",
}

// Strict and inclusive upper bounds on the same constant: the strict bound wins.
egg::test_fn! {
constant_lt_le_fold,
rules(),
"(and (< a 2) (<= a 2))" => "(< a 2)",
}

// Inclusive bound with the smaller constant subsumes the strict bound with the larger one.
// NOTE(review): the operands appear in the opposite order from the `and-le-lt-fold`
// pattern, so this presumably relies on `and-comm` as well — confirm.
egg::test_fn! {
constant_le_lt_fold,
rules(),
"(and (< a 2) (<= a 1))" => "(<= a 1)",
}

// A strict lower bound and a strict upper bound on the same constant cannot both hold,
// so the conjunction is expected to fold to `false`.
egg::test_fn! {
constant_gt_lt_conflict,
rules(),
"(and (> a 2) (< a 2))" => "false",
}
}
12 changes: 6 additions & 6 deletions src/planner/rules/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,11 @@ fn is_primary_key_range(expr: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool
let Some((column, _)) = &egraph[subst[var]].data.range else {
return false;
};
egraph
.analysis
.catalog
.get_column(column)
.unwrap()
.is_primary()
if let Some(col) = egraph.analysis.catalog.get_column(column) {
col.is_primary()
} else {
// handle the case that catalog is not initialized, like in test cases
false
}
}
}
4 changes: 2 additions & 2 deletions tests/planner_test/count.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Projection
├── cost: 1.13
├── rows: 1
└── Agg { aggs: [ rowcount ], cost: 1.11, rows: 1 }
└── Scan { table: t, list: [], filter: null, cost: 0, rows: 1 }
└── Scan { table: t, list: [], filter: true, cost: 0, rows: 1 }
*/

-- count(*) with projection
Expand All @@ -24,6 +24,6 @@ Projection
├── cost: 1.33
├── rows: 1
└── Agg { aggs: [ rowcount ], cost: 1.11, rows: 1 }
└── Scan { table: t, list: [], filter: null, cost: 0, rows: 1 }
└── Scan { table: t, list: [], filter: true, cost: 0, rows: 1 }
*/

2 changes: 1 addition & 1 deletion tests/planner_test/extract-common-predicate.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ Filter
├── cond: and { lhs: = { lhs: a, rhs: 1 }, rhs: or { lhs: = { lhs: b, rhs: 2 }, rhs: = { lhs: c, rhs: 3 } } }
├── cost: 4.955
├── rows: 0.375
└── Scan { table: t, list: [ a, b, c ], filter: null, cost: 3, rows: 1 }
└── Scan { table: t, list: [ a, b, c ], filter: true, cost: 3, rows: 1 }
*/

51 changes: 51 additions & 0 deletions tests/planner_test/storage-pushdown.planner.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
-- use merge join for primary key joins
explain select * from t1 join t2 on a = c;

/*
Join { type: inner, on: = { lhs: c, rhs: a }, cost: 0, rows: 0 }
├── Scan { table: t1, list: [ a, b ], filter: true, cost: 0, rows: 0 }
└── Scan { table: t2, list: [ c, d ], filter: true, cost: 0, rows: 0 }
*/

-- use storage order by instead of sorting by primary key
explain select * from t1 order by a;

/*
Scan { table: t1, list: [ a, b ], filter: true, cost: 0, rows: 0 }
*/

-- use storage filter for primary key
explain select * from t1 where a = 1;

/*
Scan { table: t1, list: [ a, b ], filter: = { lhs: a, rhs: 1 }, cost: 10, rows: 5 }
*/

-- use storage filter for a combination of primary key and other keys
explain select * from t1 where a > 1 and a < 3 and b > 1;

/*
Filter { cond: > { lhs: b, rhs: 1 }, cost: 16.05, rows: 2.5 }
└── Scan
├── table: t1
├── list: [ a, b ]
├── filter: and { lhs: > { lhs: 3, rhs: a }, rhs: > { lhs: a, rhs: 1 } }
├── cost: 10
└── rows: 5
*/

-- use storage filter for a combination of primary key (always false) and other keys
explain select * from t1 where a > 1 and a < 0 and b > 1;

/*
Scan { table: t1, list: [ a, b ], filter: false, cost: 10, rows: 5 }
*/

-- use storage filter for a combination of primary key (could be eliminated) and other keys
explain select * from t1 where a > 1 and a > 3 and b > 1;

/*
Filter { cond: and { lhs: > { lhs: a, rhs: 3 }, rhs: > { lhs: b, rhs: 1 } }, cost: 15.1, rows: 1.25 }
└── Scan { table: t1, list: [ a, b ], filter: > { lhs: a, rhs: 1 }, cost: 10, rows: 5 }
*/

47 changes: 47 additions & 0 deletions tests/planner_test/storage-pushdown.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
- sql: |
explain select * from t1 join t2 on a = c;
desc: use merge join for primary key joins
before:
- create table t1(a int primary key, b int);
create table t2(c int primary key, d int);
tasks:
- print
- sql: |
explain select * from t1 order by a;
desc: use storage order by instead of sorting by primary key
before:
- create table t1(a int primary key, b int);
tasks:
- print
- sql: |
explain select * from t1 where a = 1;
desc: use storage filter for primary key
before:
- create table t1(a int primary key, b int);
insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
tasks:
- print
- sql: |
explain select * from t1 where a > 1 and a < 3 and b > 1;
desc: use storage filter for a combination of primary key and other keys
before:
- create table t1(a int primary key, b int);
insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
tasks:
- print
- sql: |
explain select * from t1 where a > 1 and a < 0 and b > 1;
desc: use storage filter for a combination of primary key (always false) and other keys
before:
- create table t1(a int primary key, b int);
insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
tasks:
- print
- sql: |
explain select * from t1 where a > 1 and a > 3 and b > 1;
desc: use storage filter for a combination of primary key (could be eliminated) and other keys
before:
- create table t1(a int primary key, b int);
insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
tasks:
- print
Loading

0 comments on commit 6e52760

Please sign in to comment.