From d5d70a2cafba59dc695d8b6e3262abdb797f23c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Sun, 4 Feb 2024 12:58:44 +0800 Subject: [PATCH] Implement eliminate limit rule --- README.md | 4 +- bustubx/src/database.rs | 11 +-- bustubx/src/optimizer/logical_optimizer.rs | 11 ++- bustubx/src/optimizer/mod.rs | 2 +- bustubx/src/optimizer/rule/eliminate_limit.rs | 70 +++++++++++++++++++ bustubx/src/planner/logical_plan/mod.rs | 7 +- 6 files changed, 93 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 2712a46..822058a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # BustubX - a relational database for educational purpose (CMU 15-445) - [ ] DDL - [ ] DML -- [ ] Rule-based Optimizer -- [ ] Volcano Executor +- [x] Rule-based Optimizer +- [x] Volcano Executor - [ ] Parallel Execution - [ ] B+ Tree Index - [ ] Multi-Version Concurrency Control diff --git a/bustubx/src/database.rs b/bustubx/src/database.rs index 4313ae6..67a5d5d 100644 --- a/bustubx/src/database.rs +++ b/bustubx/src/database.rs @@ -12,6 +12,7 @@ use crate::{ planner::{LogicalPlanner, PlannerContext}, storage::{DiskManager, Tuple}, }; +use crate::optimizer::LogicalOptimizer; pub struct Database { disk_manager: Arc, @@ -50,11 +51,13 @@ impl Database { } pub fn run(&mut self, sql: &str) -> BustubxResult> { - let logical_plan = self.build_logical_plan(sql)?; - println!("logical plan: \n{}", logical_plan); + let logical_plan = self.create_logical_plan(sql)?; + // println!("logical plan: \n{}", logical_plan); + + let optimized_logical_plan = LogicalOptimizer::new().optimize(&logical_plan)?; // logical plan -> physical plan - let physical_plan = PhysicalPlanner::new().create_physical_plan(logical_plan); + let physical_plan = PhysicalPlanner::new().create_physical_plan(optimized_logical_plan); // println!("{:?}", physical_plan); let execution_ctx = ExecutionContext::new(&mut self.catalog); @@ -66,7 +69,7 @@ impl Database { Ok(tuples) } - pub fn build_logical_plan(&mut self, sql: &str) -> BustubxResult { + pub fn create_logical_plan(&mut self, sql: &str) -> BustubxResult { // sql -> ast let stmts = crate::parser::parse_sql(sql)?; if stmts.len() != 1 { diff --git a/bustubx/src/optimizer/logical_optimizer.rs b/bustubx/src/optimizer/logical_optimizer.rs index 3256d1f..87e1a55 100644 --- a/bustubx/src/optimizer/logical_optimizer.rs +++ b/bustubx/src/optimizer/logical_optimizer.rs @@ -1,5 +1,5 @@ use crate::error::BustubxResult; -use crate::optimizer::rule::PushDownLimit; +use crate::optimizer::rule::{EliminateLimit, PushDownLimit}; use crate::planner::logical_plan::LogicalPlan; use std::sync::Arc; @@ -38,7 +38,7 @@ pub struct LogicalOptimizer { impl LogicalOptimizer { pub fn new() -> Self { let rules: Vec> = - vec![Arc::new(PushDownLimit {})]; + vec![Arc::new(EliminateLimit {})]; Self { rules, @@ -46,6 +46,13 @@ impl LogicalOptimizer { } } + pub fn with_rules(rules: Vec>) -> Self { + Self { + rules, + max_passes: 3, + } + } + pub fn optimize(&self, plan: &LogicalPlan) -> BustubxResult { let mut new_plan = plan.clone(); let mut i = 0; diff --git a/bustubx/src/optimizer/mod.rs b/bustubx/src/optimizer/mod.rs index 036846c..37f7178 100644 --- a/bustubx/src/optimizer/mod.rs +++ b/bustubx/src/optimizer/mod.rs @@ -1,4 +1,4 @@ mod logical_optimizer; -mod rule; +pub mod rule; pub use logical_optimizer::{LogicalOptimizer, LogicalOptimizerRule}; diff --git a/bustubx/src/optimizer/rule/eliminate_limit.rs b/bustubx/src/optimizer/rule/eliminate_limit.rs index 880f660..57e4a65 100644 --- a/bustubx/src/optimizer/rule/eliminate_limit.rs +++ b/bustubx/src/optimizer/rule/eliminate_limit.rs @@ -1 +1,71 @@ +use crate::optimizer::logical_optimizer::ApplyOrder; +use crate::optimizer::LogicalOptimizerRule; +use crate::planner::logical_plan::{EmptyRelation, LogicalPlan}; +use crate::BustubxResult; + pub struct EliminateLimit; + +impl LogicalOptimizerRule for EliminateLimit { + fn try_optimize(&self, plan: &LogicalPlan) -> BustubxResult> { + if let LogicalPlan::Limit(limit) = plan { + match limit.limit { + Some(fetch) => { + if fetch == 0 { + return Ok(Some(LogicalPlan::EmptyRelation(EmptyRelation { + produce_one_row: false, + schema: limit.input.schema().clone(), + }))); + } + } + None => { + if limit.offset == 0 { + let input = limit.input.as_ref(); + // input also can be Limit, so we should apply again. + return Ok(Some( + self.try_optimize(input)?.unwrap_or_else(|| input.clone()), + )); + } + } + } + } + Ok(None) + } + + fn name(&self) -> &str { + "EliminateLimit" + } + + fn apply_order(&self) -> Option { + Some(ApplyOrder::BottomUp) + } +} + +#[cfg(test)] +mod tests { + use crate::optimizer::rule::EliminateLimit; + use crate::optimizer::LogicalOptimizer; + use crate::planner::logical_plan::LogicalPlan; + use crate::Database; + use std::sync::Arc; + + fn build_optimizer() -> LogicalOptimizer { + LogicalOptimizer::with_rules(vec![Arc::new(EliminateLimit)]) + } + #[test] + fn eliminate_limit() { + let mut db = Database::new_temp().unwrap(); + db.run("create table t1 (a int)").unwrap(); + + let plan = db.create_logical_plan("select a from t1 limit 0").unwrap(); + let optimized_plan = build_optimizer().optimize(&plan).unwrap(); + assert!(matches!(optimized_plan, LogicalPlan::EmptyRelation(_))); + + let plan = db.create_logical_plan("select a from t1 offset 0").unwrap(); + let optimized_plan = build_optimizer().optimize(&plan).unwrap(); + if let LogicalPlan::Project(p) = optimized_plan { + assert!(matches!(p.input.as_ref(), LogicalPlan::TableScan(_))); + } else { + panic!("the first node should be project"); + } + } +} diff --git a/bustubx/src/planner/logical_plan/mod.rs b/bustubx/src/planner/logical_plan/mod.rs index d000448..c2cb0f3 100644 --- a/bustubx/src/planner/logical_plan/mod.rs +++ b/bustubx/src/planner/logical_plan/mod.rs @@ -11,8 +11,6 @@ mod table_scan; mod util; mod values; -use crate::catalog::{SchemaRef, EMPTY_SCHEMA_REF, INSERT_OUTPUT_SCHEMA_REF}; -use crate::{BustubxError, BustubxResult}; pub use create_index::CreateIndex; pub use create_table::CreateTable; pub use empty_relation::EmptyRelation; @@ -22,11 +20,14 @@ pub use join::{Join, JoinType}; pub use limit::Limit; pub use project::Project; pub use sort::{OrderByExpr, Sort}; -use std::sync::Arc; pub use table_scan::TableScan; pub use util::*; pub use values::Values; +use crate::catalog::{SchemaRef, EMPTY_SCHEMA_REF, INSERT_OUTPUT_SCHEMA_REF}; +use crate::{BustubxError, BustubxResult}; +use std::sync::Arc; + #[derive(Debug, Clone)] pub enum LogicalPlan { CreateTable(CreateTable),