refactor: remove Internal node and add more system catalogs #832

Merged · 10 commits · Mar 11, 2024
src/binder/copy.rs (3 additions & 10 deletions)
@@ -62,7 +62,7 @@ impl Binder {
} => (table_name, columns),
CopySource::Query(_) => return Err(BindError::Todo("copy from query".into())),
};
let (table, is_internal) = self.bind_table_id(&table_name)?;
let (table, _) = self.bind_table_id(&table_name)?;

let cols = self.bind_table_columns(&table_name, &columns)?;

@@ -76,18 +76,11 @@

let copy = if to {
// COPY <source_table> TO <dest_file>
let scan = if is_internal {
self.egraph.add(Node::Internal([table, cols]))
} else {
let true_ = self.egraph.add(Node::true_());
self.egraph.add(Node::Scan([table, cols, true_]))
};
let true_ = self.egraph.add(Node::true_());
let scan = self.egraph.add(Node::Scan([table, cols, true_]));
self.egraph.add(Node::CopyTo([ext_source, scan]))
} else {
// COPY <dest_table> FROM <source_file>
if is_internal {
return Err(BindError::NotSupportedOnInternalTable);
}
let types = self.type_(cols)?;
let types = self.egraph.add(Node::Type(types));
let copy = self.egraph.add(Node::CopyFrom([ext_source, types]));
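
With the Internal node gone, COPY <table> TO <file> is planned identically for user tables and pg_catalog tables: the binder always emits a Scan with a constant-true filter and wraps it in CopyTo. Below is a minimal, self-contained sketch of that plan shape; the Plan enum and plan_copy_to function are hypothetical stand-ins, not the crate's egraph-based planner. The COPY FROM branch likewise drops its explicit is_internal guard (BindError::NotSupportedOnInternalTable) in this diff.

// Hypothetical stand-in types illustrating the plan shape after this change.
#[derive(Debug)]
enum Plan {
    // table name plus the filter pushed into the scan ("true" means scan everything)
    Scan { table: String, filter: String },
    CopyTo { source: Box<Plan>, dest: String },
}

fn plan_copy_to(table: &str, dest: &str) -> Plan {
    // One code path for every table; system tables are no longer special-cased.
    Plan::CopyTo {
        source: Box::new(Plan::Scan {
            table: table.to_string(),
            filter: "true".to_string(),
        }),
        dest: dest.to_string(),
    }
}

fn main() {
    println!("{:?}", plan_copy_to("pg_catalog.pg_tables", "tables.csv"));
    println!("{:?}", plan_copy_to("postgres.t", "t.csv"));
}
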
src/binder/mod.rs (2 additions & 2 deletions)
@@ -10,7 +10,7 @@ use itertools::Itertools;

use crate::array;
use crate::catalog::function::FunctionCatalog;
use crate::catalog::{RootCatalog, RootCatalogRef, TableRefId, DEFAULT_SCHEMA_NAME};
use crate::catalog::{RootCatalog, RootCatalogRef, TableRefId};
use crate::parser::*;
use crate::planner::{Expr as Node, RecExpr, TypeError, TypeSchemaAnalysis};

@@ -370,7 +370,7 @@ impl Binder {
/// Split an object name into `(schema name, table name)`.
fn split_name(name: &ObjectName) -> Result<(&str, &str)> {
Ok(match name.0.as_slice() {
[table] => (DEFAULT_SCHEMA_NAME, &table.value),
[table] => (RootCatalog::DEFAULT_SCHEMA_NAME, &table.value),
[schema, table] => (&schema.value, &table.value),
_ => return Err(BindError::InvalidTableName(name.0.clone())),
})
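
The schema-name constants that used to be free statics in crate::catalog (removed further down in src/catalog/mod.rs) are now associated constants on RootCatalog, so call sites like split_name reference them through the type. A trimmed-down, self-contained sketch of the pattern follows; the empty struct and the u32 id type are placeholders, and only the constant names and values come from the diff.

struct RootCatalog;

impl RootCatalog {
    // Values match the constants added in src/catalog/root.rs.
    pub const DEFAULT_SCHEMA_NAME: &'static str = "postgres";
    pub const SYSTEM_SCHEMA_NAME: &'static str = "pg_catalog";
    pub const SYSTEM_SCHEMA_ID: u32 = 0;
}

fn default_schema_for(name: &str) -> (&str, &str) {
    // Mirrors split_name: an unqualified table name falls back to the default schema.
    match name.split('.').collect::<Vec<_>>()[..] {
        [table] => (RootCatalog::DEFAULT_SCHEMA_NAME, table),
        [schema, table] => (schema, table),
        _ => panic!("invalid table name: {name}"),
    }
}

fn main() {
    assert_eq!(default_schema_for("t"), ("postgres", "t"));
    assert_eq!(
        default_schema_for("pg_catalog.pg_tables"),
        ("pg_catalog", "pg_tables")
    );
    assert_eq!(RootCatalog::SYSTEM_SCHEMA_NAME, "pg_catalog");
    assert_eq!(RootCatalog::SYSTEM_SCHEMA_ID, 0);
}
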
src/binder/table.rs (6 additions & 10 deletions)
@@ -3,7 +3,7 @@
use std::vec::Vec;

use super::*;
use crate::catalog::{ColumnRefId, INTERNAL_SCHEMA_NAME};
use crate::catalog::{ColumnRefId, RootCatalog};

impl Binder {
/// Binds the FROM clause. Returns a nested [`Join`](Node::Join) plan of tables.
@@ -60,14 +60,10 @@ impl Binder {
fn bind_table_factor(&mut self, table: TableFactor) -> Result {
match table {
TableFactor::Table { name, alias, .. } => {
let (table_id, is_internal) = self.bind_table_id(&name)?;
let (table_id, _) = self.bind_table_id(&name)?;
let cols = self.bind_table_def(&name, alias, false)?;
let id = if is_internal {
self.egraph.add(Node::Internal([table_id, cols]))
} else {
let null = self.egraph.add(Node::null());
self.egraph.add(Node::Scan([table_id, cols, null]))
};
let null = self.egraph.add(Node::null());
let id = self.egraph.add(Node::Scan([table_id, cols, null]));
Ok(id)
}
TableFactor::Derived {
@@ -252,7 +248,7 @@ impl Binder {
.get_table_id_by_name(schema_name, table_name)
.ok_or_else(|| BindError::InvalidTable(table_name.into()))?;
let id = self.egraph.add(Node::Table(table_ref_id));
Ok((id, schema_name == INTERNAL_SCHEMA_NAME))
Ok((id, table_ref_id.schema_id == RootCatalog::SYSTEM_SCHEMA_ID))
}
}

@@ -270,7 +266,7 @@ mod tests {
let catalog = Arc::new(RootCatalog::new());
let col_catalog = ColumnCatalog::new(0, ColumnDesc::new("a", DataType::Int32, false));
catalog
.add_table(0, "t".into(), vec![col_catalog], false, vec![])
.add_table(1, "t".into(), vec![col_catalog], false, vec![])
.unwrap();

let stmts = parse("select x.b from (select a as b from t) as x").unwrap();
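
bind_table_id still tells the caller whether a table is a system table, but the check is now id-based (the bound schema id equals RootCatalog::SYSTEM_SCHEMA_ID) rather than a comparison of the schema name against pg_catalog. A self-contained sketch of that check with hypothetical stand-in types (the real TableRefId lives in crate::catalog). This is also why the binder test above now registers its table under schema id 1: the default postgres schema is created after pg_catalog, so user tables no longer live in schema 0.

#[derive(Clone, Copy, Debug)]
struct TableRefId {
    schema_id: u32,
    table_id: u32,
}

// pg_catalog is created first in RootCatalog::new(), so it is asserted to get schema id 0.
const SYSTEM_SCHEMA_ID: u32 = 0;

fn is_system_table(id: TableRefId) -> bool {
    id.schema_id == SYSTEM_SCHEMA_ID
}

fn main() {
    // Example ids only; the table ids here are illustrative, not the real catalog's.
    let pg_tables = TableRefId { schema_id: 0, table_id: 1 };
    let user_table = TableRefId { schema_id: 1, table_id: 0 };
    assert!(is_system_table(pg_tables));
    assert!(!is_system_table(user_table));
    println!("{pg_tables:?} is a system table, {user_table:?} is not");
}
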
src/catalog/mod.rs (0 additions & 6 deletions)
@@ -11,12 +11,6 @@ pub use self::schema::*;
pub use self::table::*;
use crate::types::*;

pub static DEFAULT_SCHEMA_NAME: &str = "postgres";
pub static INTERNAL_SCHEMA_NAME: &str = "pg_catalog";

static CONTRIBUTORS_TABLE_NAME: &str = "contributors";
pub const CONTRIBUTORS_TABLE_ID: TableId = 0;

mod column;
pub mod function;
mod root;
src/catalog/root.rs (73 additions & 25 deletions)
@@ -5,6 +5,7 @@ use std::sync::{Arc, Mutex};

use super::function::FunctionCatalog;
use super::*;
use crate::parser;

/// The root of all catalogs.
pub struct RootCatalog {
@@ -27,8 +28,8 @@
impl RootCatalog {
pub fn new() -> RootCatalog {
let mut inner = Inner::default();
inner.add_schema(DEFAULT_SCHEMA_NAME.into()).unwrap();
inner.add_internals();
inner.add_system_schema();
inner.add_schema(Self::DEFAULT_SCHEMA_NAME.into()).unwrap();
RootCatalog {
inner: Mutex::new(inner),
}
@@ -126,6 +127,10 @@ impl RootCatalog {
let schema = inner.schemas.get_mut(&schema_idx).unwrap();
schema.create_function(name, arg_types, arg_names, return_type, language, body);
}

pub const DEFAULT_SCHEMA_NAME: &'static str = "postgres";
pub const SYSTEM_SCHEMA_NAME: &'static str = "pg_catalog";
pub const SYSTEM_SCHEMA_ID: TableId = 0;
}

impl Inner {
@@ -141,34 +146,73 @@
Ok(schema_id)
}

fn add_internals(&mut self) {
let schema_id = self.add_schema(INTERNAL_SCHEMA_NAME.into()).unwrap();
let table_id = self
.schemas
.get_mut(&schema_id)
.unwrap()
.add_table(
CONTRIBUTORS_TABLE_NAME.to_string(),
vec![ColumnCatalog::new(
0,
ColumnDesc::new("github_id", DataType::String, false),
)],
false,
vec![],
)
fn add_system_schema(&mut self) {
let schema_id = self
.add_schema(RootCatalog::SYSTEM_SCHEMA_NAME.into())
.unwrap();
assert_eq!(table_id, CONTRIBUTORS_TABLE_ID);
let system_schema = self.schemas.get_mut(&schema_id).unwrap();
assert_eq!(schema_id, RootCatalog::SYSTEM_SCHEMA_ID);

let stmts = parser::parse(CREATE_SYSTEM_TABLE_SQL).unwrap();
for stmt in stmts {
let parser::Statement::CreateTable { name, columns, .. } = stmt else {
panic!("invalid system table sql: {stmt}");
};
system_schema
.add_table(
name.to_string(),
columns
.into_iter()
.enumerate()
.map(|(cid, col)| {
let mut column = ColumnCatalog::from(&col);
column.set_id(cid as u32);
column
})
.collect(),
false,
vec![],
)
.expect("failed to add system table");
}
}
}

fn split_name(name: &str) -> Option<(&str, &str)> {
match name.split('.').collect::<Vec<&str>>()[..] {
[table] => Some((DEFAULT_SCHEMA_NAME, table)),
[table] => Some((RootCatalog::DEFAULT_SCHEMA_NAME, table)),
[schema, table] => Some((schema, table)),
_ => None,
}
}

const CREATE_SYSTEM_TABLE_SQL: &str = "
create table contributors (
github_id string not null
);
create table pg_tables (
schema_id int not null,
schema_name string not null,
table_id int not null,
table_name string not null
);
create table pg_attribute (
schema_name string not null,
table_name string not null,
column_id int not null,
column_name string not null,
column_type string not null,
column_not_null boolean not null
);
create table pg_stat (
schema_name string not null,
table_name string not null,
column_name string not null,
n_row int,
n_distinct int
);
";

#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -178,17 +222,21 @@
#[test]
fn test_root_catalog() {
let catalog = Arc::new(RootCatalog::new());
let schema_catalog1 = catalog.get_schema_by_id(0).unwrap();
let schema_catalog1 = catalog
.get_schema_by_id(RootCatalog::SYSTEM_SCHEMA_ID)
.unwrap();
assert_eq!(schema_catalog1.id(), 0);
assert_eq!(schema_catalog1.name(), DEFAULT_SCHEMA_NAME);
assert_eq!(schema_catalog1.name(), RootCatalog::SYSTEM_SCHEMA_NAME);

let schema_catalog2 = catalog.get_schema_by_name(DEFAULT_SCHEMA_NAME).unwrap();
assert_eq!(schema_catalog1.id(), schema_catalog2.id());
assert_eq!(schema_catalog1.name(), schema_catalog2.name());
let schema_catalog2 = catalog
.get_schema_by_name(RootCatalog::DEFAULT_SCHEMA_NAME)
.unwrap();
assert_eq!(schema_catalog2.id(), 1);
assert_eq!(schema_catalog2.name(), RootCatalog::DEFAULT_SCHEMA_NAME);

let col = ColumnCatalog::new(0, ColumnDesc::new("a", DataType::Int32, false));
let table_id = catalog
.add_table(0, "t".into(), vec![col], false, vec![])
.add_table(1, "t".into(), vec![col], false, vec![])
.unwrap();
assert_eq!(table_id, 0);
}
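
RootCatalog::new() now bootstraps pg_catalog by parsing CREATE_SYSTEM_TABLE_SQL, so a fresh catalog contains two schemas: pg_catalog (id 0) with the four system tables, and the default postgres schema (id 1), which is why the updated test looks up the system schema via SYSTEM_SCHEMA_ID and expects the user schema at id 1. A hedged sketch of the resulting layout; the sequential table-id assignment inside pg_catalog is an assumption, and only the schema ids and table names come from the diff.

use std::collections::BTreeMap;

fn bootstrap_layout() -> BTreeMap<u32, (&'static str, Vec<&'static str>)> {
    let mut schemas = BTreeMap::new();
    // Schema 0: pg_catalog, created first (asserted against SYSTEM_SCHEMA_ID).
    // Table order follows CREATE_SYSTEM_TABLE_SQL; ids are assumed sequential.
    schemas.insert(
        0,
        (
            "pg_catalog",
            vec!["contributors", "pg_tables", "pg_attribute", "pg_stat"],
        ),
    );
    // Schema 1: the default "postgres" schema, created second and initially empty.
    schemas.insert(1, ("postgres", Vec::new()));
    schemas
}

fn main() {
    for (id, (name, tables)) in bootstrap_layout() {
        println!("schema {id} ({name}): {tables:?}");
    }
}
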
src/catalog/schema.rs (1 addition & 1 deletion)
@@ -56,7 +56,7 @@ impl SchemaCatalog {

pub(super) fn delete_table(&mut self, id: TableId) {
let catalog = self.tables.remove(&id).unwrap();
self.table_idxs.remove(&catalog.name()).unwrap();
self.table_idxs.remove(catalog.name()).unwrap();
}

pub fn all_tables(&self) -> HashMap<TableId, Arc<TableCatalog>> {
src/catalog/table.rs (2 additions & 2 deletions)
@@ -93,8 +93,8 @@ impl TableCatalog {
.cloned()
}

pub fn name(&self) -> String {
self.name.clone()
pub fn name(&self) -> &str {
&self.name
}

pub fn id(&self) -> TableId {
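
The last two hunks are a small API cleanup: TableCatalog::name now returns &str instead of a cloned String, and SchemaCatalog::delete_table can pass that &str straight to HashMap::remove, because a HashMap keyed by String accepts any Borrow<str> key for lookups. A minimal, self-contained sketch of why the extra borrow in `&catalog.name()` is no longer needed; the TableCatalog here is a simplified stand-in.

use std::collections::HashMap;

struct TableCatalog {
    name: String,
}

impl TableCatalog {
    // After the change: borrow the name instead of cloning it.
    fn name(&self) -> &str {
        &self.name
    }
}

fn main() {
    let mut table_idxs: HashMap<String, u32> = HashMap::new();
    table_idxs.insert("t".to_string(), 0);

    let catalog = TableCatalog { name: "t".to_string() };
    // HashMap<String, _>::remove takes &Q where String: Borrow<Q>, so a plain
    // &str works; no reference-to-temporary-String or clone is required.
    assert_eq!(table_idxs.remove(catalog.name()), Some(0));
}
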