Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(query): support javascript/python script User Defined Aggregate Function #17108

Merged
merged 25 commits into from
Dec 25, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions src/common/exception/src/exception_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ build_exceptions! {
// Geometry errors.
GeometryError(1801),
InvalidGeometryFormat(1802),

// UDF errors.
UDFRuntimeError(1810),

// Tantivy errors.
TantivyError(1901),
TantivyOpenReadError(1902),
Expand Down
1 change: 1 addition & 0 deletions src/meta/app/src/principal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ pub use user_auth::AuthType;
pub use user_auth::PasswordHashMethod;
pub use user_defined_file_format::UserDefinedFileFormat;
pub use user_defined_function::LambdaUDF;
pub use user_defined_function::UDAFScript;
pub use user_defined_function::UDFDefinition;
pub use user_defined_function::UDFScript;
pub use user_defined_function::UDFServer;
Expand Down
38 changes: 37 additions & 1 deletion src/meta/app/src/principal/user_defined_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use std::fmt::Formatter;
use chrono::DateTime;
use chrono::Utc;
use databend_common_expression::types::DataType;
use databend_common_expression::DataField;

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LambdaUDF {
Expand All @@ -44,11 +45,24 @@ pub struct UDFScript {
pub runtime_version: String,
}

/// Definition of a script-based user-defined aggregate function (UDAF).
///
/// Holds the script source together with the typed signature of the
/// aggregate: its input argument types, its named state fields, and its
/// return type.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct UDAFScript {
/// Script source code; rendered between `$$ ... $$` by the `Display` impl of `UDFDefinition`.
pub code: String,
/// Name of the script language (rendered after `LANGUAGE`).
pub language: String,
/// Aggregate function input types.
pub arg_types: Vec<DataType>,
/// Aggregate function state fields (name and data type of each piece of state).
pub state_fields: Vec<DataField>,
/// Data type of the aggregate's result.
pub return_type: DataType,
/// Version of the script runtime (rendered after `RUNTIME_VERSION =`).
pub runtime_version: String,
}

/// The concrete kind of a user-defined function, together with its kind-specific definition.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum UDFDefinition {
/// Definition carried by a `LambdaUDF`.
LambdaUDF(LambdaUDF),
/// Definition carried by a `UDFServer`.
UDFServer(UDFServer),
/// Definition carried by a `UDFScript` (script-based scalar function).
UDFScript(UDFScript),
/// Definition carried by a `UDAFScript` (script-based aggregate function).
UDAFScript(UDAFScript),
}

#[derive(Clone, Debug, Eq, PartialEq)]
Expand Down Expand Up @@ -160,7 +174,6 @@ impl Display for UDFDefinition {
") RETURNS {return_type} LANGUAGE {language} HANDLER = {handler} ADDRESS = {address}"
)?;
}

UDFDefinition::UDFScript(UDFScript {
code,
arg_types,
Expand All @@ -180,6 +193,29 @@ impl Display for UDFDefinition {
") RETURNS {return_type} LANGUAGE {language} RUNTIME_VERSION = {runtime_version} HANDLER = {handler} AS $${code}$$"
)?;
}
// Script UDAF: rendered as
// `<arg types> STATE { <name> <type>, ... } RETURNS <type> LANGUAGE <lang> RUNTIME_VERSION = <ver> AS $$<code>$$`.
// NOTE(review): the UDFScript arm's trailing text starts with `)`, while this arm
// emits no parentheses around the argument type list — confirm this is intended.
UDFDefinition::UDAFScript(UDAFScript {
code,
arg_types,
state_fields,
return_type,
language,
runtime_version,
}) => {
// Comma-separated list of input argument types.
for (i, item) in arg_types.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{item}")?;
}
// `{{` / `}}` are escaped braces: the output contains literal `{` and `}`.
write!(f, " STATE {{ ")?;
// Comma-separated list of `<field name> <field type>` pairs.
for (i, item) in state_fields.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{} {}", item.name(), item.data_type())?;
}
write!(f, " }} RETURNS {return_type} LANGUAGE {language} RUNTIME_VERSION = {runtime_version} AS $${code}$$")?;
}
}
Ok(())
}
Expand Down
96 changes: 96 additions & 0 deletions src/meta/proto-conv/src/udf_from_to_protobuf_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use chrono::DateTime;
use chrono::Utc;
use databend_common_expression::infer_schema_type;
use databend_common_expression::types::DataType;
use databend_common_expression::DataField;
use databend_common_expression::TableDataType;
use databend_common_meta_app::principal as mt;
use databend_common_protos::pb;
Expand Down Expand Up @@ -164,6 +165,95 @@ impl FromToProto for mt::UDFScript {
}
}

impl FromToProto for mt::UDAFScript {
type PB = pb::UdafScript;
fn get_pb_ver(p: &Self::PB) -> u64 {
p.ver
}
fn from_pb(p: pb::UdafScript) -> Result<Self, Incompatible> {
reader_check_msg(p.ver, p.min_reader_ver)?;

let arg_types = p
.arg_types
.into_iter()
.map(|arg_type| Ok((&TableDataType::from_pb(arg_type)?).into()))
.collect::<Result<Vec<_>, _>>()?;

let state_fields = p
.state_fields
.into_iter()
.map(|(name, data_type)| {
Ok(DataField::new(
&name,
(&TableDataType::from_pb(data_type)?).into(),
))
})
.collect::<Result<Vec<_>, _>>()?;

let return_type =
(&TableDataType::from_pb(p.return_type.ok_or_else(|| Incompatible {
reason: "UDAFScript.return_type can not be None".to_string(),
})?)?)
.into();

Ok(mt::UDAFScript {
code: p.code,
arg_types,
return_type,
language: p.language,
runtime_version: p.runtime_version,
state_fields,
})
}

fn to_pb(&self) -> Result<pb::UdafScript, Incompatible> {
let mut arg_types = Vec::with_capacity(self.arg_types.len());
for arg_type in self.arg_types.iter() {
let arg_type = infer_schema_type(arg_type)
.map_err(|e| Incompatible {
reason: format!("Convert DataType to TableDataType failed: {}", e.message()),
})?
.to_pb()?;
arg_types.push(arg_type);
}

let state_fields = self
.state_fields
.iter()
.map(|field| {
Ok((
field.name().clone(),
infer_schema_type(field.data_type())
.map_err(|e| Incompatible {
reason: format!(
"Convert DataType to TableDataType failed: {}",
e.message()
),
})?
.to_pb()?,
))
})
.collect::<Result<_, _>>()?;

let return_type = infer_schema_type(&self.return_type)
.map_err(|e| Incompatible {
reason: format!("Convert DataType to TableDataType failed: {}", e.message()),
})?
.to_pb()?;

Ok(pb::UdafScript {
ver: VER,
min_reader_ver: MIN_READER_VER,
code: self.code.clone(),
language: self.language.clone(),
runtime_version: self.runtime_version.clone(),
arg_types,
state_fields,
return_type: Some(return_type),
})
}
}

impl FromToProto for mt::UserDefinedFunction {
type PB = pb::UserDefinedFunction;
fn get_pb_ver(p: &Self::PB) -> u64 {
Expand All @@ -181,6 +271,9 @@ impl FromToProto for mt::UserDefinedFunction {
Some(pb::user_defined_function::Definition::UdfScript(udf_script)) => {
mt::UDFDefinition::UDFScript(mt::UDFScript::from_pb(udf_script)?)
}
Some(pb::user_defined_function::Definition::UdafScript(udaf_script)) => {
mt::UDFDefinition::UDAFScript(mt::UDAFScript::from_pb(udaf_script)?)
}
None => {
return Err(Incompatible {
reason: "UserDefinedFunction.definition cannot be None".to_string(),
Expand Down Expand Up @@ -210,6 +303,9 @@ impl FromToProto for mt::UserDefinedFunction {
mt::UDFDefinition::UDFScript(udf_script) => {
pb::user_defined_function::Definition::UdfScript(udf_script.to_pb()?)
}
mt::UDFDefinition::UDAFScript(udaf_script) => {
pb::user_defined_function::Definition::UdafScript(udaf_script.to_pb()?)
}
};

Ok(pb::UserDefinedFunction {
Expand Down
1 change: 1 addition & 0 deletions src/meta/proto-conv/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
(112, "2024-11-28: Add: virtual_column add data_types field"),
(113, "2024-12-10: Add: GrantWarehouseObject"),
(114, "2024-12-12: Add: New DataType Interval."),
(115, "2024-12-16: Add: udf.proto: add UDAFScript"),
// Dear developer:
// If you're gonna add a new metadata version, you'll have to add a test for it.
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`)
Expand Down
1 change: 1 addition & 0 deletions src/meta/proto-conv/tests/it/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,4 @@ mod v111_add_glue_as_iceberg_catalog_option;
mod v112_virtual_column;
mod v113_warehouse_grantobject;
mod v114_interval_datatype;
mod v115_add_udaf_script;
31 changes: 31 additions & 0 deletions src/meta/proto-conv/tests/it/v081_udf_script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use databend_common_expression::types::DataType;
use databend_common_expression::types::NumberDataType;
use databend_common_meta_app::principal::LambdaUDF;
use databend_common_meta_app::principal::UDFDefinition;
use databend_common_meta_app::principal::UDFScript;
use databend_common_meta_app::principal::UDFServer;
use databend_common_meta_app::principal::UserDefinedFunction;
use fastrace::func_name;
Expand Down Expand Up @@ -90,3 +91,33 @@ fn test_decode_v81_udf_sql() -> anyhow::Result<()> {
common::test_pb_from_to(func_name!(), want())?;
common::test_load_old(func_name!(), bytes.as_slice(), 81, want())
}

/// Round-trips a `UserDefinedFunction` carrying a `UDFScript` definition and
/// checks that the stored bytes below still decode to the same value.
#[test]
fn test_decode_udf_script() -> anyhow::Result<()> {
    // Previously encoded message; must stay byte-for-byte unchanged.
    let encoded: Vec<u8> = vec![
        10, 5, 109, 121, 95, 102, 110, 18, 21, 84, 104, 105, 115, 32, 105, 115, 32, 97, 32, 100,
        101, 115, 99, 114, 105, 112, 116, 105, 111, 110, 50, 78, 10, 9, 115, 111, 109, 101, 32, 99,
        111, 100, 101, 18, 5, 109, 121, 95, 102, 110, 26, 6, 112, 121, 116, 104, 111, 110, 34, 17,
        154, 2, 8, 58, 0, 160, 6, 115, 168, 6, 24, 160, 6, 115, 168, 6, 24, 42, 17, 154, 2, 8, 74,
        0, 160, 6, 115, 168, 6, 24, 160, 6, 115, 168, 6, 24, 50, 6, 51, 46, 49, 50, 46, 50, 160, 6,
        115, 168, 6, 24, 42, 23, 49, 57, 55, 48, 45, 48, 49, 45, 48, 49, 32, 48, 48, 58, 48, 48,
        58, 48, 48, 32, 85, 84, 67, 160, 6, 115, 168, 6, 24,
    ];

    // Builds a fresh copy of the expected value for each check.
    let expected = || {
        let script = UDFScript {
            code: String::from("some code"),
            handler: String::from("my_fn"),
            language: String::from("python"),
            arg_types: vec![DataType::Number(NumberDataType::Int32)],
            return_type: DataType::Number(NumberDataType::Float32),
            runtime_version: String::from("3.12.2"),
        };
        UserDefinedFunction {
            name: String::from("my_fn"),
            description: String::from("This is a description"),
            definition: UDFDefinition::UDFScript(script),
            created_on: DateTime::<Utc>::default(),
        }
    };

    common::test_pb_from_to(func_name!(), expected())?;
    common::test_load_old(func_name!(), encoded.as_slice(), 115, expected())
}
69 changes: 69 additions & 0 deletions src/meta/proto-conv/tests/it/v115_add_udaf_script.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2021 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use chrono::DateTime;
use chrono::Utc;
use databend_common_expression::types::DataType;
use databend_common_expression::types::NumberDataType;
use databend_common_expression::DataField;
use databend_common_meta_app::principal::UDAFScript;
use databend_common_meta_app::principal::UDFDefinition;
use databend_common_meta_app::principal::UserDefinedFunction;
use fastrace::func_name;

use crate::common;

// These bytes are built when a new version is introduced,
// and are kept for backward compatibility test.
//
// *************************************************************
// * These messages should never be updated, *
// * only be added when a new version is added, *
// * or be removed when an old version is no longer supported. *
// *************************************************************
//
// The message bytes are built from the output of `proto_conv::test_build_pb_buf()`
/// Round-trips a `UserDefinedFunction` carrying a `UDAFScript` definition and
/// checks that the stored v115 bytes below still decode to the same value.
#[test]
fn test_decode_v115_add_udaf_script() -> anyhow::Result<()> {
    // Frozen encoded message; must stay byte-for-byte unchanged.
    let encoded: Vec<u8> = vec![
        10, 5, 109, 121, 95, 102, 110, 18, 21, 84, 104, 105, 115, 32, 105, 115, 32, 97, 32, 100,
        101, 115, 99, 114, 105, 112, 116, 105, 111, 110, 66, 93, 10, 9, 115, 111, 109, 101, 32, 99,
        111, 100, 101, 18, 10, 106, 97, 118, 97, 115, 99, 114, 105, 112, 116, 34, 17, 154, 2, 8,
        74, 0, 160, 6, 115, 168, 6, 24, 160, 6, 115, 168, 6, 24, 42, 17, 154, 2, 8, 58, 0, 160, 6,
        115, 168, 6, 24, 160, 6, 115, 168, 6, 24, 50, 24, 10, 3, 115, 117, 109, 18, 17, 154, 2, 8,
        66, 0, 160, 6, 115, 168, 6, 24, 160, 6, 115, 168, 6, 24, 160, 6, 115, 168, 6, 24, 42, 23,
        49, 57, 55, 48, 45, 48, 49, 45, 48, 49, 32, 48, 48, 58, 48, 48, 58, 48, 48, 32, 85, 84, 67,
        160, 6, 115, 168, 6, 24,
    ];

    // Builds a fresh copy of the expected value for each check.
    let expected = || {
        let script = UDAFScript {
            code: String::from("some code"),
            language: String::from("javascript"),
            arg_types: vec![DataType::Number(NumberDataType::Int32)],
            state_fields: vec![DataField::new(
                "sum",
                DataType::Number(NumberDataType::Int64),
            )],
            return_type: DataType::Number(NumberDataType::Float32),
            runtime_version: String::new(),
        };
        UserDefinedFunction {
            name: String::from("my_fn"),
            description: String::from("This is a description"),
            definition: UDFDefinition::UDAFScript(script),
            created_on: DateTime::<Utc>::default(),
        }
    };

    common::test_pb_from_to(func_name!(), expected())?;
    common::test_load_old(func_name!(), encoded.as_slice(), 115, expected())
}
17 changes: 16 additions & 1 deletion src/meta/protos/proto/udf.proto
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,21 @@ message UDFScript {
string runtime_version = 6;
}

// Definition of a script-based user-defined aggregate function (UDAF).
message UDAFScript {
// Message version and the minimum reader version able to decode it.
uint64 ver = 100;
uint64 min_reader_ver = 101;

// Script source code.
string code = 1;
// Name of the script language.
string language = 2;
// Version of the script runtime.
string runtime_version = 3;
// Data type of the aggregate's result.
DataType return_type = 4;
// Data types of the aggregate's input arguments, in order.
repeated DataType arg_types = 5;
// Aggregate state fields, keyed by field name.
// NOTE(review): protobuf map fields have no defined iteration order, so the
// declaration order of the state fields is presumably not preserved across
// encode/decode — confirm whether any consumer depends on that order.
map<string, DataType> state_fields = 6;
}

message UserDefinedFunction {
reserved 7; // reserved for upcoming UDAFServer
forsaken628 marked this conversation as resolved.
Show resolved Hide resolved

uint64 ver = 100;
uint64 min_reader_ver = 101;

Expand All @@ -60,7 +73,9 @@ message UserDefinedFunction {
LambdaUDF lambda_udf = 3;
UDFServer udf_server = 4;
UDFScript udf_script = 6;
// UDAFServer udaf_server = 7;
UDAFScript udaf_script = 8;
}
// The time udf created.
optional string created_on = 5;
}
}
Loading
Loading