From fa0191772e87e04da2598aedb7fe11dd49f88f88 Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Tue, 9 Jul 2024 22:12:48 +0900 Subject: [PATCH] Support `NULL` literals in where clause (#11266) * Try fix where clause incorrectly reject NULL literal * check null in filter --- datafusion/expr/src/logical_plan/plan.rs | 3 +- datafusion/physical-plan/src/filter.rs | 39 +++++++++++++++++---- datafusion/sqllogictest/test_files/misc.slt | 14 ++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index bda03fb7087a..998b5bdcb60c 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2123,7 +2123,8 @@ impl Filter { // construction (such as with correlated subqueries) so we make a best effort here and // ignore errors resolving the expression against the schema. if let Ok(predicate_type) = predicate.get_type(input.schema()) { - if predicate_type != DataType::Boolean { + // Interpret NULL as a missing boolean value. + if predicate_type != DataType::Boolean && predicate_type != DataType::Null { return plan_err!( "Cannot create filter with non-boolean predicate '{predicate}' returning {predicate_type}" ); diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 96ec6c0cf34d..84afc227578f 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -31,13 +31,13 @@ use crate::{ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}, DisplayFormatType, ExecutionPlan, }; - use arrow::compute::filter_record_batch; use arrow::datatypes::{DataType, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_common::cast::as_boolean_array; +use arrow_array::{Array, BooleanArray}; +use datafusion_common::cast::{as_boolean_array, as_null_array}; use datafusion_common::stats::Precision; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::BinaryExpr; @@ -84,6 +84,19 @@ impl FilterExec { cache, }) } + DataType::Null => { + let default_selectivity = 0; + let cache = + Self::compute_properties(&input, &predicate, default_selectivity)?; + + Ok(Self { + predicate, + input: input.clone(), + metrics: ExecutionPlanMetricsSet::new(), + default_selectivity, + cache, + }) + } other => { plan_err!("Filter predicate must return boolean values, not {other:?}") } @@ -355,9 +368,23 @@ pub(crate) fn batch_filter( .evaluate(batch) .and_then(|v| v.into_array(batch.num_rows())) .and_then(|array| { - Ok(as_boolean_array(&array)?) - // apply filter array to record batch - .and_then(|filter_array| Ok(filter_record_batch(batch, filter_array)?)) + let filter_array = match as_boolean_array(&array) { + Ok(boolean_array) => { + Ok(boolean_array.to_owned()) + }, + Err(_) => { + let Ok(null_array) = as_null_array(&array) else { + return internal_err!("Cannot create filter_array from non-boolean predicates, unable to continute"); + }; + + // if the predicate is null, then the result is also null + Ok::(BooleanArray::new_null( + null_array.len(), + )) + } + }?; + + Ok(filter_record_batch(batch, &filter_array)?) }) } diff --git a/datafusion/sqllogictest/test_files/misc.slt b/datafusion/sqllogictest/test_files/misc.slt index 848cdc943914..66606df83480 100644 --- a/datafusion/sqllogictest/test_files/misc.slt +++ b/datafusion/sqllogictest/test_files/misc.slt @@ -24,3 +24,17 @@ query TT? select 'foo', '', NULL ---- foo (empty) NULL + +# Where clause accept NULL literal +query I +select 1 where NULL +---- + +query I +select 1 where NULL and 1 = 1 +---- + +query I +select 1 where NULL or 1 = 1 +---- +1 \ No newline at end of file