Skip to content

Commit

Permalink
Support NULL literals in where clause (apache#11266)
Browse files Browse the repository at this point in the history
* Try to fix where clause incorrectly rejecting NULL literal

* check null in filter
  • Loading branch information
xinlifoobar authored Jul 9, 2024
1 parent 4123ad6 commit fa01917
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 7 deletions.
3 changes: 2 additions & 1 deletion datafusion/expr/src/logical_plan/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2123,7 +2123,8 @@ impl Filter {
// construction (such as with correlated subqueries) so we make a best effort here and
// ignore errors resolving the expression against the schema.
if let Ok(predicate_type) = predicate.get_type(input.schema()) {
if predicate_type != DataType::Boolean {
// Interpret NULL as a missing boolean value.
if predicate_type != DataType::Boolean && predicate_type != DataType::Null {
return plan_err!(
"Cannot create filter with non-boolean predicate '{predicate}' returning {predicate_type}"
);
Expand Down
39 changes: 33 additions & 6 deletions datafusion/physical-plan/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ use crate::{
metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet},
DisplayFormatType, ExecutionPlan,
};

use arrow::compute::filter_record_batch;
use arrow::datatypes::{DataType, SchemaRef};
use arrow::record_batch::RecordBatch;
use datafusion_common::cast::as_boolean_array;
use arrow_array::{Array, BooleanArray};
use datafusion_common::cast::{as_boolean_array, as_null_array};
use datafusion_common::stats::Precision;
use datafusion_common::{plan_err, DataFusionError, Result};
use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
use datafusion_execution::TaskContext;
use datafusion_expr::Operator;
use datafusion_physical_expr::expressions::BinaryExpr;
Expand Down Expand Up @@ -84,6 +84,19 @@ impl FilterExec {
cache,
})
}
DataType::Null => {
let default_selectivity = 0;
let cache =
Self::compute_properties(&input, &predicate, default_selectivity)?;

Ok(Self {
predicate,
input: input.clone(),
metrics: ExecutionPlanMetricsSet::new(),
default_selectivity,
cache,
})
}
other => {
plan_err!("Filter predicate must return boolean values, not {other:?}")
}
Expand Down Expand Up @@ -355,9 +368,23 @@ pub(crate) fn batch_filter(
.evaluate(batch)
.and_then(|v| v.into_array(batch.num_rows()))
.and_then(|array| {
Ok(as_boolean_array(&array)?)
// apply filter array to record batch
.and_then(|filter_array| Ok(filter_record_batch(batch, filter_array)?))
let filter_array = match as_boolean_array(&array) {
Ok(boolean_array) => {
Ok(boolean_array.to_owned())
},
Err(_) => {
let Ok(null_array) = as_null_array(&array) else {
return internal_err!("Cannot create filter_array from non-boolean predicates, unable to continute");
};

// if the predicate is null, then the result is also null
Ok::<BooleanArray, DataFusionError>(BooleanArray::new_null(
null_array.len(),
))
}
}?;

Ok(filter_record_batch(batch, &filter_array)?)
})
}

Expand Down
14 changes: 14 additions & 0 deletions datafusion/sqllogictest/test_files/misc.slt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,17 @@ query TT?
select 'foo', '', NULL
----
foo (empty) NULL

# Where clause accepts a NULL literal
query I
select 1 where NULL
----

query I
select 1 where NULL and 1 = 1
----

query I
select 1 where NULL or 1 = 1
----
1

0 comments on commit fa01917

Please sign in to comment.