diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 7ba0c26ab619..3710a33258ea 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -442,6 +442,10 @@ impl PhysicalExpr for AggregationExpr { } } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.input.collect_live_columns(lv); + } + fn is_scalar(&self) -> bool { true } @@ -731,6 +735,11 @@ impl PhysicalExpr for AggQuantileExpr { )) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.input.collect_live_columns(lv); + self.quantile.collect_live_columns(lv); + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.input.to_field(input_schema) } diff --git a/crates/polars-expr/src/expressions/alias.rs b/crates/polars-expr/src/expressions/alias.rs index 131d2ca2f16c..410ca00448a4 100644 --- a/crates/polars-expr/src/expressions/alias.rs +++ b/crates/polars-expr/src/expressions/alias.rs @@ -59,6 +59,11 @@ impl PhysicalExpr for AliasExpr { Ok(ac) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.physical_expr.collect_live_columns(lv); + lv.insert(self.name.clone()); + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { Ok(Field::new( self.name.clone(), diff --git a/crates/polars-expr/src/expressions/apply.rs b/crates/polars-expr/src/expressions/apply.rs index e6c5ae856ae7..7fc0739f131e 100644 --- a/crates/polars-expr/src/expressions/apply.rs +++ b/crates/polars-expr/src/expressions/apply.rs @@ -16,6 +16,7 @@ use crate::expressions::{ AggState, AggregationContext, PartitionedAggregation, PhysicalExpr, UpdateGroups, }; +#[derive(Clone)] pub struct ApplyExpr { inputs: Vec>, function: SpecialEq>, @@ -426,6 +427,50 @@ impl PhysicalExpr for ApplyExpr { } } } + + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + for i in &self.inputs { + i.collect_live_columns(lv); + } + } + fn replace_elementwise_const_columns( + &self, + const_columns: &PlHashMap>, + ) -> Option> { + if self.collect_groups == ApplyOptions::ElementWise { + let mut new_inputs = Vec::new(); + for i in 0..self.inputs.len() { + match self.inputs[i].replace_elementwise_const_columns(const_columns) { + None => continue, + Some(new) => { + new_inputs.reserve(self.inputs.len()); + new_inputs.extend(self.inputs[..i].iter().cloned()); + new_inputs.push(new); + break; + }, + } + } + + // Only copy inputs if it is actually needed + if new_inputs.is_empty() { + return None; + } + + new_inputs.extend(self.inputs[new_inputs.len()..].iter().map(|i| { + match i.replace_elementwise_const_columns(const_columns) { + None => i.clone(), + Some(new) => new, + } + })); + + let mut slf = self.clone(); + slf.inputs = new_inputs; + return Some(Arc::new(slf)); + } + + None + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.expr.to_field(input_schema, Context::Default) } diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index 29db6dcd643a..3696b0d9bf46 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -8,6 +8,7 @@ use crate::expressions::{ AggState, AggregationContext, PartitionedAggregation, PhysicalExpr, UpdateGroups, }; +#[derive(Clone)] pub struct BinaryExpr { left: Arc, op: Operator, @@ -265,6 +266,31 @@ impl PhysicalExpr for BinaryExpr { } } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.left.collect_live_columns(lv); + 
self.right.collect_live_columns(lv); + } + fn replace_elementwise_const_columns( + &self, + const_columns: &PlHashMap>, + ) -> Option> { + let rcc_left = self.left.replace_elementwise_const_columns(const_columns); + let rcc_right = self.right.replace_elementwise_const_columns(const_columns); + + if rcc_left.is_some() || rcc_right.is_some() { + let mut slf = self.clone(); + if let Some(left) = rcc_left { + slf.left = left; + } + if let Some(right) = rcc_right { + slf.right = right; + } + return Some(Arc::new(slf)); + } + + None + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.expr.to_field(input_schema, Context::Default) } diff --git a/crates/polars-expr/src/expressions/cast.rs b/crates/polars-expr/src/expressions/cast.rs index 1bc230ceab8f..623854d35b11 100644 --- a/crates/polars-expr/src/expressions/cast.rs +++ b/crates/polars-expr/src/expressions/cast.rs @@ -87,6 +87,10 @@ impl PhysicalExpr for CastExpr { Ok(ac) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.input.collect_live_columns(lv); + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.input.to_field(input_schema).map(|mut fld| { fld.coerce(self.dtype.clone()); diff --git a/crates/polars-expr/src/expressions/column.rs b/crates/polars-expr/src/expressions/column.rs index 99b5ba9fe262..4c730663b339 100644 --- a/crates/polars-expr/src/expressions/column.rs +++ b/crates/polars-expr/src/expressions/column.rs @@ -133,6 +133,7 @@ impl PhysicalExpr for ColumnExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let out = match self.schema.get_full(&self.name) { Some((idx, _, _)) => { @@ -178,6 +179,22 @@ impl PhysicalExpr for ColumnExpr { Some(self) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + lv.insert(self.name.clone()); + } + fn replace_elementwise_const_columns( + &self, + const_columns: &PlHashMap>, + ) -> Option> { + if let Some(av) = const_columns.get(&self.name) { + let lv = LiteralValue::from(av.clone()); + let le = LiteralExpr::new(lv, self.expr.clone()); + return Some(Arc::new(le)); + } + + None + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { input_schema.get_field(&self.name).ok_or_else(|| { polars_err!( diff --git a/crates/polars-expr/src/expressions/count.rs b/crates/polars-expr/src/expressions/count.rs index db25f0d9e73b..118334126ecf 100644 --- a/crates/polars-expr/src/expressions/count.rs +++ b/crates/polars-expr/src/expressions/count.rs @@ -36,6 +36,8 @@ impl PhysicalExpr for CountExpr { Ok(AggregationContext::new(c, Cow::Borrowed(groups), true)) } + fn collect_live_columns(&self, _lv: &mut PlIndexSet) {} + fn to_field(&self, _input_schema: &Schema) -> PolarsResult { Ok(Field::new(PlSmallStr::from_static(LEN), IDX_DTYPE)) } diff --git a/crates/polars-expr/src/expressions/filter.rs b/crates/polars-expr/src/expressions/filter.rs index f2b1383059ee..240e5a83be62 100644 --- a/crates/polars-expr/src/expressions/filter.rs +++ b/crates/polars-expr/src/expressions/filter.rs @@ -24,6 +24,7 @@ impl PhysicalExpr for FilterExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let s_f = || self.input.evaluate(df, state); let predicate_f = || self.by.evaluate(df, state); @@ -145,6 +146,11 @@ impl PhysicalExpr for FilterExpr { } } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.input.collect_live_columns(lv); + self.by.collect_live_columns(lv); + } 
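// Illustrative sketch (not part of the patch): the `collect_live_columns` hook added
// across these expression impls is intended to be driven from call sites roughly as
// below. The helper name is hypothetical, and the element type of `PlIndexSet` is an
// assumption (`PlSmallStr`), inferred from how the set is consumed later in this diff
// (e.g. `live_columns.contains(name)` in read_impl.rs).

use polars_core::prelude::{PlIndexSet, PlSmallStr};
use crate::expressions::PhysicalExpr; // path as used by the other files in this diff

fn live_column_names(expr: &dyn PhysicalExpr) -> PlIndexSet<PlSmallStr> {
    let mut live_columns = PlIndexSet::default();
    // Recursively walks the expression tree: leaf `ColumnExpr`s insert their name,
    // `AliasExpr` additionally inserts the name it produces, literals insert nothing.
    expr.collect_live_columns(&mut live_columns);
    live_columns
}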
+ fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.input.to_field(input_schema) } diff --git a/crates/polars-expr/src/expressions/gather.rs b/crates/polars-expr/src/expressions/gather.rs index 5c0ccae4f2bc..e38b27aaeacc 100644 --- a/crates/polars-expr/src/expressions/gather.rs +++ b/crates/polars-expr/src/expressions/gather.rs @@ -18,6 +18,7 @@ impl PhysicalExpr for GatherExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series = self.phys_expr.evaluate(df, state)?; self.finish(df, state, series) @@ -88,6 +89,11 @@ impl PhysicalExpr for GatherExpr { Ok(ac) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.phys_expr.collect_live_columns(lv); + self.idx.collect_live_columns(lv); + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.phys_expr.to_field(input_schema) } diff --git a/crates/polars-expr/src/expressions/literal.rs b/crates/polars-expr/src/expressions/literal.rs index 66a3e02e1834..8b152803bb64 100644 --- a/crates/polars-expr/src/expressions/literal.rs +++ b/crates/polars-expr/src/expressions/literal.rs @@ -121,6 +121,7 @@ impl PhysicalExpr for LiteralExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.1) } + fn evaluate(&self, _df: &DataFrame, _state: &ExecutionState) -> PolarsResult { self.as_column() } @@ -148,6 +149,8 @@ impl PhysicalExpr for LiteralExpr { Some(self) } + fn collect_live_columns(&self, _lv: &mut PlIndexSet) {} + fn to_field(&self, _input_schema: &Schema) -> PolarsResult { let dtype = self.0.get_datatype(); Ok(Field::new(PlSmallStr::from_static("literal"), dtype)) diff --git a/crates/polars-expr/src/expressions/mod.rs b/crates/polars-expr/src/expressions/mod.rs index 70963dde7eec..c309991990ee 100644 --- a/crates/polars-expr/src/expressions/mod.rs +++ b/crates/polars-expr/src/expressions/mod.rs @@ -590,6 +590,22 @@ pub trait PhysicalExpr: Send + Sync { None } + /// Get the variables that are used in the expression i.e. live variables. + /// This can contain duplicates. + fn collect_live_columns(&self, lv: &mut PlIndexSet); + + /// Replace columns that are known to be a constant value with their const value. + /// + /// This should not replace values that are calculated non-elementwise e.g. col.max(), + /// col.std(), etc. 
+ fn replace_elementwise_const_columns( + &self, + const_columns: &PlHashMap>, + ) -> Option> { + _ = const_columns; + None + } + /// Can take &dyn Statistics and determine of a file should be /// read -> `true` /// or not -> `false` @@ -630,8 +646,8 @@ impl PhysicalIoExpr for PhysicalIoHelper { .map(|c| c.take_materialized_series()) } - fn live_variables(&self) -> Option> { - Some(expr_to_leaf_column_names(self.expr.as_expression()?)) + fn collect_live_columns(&self, live_columns: &mut PlIndexSet) { + self.expr.collect_live_columns(live_columns); } #[cfg(feature = "parquet")] diff --git a/crates/polars-expr/src/expressions/rolling.rs b/crates/polars-expr/src/expressions/rolling.rs index 7e9897d7328c..2ec32069a30f 100644 --- a/crates/polars-expr/src/expressions/rolling.rs +++ b/crates/polars-expr/src/expressions/rolling.rs @@ -59,6 +59,10 @@ impl PhysicalExpr for RollingExpr { polars_bail!(InvalidOperation: "rolling expression not allowed in aggregation"); } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.phys_function.collect_live_columns(lv); + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.function.to_field(input_schema, Context::Default) } diff --git a/crates/polars-expr/src/expressions/slice.rs b/crates/polars-expr/src/expressions/slice.rs index 72bb6376466c..62df859460f8 100644 --- a/crates/polars-expr/src/expressions/slice.rs +++ b/crates/polars-expr/src/expressions/slice.rs @@ -268,6 +268,12 @@ impl PhysicalExpr for SliceExpr { Ok(ac) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.input.collect_live_columns(lv); + self.offset.collect_live_columns(lv); + self.length.collect_live_columns(lv); + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.input.to_field(input_schema) } diff --git a/crates/polars-expr/src/expressions/sort.rs b/crates/polars-expr/src/expressions/sort.rs index df816f9b48e7..746978b760a9 100644 --- a/crates/polars-expr/src/expressions/sort.rs +++ b/crates/polars-expr/src/expressions/sort.rs @@ -46,6 +46,7 @@ impl PhysicalExpr for SortExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series = self.physical_expr.evaluate(df, state)?; series.sort_with(self.options) @@ -104,6 +105,10 @@ impl PhysicalExpr for SortExpr { Ok(ac) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.physical_expr.collect_live_columns(lv); + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.physical_expr.to_field(input_schema) } diff --git a/crates/polars-expr/src/expressions/sortby.rs b/crates/polars-expr/src/expressions/sortby.rs index ed34ed6414cd..3d9877038adc 100644 --- a/crates/polars-expr/src/expressions/sortby.rs +++ b/crates/polars-expr/src/expressions/sortby.rs @@ -201,6 +201,7 @@ impl PhysicalExpr for SortByExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series_f = || self.input.evaluate(df, state); if self.by.is_empty() { @@ -374,6 +375,13 @@ impl PhysicalExpr for SortByExpr { Ok(ac_in) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.input.collect_live_columns(lv); + for i in &self.by { + i.collect_live_columns(lv); + } + } + fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.input.to_field(input_schema) } diff --git a/crates/polars-expr/src/expressions/ternary.rs b/crates/polars-expr/src/expressions/ternary.rs index bbd0c5f7d936..e7ec666eda50 
100644 --- a/crates/polars-expr/src/expressions/ternary.rs +++ b/crates/polars-expr/src/expressions/ternary.rs @@ -328,6 +328,12 @@ impl PhysicalExpr for TernaryExpr { Some(self) } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + self.predicate.collect_live_columns(lv); + self.truthy.collect_live_columns(lv); + self.falsy.collect_live_columns(lv); + } + fn is_scalar(&self) -> bool { self.returns_scalar } diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index bbb9a1cface1..d833278a12cb 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -641,6 +641,16 @@ impl PhysicalExpr for WindowExpr { false } + fn collect_live_columns(&self, lv: &mut PlIndexSet) { + for i in &self.group_by { + i.collect_live_columns(lv); + } + if let Some((i, _)) = &self.order_by { + i.collect_live_columns(lv); + } + self.phys_function.collect_live_columns(lv); + } + #[allow(clippy::ptr_arg)] fn evaluate_on_groups<'a>( &self, diff --git a/crates/polars-io/src/parquet/read/mod.rs b/crates/polars-io/src/parquet/read/mod.rs index 02189a0dce85..07628af1a3ba 100644 --- a/crates/polars-io/src/parquet/read/mod.rs +++ b/crates/polars-io/src/parquet/read/mod.rs @@ -32,6 +32,8 @@ or set 'streaming'", pub use options::{ParallelStrategy, ParquetOptions}; use polars_error::{ErrString, PolarsError}; +pub use polars_parquet::arrow::read::infer_schema; +pub use polars_parquet::read::FileMetadata; pub use read_impl::{create_sorting_map, try_set_sorted_flag}; #[cfg(feature = "cloud")] pub use reader::ParquetAsyncReader; diff --git a/crates/polars-io/src/parquet/read/read_impl.rs b/crates/polars-io/src/parquet/read/read_impl.rs index a065d999e943..eb4448eebeb1 100644 --- a/crates/polars-io/src/parquet/read/read_impl.rs +++ b/crates/polars-io/src/parquet/read/read_impl.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; use std::collections::VecDeque; -use std::ops::{Deref, Range}; +use std::ops::Range; use arrow::array::BooleanArray; use arrow::bitmap::MutableBitmap; @@ -168,7 +168,9 @@ fn rg_to_dfs( if parallel == S::Prefiltered { if let Some(predicate) = predicate { - if let Some(live_variables) = predicate.live_variables() { + let mut live_columns = PlIndexSet::new(); + predicate.collect_live_columns(&mut live_columns); + if !live_columns.is_empty() { return rg_to_dfs_prefiltered( store, previous_row_count, @@ -176,7 +178,7 @@ fn rg_to_dfs( row_group_end, file_metadata, schema, - live_variables, + live_columns, predicate, row_index, projection, @@ -240,7 +242,7 @@ fn rg_to_dfs_prefiltered( row_group_end: usize, file_metadata: &FileMetadata, schema: &ArrowSchemaRef, - live_variables: Vec, + live_columns: PlIndexSet, predicate: &dyn PhysicalIoExpr, row_index: Option, projection: &[usize], @@ -267,14 +269,8 @@ fn rg_to_dfs_prefiltered( .collect(), }; - // Deduplicate the live variables - let live_variables = live_variables - .iter() - .map(Deref::deref) - .collect::>(); - // Get the number of live columns - let num_live_columns = live_variables.len(); + let num_live_columns = live_columns.len(); let num_dead_columns = projection.len() + hive_partition_columns.map_or(0, |x| x.len()) - num_live_columns; @@ -290,7 +286,7 @@ fn rg_to_dfs_prefiltered( for &i in projection.iter() { let name = schema.get_at_index(i).unwrap().0.as_str(); - if live_variables.contains(name) { + if live_columns.contains(name) { live_idx_to_col_idx.push(i); } else { dead_idx_to_col_idx.push(i); @@ -831,7 +827,9 @@ pub fn read_parquet( let prefilter_env = 
std::env::var("POLARS_PARQUET_PREFILTER"); let prefilter_env = prefilter_env.as_deref(); - let num_live_variables = predicate.live_variables().map_or(0, |v| v.len()); + let mut live_columns = PlIndexSet::new(); + predicate.collect_live_columns(&mut live_columns); + let num_live_variables = live_columns.len(); let mut do_prefilter = false; do_prefilter |= prefilter_env == Ok("1"); // Force enable diff --git a/crates/polars-io/src/predicates.rs b/crates/polars-io/src/predicates.rs index 77872e708e40..75ebc922a4b9 100644 --- a/crates/polars-io/src/predicates.rs +++ b/crates/polars-io/src/predicates.rs @@ -9,7 +9,7 @@ pub trait PhysicalIoExpr: Send + Sync { /// Get the variables that are used in the expression i.e. live variables. /// This can contain duplicates. - fn live_variables(&self) -> Option>; + fn collect_live_columns(&self, live_columns: &mut PlIndexSet); /// Can take &dyn Statistics and determine of a file should be /// read -> `true` @@ -214,6 +214,16 @@ pub struct BatchStats { num_rows: Option, } +impl Default for BatchStats { + fn default() -> Self { + Self { + schema: Arc::new(Schema::default()), + stats: Vec::new(), + num_rows: None, + } + } +} + impl BatchStats { /// Constructs a new [`BatchStats`]. /// diff --git a/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs b/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs index 0700f5f767e7..e36a8a7565a8 100644 --- a/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs +++ b/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs @@ -26,9 +26,8 @@ impl PhysicalIoExpr for Wrap { }; h.evaluate_io(df) } - fn live_variables(&self) -> Option> { - // @TODO: This should not unwrap - Some(expr_to_leaf_column_names(self.0.as_expression()?)) + fn collect_live_columns(&self, live_columns: &mut PlIndexSet) { + self.0.collect_live_columns(live_columns); } fn as_stats_evaluator(&self) -> Option<&dyn StatsEvaluator> { self.0.as_stats_evaluator() diff --git a/crates/polars-mem-engine/src/executors/hive_scan.rs b/crates/polars-mem-engine/src/executors/hive_scan.rs new file mode 100644 index 000000000000..538ab10bbb9e --- /dev/null +++ b/crates/polars-mem-engine/src/executors/hive_scan.rs @@ -0,0 +1,535 @@ +use std::borrow::Cow; +use std::cell::OnceCell; + +use hive::HivePartitions; +use polars_core::config; +use polars_core::frame::column::ScalarColumn; +use polars_core::utils::{ + accumulate_dataframes_vertical, accumulate_dataframes_vertical_unchecked, +}; +use polars_io::predicates::BatchStats; +use polars_io::RowIndex; + +use super::Executor; +#[cfg(feature = "csv")] +use crate::executors::CsvExec; +#[cfg(feature = "parquet")] +use crate::executors::ParquetExec; +use crate::prelude::*; + +pub trait IOFileMetadata: Send + Sync { + fn as_any(&self) -> &dyn std::any::Any; + fn num_rows(&self) -> PolarsResult; + fn schema(&self) -> PolarsResult; +} + +pub(super) struct BasicFileMetadata { + pub schema: Schema, + pub num_rows: IdxSize, +} + +impl IOFileMetadata for BasicFileMetadata { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn num_rows(&self) -> PolarsResult { + Ok(self.num_rows) + } + + fn schema(&self) -> PolarsResult { + Ok(self.schema.clone()) + } +} + +pub trait ScanExec { + fn read( + &mut self, + with_columns: Option>, + slice: Option<(usize, usize)>, + predicate: Option>, + row_index: Option, + metadata: Option>, + schema: Schema, + ) -> PolarsResult; + + fn metadata(&mut self) -> PolarsResult>; +} + +fn source_to_scan_exec( + source: ScanSourceRef, + 
scan_type: &FileScan, + file_info: &FileInfo, + file_options: &FileScanOptions, + metadata: Option<&dyn IOFileMetadata>, +) -> PolarsResult> { + let source = match source { + ScanSourceRef::Path(path) => ScanSources::Paths([path.to_path_buf()].into()), + ScanSourceRef::File(_) | ScanSourceRef::Buffer(_) => { + ScanSources::Buffers([source.to_memslice()?].into()) + }, + }; + + Ok(match scan_type { + #[cfg(feature = "parquet")] + FileScan::Parquet { + options, + cloud_options, + .. + } => Box::new(ParquetExec::new( + source, + file_info.clone(), + None, + None, + options.clone(), + cloud_options.clone(), + file_options.clone(), + metadata.map(|md| { + md.as_any() + .downcast_ref::>() + .unwrap() + .clone() + }), + )) as _, + #[cfg(feature = "csv")] + FileScan::Csv { options, .. } => Box::new(CsvExec { + sources: source, + file_info: file_info.clone(), + options: options.clone(), + file_options: file_options.clone(), + predicate: None, + }), + _ => todo!(), + }) +} + +pub struct Source { + scan_exec: Box, + metadata: OnceCell>, +} + +impl Source { + fn new( + source: ScanSourceRef, + scan_type: &FileScan, + file_info: &FileInfo, + file_options: &FileScanOptions, + metadata: Option<&dyn IOFileMetadata>, + ) -> PolarsResult { + let scan_exec = source_to_scan_exec(source, scan_type, file_info, file_options, metadata)?; + Ok(Self { + scan_exec, + metadata: OnceCell::new(), + }) + } + + fn get_metadata(&mut self) -> PolarsResult<&dyn IOFileMetadata> { + match self.metadata.get() { + None => { + let metadata = self.scan_exec.metadata()?; + Ok(self.metadata.get_or_init(|| metadata).as_ref()) + }, + Some(metadata) => Ok(metadata.as_ref()), + } + } + + fn num_unfiltered_rows(&mut self) -> PolarsResult { + self.get_metadata()?.num_rows() + } + + fn schema(&mut self) -> PolarsResult { + self.get_metadata()?.schema() + } +} + +/// Scan over multiple sources and combine their results. +pub struct MultiScanExec { + sources: ScanSources, + file_info: FileInfo, + hive_parts: Option>>, + predicate: Option>, + file_options: FileScanOptions, + scan_type: FileScan, + + first_file_metadata: Option>, +} + +impl MultiScanExec { + pub fn new( + sources: ScanSources, + file_info: FileInfo, + hive_parts: Option>>, + predicate: Option>, + file_options: FileScanOptions, + mut scan_type: FileScan, + ) -> Self { + let first_file_metadata = match &mut scan_type { + #[cfg(feature = "parquet")] + FileScan::Parquet { metadata, .. } => metadata.take().map(|md| Box::new(md) as _), + _ => None, + }; + + Self { + sources, + file_info, + hive_parts, + predicate, + file_options, + scan_type, + first_file_metadata, + } + } + + pub fn resolve_negative_slice( + &mut self, + offset: i64, + length: usize, + ) -> PolarsResult<(usize, usize)> { + // Walk the files in reverse until we find the first file, and then translate the + // slice into a positive-offset equivalent. + let mut offset_remaining = -offset as usize; + + for i in (0..self.sources.len()).rev() { + let source = self.sources.get(i).unwrap(); + let mut exec_source = Source::new( + source, + &self.scan_type, + &self.file_info, + &self.file_options, + self.first_file_metadata.as_deref().filter(|_| i == 0), + )?; + + let num_rows = exec_source.num_unfiltered_rows()? 
as usize; + + if num_rows >= offset_remaining { + return Ok((i, num_rows - offset_remaining)); + } + offset_remaining -= num_rows; + } + + Ok((0, length - offset_remaining)) + } + + pub fn read(&mut self) -> PolarsResult { + let include_file_paths = self.file_options.include_file_paths.take(); + let predicate = self.predicate.take(); + + // Create a index set of the hive columns. + let mut hive_column_set = PlIndexSet::default(); + if let Some(hive_parts) = &self.hive_parts { + assert_eq!(self.sources.len(), hive_parts.len()); + + if let Some(fst_hive_part) = hive_parts.first() { + hive_column_set.extend( + fst_hive_part + .get_statistics() + .column_stats() + .iter() + .map(|c| c.field_name().clone()), + ); + } + } + + // Look through the predicate and assess whether hive columns are being used in it. + let mut has_live_hive_columns = false; + if let Some(predicate) = &predicate { + let mut live_columns = PlIndexSet::new(); + predicate.collect_live_columns(&mut live_columns); + + for hive_column in &hive_column_set { + has_live_hive_columns |= live_columns.contains(hive_column); + } + } + + // Remove the hive columns for each file load. + let mut file_with_columns = self.file_options.with_columns.take(); + if self.hive_parts.is_some() { + if let Some(with_columns) = &self.file_options.with_columns { + file_with_columns = Some( + with_columns + .iter() + .filter(|&c| !hive_column_set.contains(c)) + .cloned() + .collect(), + ); + } + } + + let allow_missing_columns = self.file_options.allow_missing_columns; + self.file_options.allow_missing_columns = false; + let mut row_index = self.file_options.row_index.take(); + let slice = self.file_options.slice.take(); + + let current_schema = self.file_info.schema.clone(); + let output_schema = current_schema.clone(); + let mut missing_columns = Vec::new(); + + let mut first_slice_file = None; + let mut slice = match slice { + None => None, + Some((offset, length)) => Some({ + if offset >= 0 { + (offset as usize, length) + } else { + let (first_file, offset) = self.resolve_negative_slice(offset, length)?; + first_slice_file = Some(first_file); + (offset, length) + } + }), + }; + + let verbose = config::verbose(); + let mut dfs = Vec::with_capacity(self.sources.len()); + + let mut const_columns = PlHashMap::new(); + + // @TODO: This should be moved outside of the FileScan::Parquet + let use_statistics = match &self.scan_type { + #[cfg(feature = "parquet")] + FileScan::Parquet { options, .. } => options.use_statistics, + _ => true, + }; + + for (i, source) in self.sources.iter().enumerate() { + let hive_part = self.hive_parts.as_ref().and_then(|h| h.get(i)); + if slice.is_some_and(|s| s.1 == 0) { + break; + } + + let mut exec_source = Source::new( + source, + &self.scan_type, + &self.file_info, + &self.file_options, + self.first_file_metadata.as_deref().filter(|_| i == 0), + )?; + + if verbose { + eprintln!( + "Multi-file / Hive read: currently reading '{}'", + source.to_include_path_name() + ); + } + + // @TODO: There are cases where we can ignore reading. E.g. 
no row index + empty with columns + no predicate + let mut schema = exec_source.schema()?; + let mut extra_columns = Vec::new(); + + if let Some(file_with_columns) = &file_with_columns { + if allow_missing_columns { + schema = schema.try_project( + file_with_columns + .iter() + .filter(|c| schema.contains(c.as_str())), + )?; + } else { + schema = schema.try_project(file_with_columns.iter())?; + } + } + + if allow_missing_columns { + missing_columns.clear(); + extra_columns.clear(); + + current_schema.as_ref().field_compare( + &schema, + &mut missing_columns, + &mut extra_columns, + ); + + if !extra_columns.is_empty() { + // @TODO: Better error + polars_bail!(InvalidOperation: "More schema in file after first"); + } + } + + // Insert the hive partition values into the predicate. This allows the predicate + // to function even when there is a combination of hive and non-hive columns being + // used. + let mut file_predicate = predicate.clone(); + if has_live_hive_columns { + let hive_part = hive_part.unwrap(); + let predicate = predicate.as_ref().unwrap(); + const_columns.clear(); + for (idx, column) in hive_column_set.iter().enumerate() { + let value = hive_part.get_statistics().column_stats()[idx] + .to_min() + .unwrap() + .get(0) + .unwrap() + .into_static(); + const_columns.insert(column.clone(), value); + } + for (_, (missing_column, _)) in &missing_columns { + const_columns.insert((*missing_column).clone(), AnyValue::Null); + } + + file_predicate = predicate.replace_elementwise_const_columns(&const_columns); + + // @TODO: Set predicate to `None` if it's constant evaluated to true. + + // At this point the file_predicate should not contain any references to the + // hive columns anymore. + // + // Note that, replace_elementwise_const_columns does not actually guarantee the + // replacement of all reference to the const columns. But any expression which + // does not guarantee this should not be pushed down as an IO predicate. + if cfg!(debug_assertions) { + let mut live_columns = PlIndexSet::new(); + file_predicate + .as_ref() + .unwrap() + .collect_live_columns(&mut live_columns); + for hive_column in hive_part.get_statistics().column_stats() { + assert!( + !live_columns.contains(hive_column.field_name()), + "Predicate still contains hive column" + ); + } + } + } + + let mut do_skip_file = false; + if let Some(slice) = &slice { + let allow_slice_skip = match first_slice_file { + None => slice.0 as IdxSize >= exec_source.num_unfiltered_rows()?, + Some(f) => i < f, + }; + + if allow_slice_skip && verbose { + eprintln!( + "Slice allows skipping of '{}'", + source.to_include_path_name() + ); + } + do_skip_file |= allow_slice_skip; + } + + let stats_evaluator = file_predicate.as_ref().and_then(|p| p.as_stats_evaluator()); + let stats_evaluator = stats_evaluator.filter(|_| use_statistics); + + if let Some(stats_evaluator) = stats_evaluator { + let allow_predicate_skip = !stats_evaluator + .should_read(&BatchStats::default()) + .unwrap_or(true); + if allow_predicate_skip && verbose { + eprintln!( + "File statistics allows skipping of '{}'", + source.to_include_path_name() + ); + } + do_skip_file |= allow_predicate_skip; + } + + if do_skip_file { + // Update the row_index to the proper offset. + if let Some(row_index) = row_index.as_mut() { + row_index.offset += exec_source.num_unfiltered_rows()?; + } + // Update the slice offset. + if let Some(slice) = slice.as_mut() { + if first_slice_file.is_none_or(|f| i >= f) { + slice.0 = slice + .0 + .saturating_sub(exec_source.num_unfiltered_rows()? 
as usize); + } + } + + continue; + } + + let with_columns = if allow_missing_columns { + file_with_columns + .as_ref() + .map(|_| schema.iter_names().cloned().collect()) + } else { + file_with_columns.clone() + }; + + // Read the DataFrame and needed metadata. + let num_unfiltered_rows = exec_source.num_unfiltered_rows()?; + let mut df = exec_source.scan_exec.read( + with_columns, + slice, + file_predicate, + row_index.clone(), + exec_source.metadata.take(), + schema, + )?; + + // Update the row_index to the proper offset. + if let Some(row_index) = row_index.as_mut() { + row_index.offset += num_unfiltered_rows; + } + // Update the slice. + if let Some(slice) = slice.as_mut() { + if first_slice_file.is_none_or(|f| i >= f) { + slice.1 = slice + .1 + .saturating_sub(num_unfiltered_rows as usize - slice.0); + slice.0 = slice.0.saturating_sub(num_unfiltered_rows as usize); + } + } + + // Add all the missing columns. + if allow_missing_columns && !missing_columns.is_empty() { + for (_, (name, field)) in &missing_columns { + df.with_column(Column::full_null((*name).clone(), df.height(), field))?; + } + } + // Materialize the hive columns and add them back in. + if let Some(hive_part) = hive_part { + for hive_col in hive_part.get_statistics().column_stats() { + df.with_column( + ScalarColumn::from_single_value_series( + hive_col + .to_min() + .unwrap() + .clone() + .with_name(hive_col.field_name().clone()), + df.height(), + ) + .into_column(), + )?; + } + } + // Add the `include_file_paths` column + if let Some(include_file_paths) = &include_file_paths { + df.with_column(ScalarColumn::new( + include_file_paths.clone(), + PlSmallStr::from_str(source.to_include_path_name()).into(), + df.height(), + ))?; + } + + // Project to ensure that all DataFrames have the proper order. + df = df.select(output_schema.iter_names().cloned())?; + dfs.push(df); + } + + let out = if cfg!(debug_assertions) { + accumulate_dataframes_vertical(dfs)? 
+ } else { + accumulate_dataframes_vertical_unchecked(dfs) + }; + + Ok(out) + } +} + +impl Executor for MultiScanExec { + fn execute(&mut self, state: &mut ExecutionState) -> PolarsResult { + let profile_name = if state.has_node_timer() { + let mut ids = vec![self.sources.id()]; + if self.predicate.is_some() { + ids.push("predicate".into()) + } + let name = comma_delimited("hive".to_string(), &ids); + Cow::Owned(name) + } else { + Cow::Borrowed("") + }; + + state.record(|| self.read(), profile_name) + } +} diff --git a/crates/polars-mem-engine/src/executors/mod.rs b/crates/polars-mem-engine/src/executors/mod.rs index 5c9d093d986a..7dc6ed65e545 100644 --- a/crates/polars-mem-engine/src/executors/mod.rs +++ b/crates/polars-mem-engine/src/executors/mod.rs @@ -7,6 +7,7 @@ mod group_by_dynamic; mod group_by_partitioned; pub(super) mod group_by_rolling; mod hconcat; +mod hive_scan; mod join; mod projection; mod projection_simple; @@ -38,6 +39,7 @@ pub(super) use self::group_by_partitioned::*; #[cfg(feature = "dynamic_group_by")] pub(super) use self::group_by_rolling::GroupByRollingExec; pub(super) use self::hconcat::*; +pub(super) use self::hive_scan::*; pub(super) use self::join::*; pub(super) use self::projection::*; pub(super) use self::projection_simple::*; diff --git a/crates/polars-mem-engine/src/executors/scan/csv.rs b/crates/polars-mem-engine/src/executors/scan/csv.rs index 6f4448667130..6caf40019c8d 100644 --- a/crates/polars-mem-engine/src/executors/scan/csv.rs +++ b/crates/polars-mem-engine/src/executors/scan/csv.rs @@ -17,7 +17,7 @@ pub struct CsvExec { } impl CsvExec { - fn read(&self) -> PolarsResult { + fn read_impl(&self) -> PolarsResult { let with_columns = self .file_options .with_columns @@ -209,6 +209,72 @@ impl CsvExec { } } +impl ScanExec for CsvExec { + fn read( + &mut self, + with_columns: Option>, + slice: Option<(usize, usize)>, + predicate: Option>, + row_index: Option, + metadata: Option>, + schema: Schema, + ) -> PolarsResult { + self.file_options.with_columns = with_columns; + self.file_options.slice = slice.map(|(o, l)| (o as i64, l)); + self.predicate = predicate; + self.file_options.row_index = row_index; + + let schema = Arc::new(schema); + self.file_info.reader_schema = Some(arrow::Either::Right(schema.clone())); + self.file_info.schema = schema.clone(); + + self.options.schema.take(); + // self.options.schema_overwrite.take(); + + // Use the metadata somehow + _ = metadata; + + self.read_impl() + } + + fn metadata(&mut self) -> PolarsResult> { + let force_async = config::force_async(); + let run_async = (self.sources.is_paths() && force_async) || self.sources.is_cloud_url(); + + let source = self.sources.at(0); + let owned = &mut vec![]; + + let memslice = source.to_memslice_async_assume_latest(run_async)?; + + let popt = self.options.parse_options.as_ref(); + + let bytes = maybe_decompress_bytes(&memslice, owned)?; + let num_rows = count_rows_from_slice( + bytes, + popt.separator, + popt.quote_char, + popt.comment_prefix.as_ref(), + popt.eol_char, + self.options.has_header, + )? as IdxSize; + let schema = infer_file_schema( + &get_reader_bytes(&mut std::io::Cursor::new(bytes))?, + self.options.parse_options.as_ref(), + self.options.infer_schema_length, + self.options.has_header, + self.options.schema_overwrite.as_deref(), + self.options.skip_rows, + self.options.skip_lines, + self.options.skip_rows_after_header, + self.options.raise_if_empty, + &mut self.options.n_threads, + )? 
+ .0; + + Ok(Box::new(BasicFileMetadata { schema, num_rows }) as _) + } +} + impl Executor for CsvExec { fn execute(&mut self, state: &mut ExecutionState) -> PolarsResult { let profile_name = if state.has_node_timer() { @@ -222,6 +288,6 @@ impl Executor for CsvExec { Cow::Borrowed("") }; - state.record(|| self.read(), profile_name) + state.record(|| self.read_impl(), profile_name) } } diff --git a/crates/polars-mem-engine/src/executors/scan/parquet.rs b/crates/polars-mem-engine/src/executors/scan/parquet.rs index 61a89693cfe0..25595ff5d998 100644 --- a/crates/polars-mem-engine/src/executors/scan/parquet.rs +++ b/crates/polars-mem-engine/src/executors/scan/parquet.rs @@ -14,7 +14,9 @@ use super::*; pub struct ParquetExec { sources: ScanSources, file_info: FileInfo, + hive_parts: Option>>, + predicate: Option>, options: ParquetOptions, #[allow(dead_code)] @@ -39,7 +41,9 @@ impl ParquetExec { ParquetExec { sources, file_info, + hive_parts, + predicate, options, cloud_options, @@ -473,7 +477,7 @@ impl ParquetExec { Ok(result) } - fn read(&mut self) -> PolarsResult { + fn read_with_num_unfiltered_rows(&mut self) -> PolarsResult<(IdxSize, DataFrame)> { // FIXME: The row index implementation is incorrect when a predicate is // applied. This code mitigates that by applying the predicate after the // collection of the entire dataframe if a row index is requested. This is @@ -502,12 +506,107 @@ impl ParquetExec { let mut out = accumulate_dataframes_vertical(out)?; + let num_unfiltered_rows = out.height() as IdxSize; + polars_io::predicates::apply_predicate(&mut out, post_predicate.as_deref(), true)?; if self.file_options.rechunk { out.as_single_chunk_par(); } - Ok(out) + Ok((num_unfiltered_rows, out)) + } + + fn metadata_sync(&mut self) -> PolarsResult> { + Ok(Box::new(match &self.metadata { + None => { + let memslice = self.sources.get(0).unwrap().to_memslice()?; + ParquetReader::new(std::io::Cursor::new(memslice)) + .get_metadata()? 
+ .clone() + }, + Some(md) => md.clone(), + }) as _) + } + + #[cfg(feature = "cloud")] + async fn metadata_async(&mut self) -> PolarsResult> { + let ScanSourceRef::Path(path) = self.sources.get(0).unwrap() else { + unreachable!(); + }; + + Ok(Box::new(match &self.metadata { + None => { + let mut reader = ParquetAsyncReader::from_uri( + path.to_str().unwrap(), + self.cloud_options.as_ref(), + None, + ) + .await?; + + reader.get_metadata().await?.clone() + }, + Some(md) => md.clone(), + }) as _) + } +} + +impl IOFileMetadata for Arc { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn num_rows(&self) -> PolarsResult { + Ok(self.num_rows as IdxSize) + } + + fn schema(&self) -> PolarsResult { + let arrow_schema = polars_io::parquet::read::infer_schema(self)?; + Ok(Schema::from_iter(arrow_schema.iter().map( + |(name, field)| (name.clone(), DataType::from_arrow_field(field)), + ))) + } +} + +impl ScanExec for ParquetExec { + fn read( + &mut self, + with_columns: Option>, + slice: Option<(usize, usize)>, + predicate: Option>, + row_index: Option, + metadata: Option>, + schema: Schema, + ) -> PolarsResult { + self.file_options.with_columns = with_columns; + self.file_options.slice = slice.map(|(o, l)| (o as i64, l)); + self.predicate = predicate; + self.file_options.row_index = row_index; + + self.file_info.reader_schema = Some(arrow::Either::Left(Arc::new( + schema.to_arrow(CompatLevel::newest()), + ))); + self.file_info.schema = Arc::new(schema); + if let Some(metadata) = metadata { + self.metadata = Some( + metadata + .as_any() + .downcast_ref::>() + .unwrap() + .clone(), + ); + } + + self.read_with_num_unfiltered_rows().map(|(_, df)| df) + } + + fn metadata(&mut self) -> PolarsResult> { + #[cfg(feature = "cloud")] + if self.sources.is_cloud_url() { + return polars_io::pl_async::get_runtime() + .block_on_potential_spawn(self.metadata_async()); + } + + self.metadata_sync() } } @@ -524,6 +623,9 @@ impl Executor for ParquetExec { Cow::Borrowed("") }; - state.record(|| self.read(), profile_name) + state.record( + || self.read_with_num_unfiltered_rows().map(|(_, df)| df), + profile_name, + ) } } diff --git a/crates/polars-mem-engine/src/planner/lp.rs b/crates/polars-mem-engine/src/planner/lp.rs index 25ad3eb48293..e473ea413794 100644 --- a/crates/polars-mem-engine/src/planner/lp.rs +++ b/crates/polars-mem-engine/src/planner/lp.rs @@ -300,15 +300,30 @@ fn create_physical_plan_impl( }) .map_or(Ok(None), |v| v.map(Some))?; - match scan_type { + match scan_type.clone() { #[cfg(feature = "csv")] - FileScan::Csv { options, .. } => Ok(Box::new(executors::CsvExec { - sources, - file_info, - options, - predicate, - file_options, - })), + FileScan::Csv { options, .. 
} => { + if sources.len() > 1 + && std::env::var("POLARS_NEW_MULTIFILE").as_deref() == Ok("1") + { + Ok(Box::new(executors::MultiScanExec::new( + sources, + file_info, + hive_parts, + predicate, + file_options, + scan_type, + ))) + } else { + Ok(Box::new(executors::CsvExec { + sources, + file_info, + options, + predicate, + file_options, + })) + } + }, #[cfg(feature = "ipc")] FileScan::Ipc { options, @@ -328,16 +343,31 @@ fn create_physical_plan_impl( options, cloud_options, metadata, - } => Ok(Box::new(executors::ParquetExec::new( - sources, - file_info, - hive_parts, - predicate, - options, - cloud_options, - file_options, - metadata, - ))), + } => { + if sources.len() > 1 + && std::env::var("POLARS_NEW_MULTIFILE").as_deref() == Ok("1") + { + Ok(Box::new(executors::MultiScanExec::new( + sources, + file_info, + hive_parts, + predicate, + file_options, + scan_type, + ))) + } else { + Ok(Box::new(executors::ParquetExec::new( + sources, + file_info, + hive_parts, + predicate, + options, + cloud_options, + file_options, + metadata, + ))) + } + }, #[cfg(feature = "json")] FileScan::NDJson { options, .. } => Ok(Box::new(executors::JsonExec::new( sources, diff --git a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs index 14ce4836096c..d00094fae4b6 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use polars_core::datatypes::Field; use polars_core::error::PolarsResult; use polars_core::frame::DataFrame; -use polars_core::prelude::{DataType, SchemaRef, Series, IDX_DTYPE}; +use polars_core::prelude::{DataType, PlIndexSet, SchemaRef, Series, IDX_DTYPE}; use polars_core::schema::Schema; use polars_expr::state::ExecutionState; use polars_io::predicates::PhysicalIoExpr; @@ -32,9 +32,7 @@ impl PhysicalIoExpr for Len { unimplemented!() } - fn live_variables(&self) -> Option> { - Some(vec![]) - } + fn collect_live_columns(&self, _live_columns: &mut PlIndexSet) {} } impl PhysicalPipedExpr for Len { fn evaluate(&self, chunk: &DataChunk, _lazy_state: &ExecutionState) -> PolarsResult { diff --git a/crates/polars-pipe/src/pipeline/convert.rs b/crates/polars-pipe/src/pipeline/convert.rs index 9775518fffb6..a195e1f15766 100644 --- a/crates/polars-pipe/src/pipeline/convert.rs +++ b/crates/polars-pipe/src/pipeline/convert.rs @@ -137,8 +137,11 @@ where self.p.evaluate_io(df) } - fn live_variables(&self) -> Option> { - None + fn collect_live_columns( + &self, + live_columns: &mut PlIndexSet, + ) { + self.p.collect_live_columns(live_columns); } fn as_stats_evaluator(&self) -> Option<&dyn StatsEvaluator> { diff --git a/crates/polars-plan/src/plans/lit.rs b/crates/polars-plan/src/plans/lit.rs index 3e18f44703fb..8c6aab394140 100644 --- a/crates/polars-plan/src/plans/lit.rs +++ b/crates/polars-plan/src/plans/lit.rs @@ -233,7 +233,7 @@ impl LiteralValue { } } - pub(crate) fn new_idxsize(value: IdxSize) -> Self { + pub fn new_idxsize(value: IdxSize) -> Self { #[cfg(feature = "bigidx")] { LiteralValue::UInt64(value) diff --git a/crates/polars-schema/src/schema.rs b/crates/polars-schema/src/schema.rs index d29113635de8..1e63fbc78b5f 100644 --- a/crates/polars-schema/src/schema.rs +++ b/crates/polars-schema/src/schema.rs @@ -285,6 +285,26 @@ impl Schema { Ok(i) } + + /// Compare the fields between two schema returning the additional columns that each schema has. 
+ pub fn field_compare<'a, 'b>( + &'a self, + other: &'b Self, + self_extra: &mut Vec<(usize, (&'a PlSmallStr, &'a D))>, + other_extra: &mut Vec<(usize, (&'b PlSmallStr, &'b D))>, + ) { + self_extra.extend( + self.iter() + .enumerate() + .filter(|(_, (n, _))| !other.contains(n)), + ); + other_extra.extend( + other + .iter() + .enumerate() + .filter(|(_, (n, _))| !self.contains(n)), + ); + } } impl Schema diff --git a/crates/polars-stream/src/nodes/io_sources/parquet/init.rs b/crates/polars-stream/src/nodes/io_sources/parquet/init.rs index 5b344644199e..2c45fd0cc5d4 100644 --- a/crates/polars-stream/src/nodes/io_sources/parquet/init.rs +++ b/crates/polars-stream/src/nodes/io_sources/parquet/init.rs @@ -5,6 +5,7 @@ use std::sync::Arc; use futures::stream::FuturesUnordered; use futures::StreamExt; use polars_core::frame::DataFrame; +use polars_core::prelude::PlIndexSet; use polars_error::PolarsResult; use polars_io::prelude::ParallelStrategy; use polars_io::prelude::_internal::PrefilterMaskSetting; @@ -264,25 +265,26 @@ impl ParquetSourceNode { ); let predicate_arrow_field_indices = if use_prefiltered { - let v = physical_predicate + let mut live_columns = PlIndexSet::default(); + physical_predicate .as_ref() .unwrap() - .live_variables() - .and_then(|x| { - let mut out = x + .collect_live_columns(&mut live_columns); + let v = (!live_columns.is_empty()) + .then(|| { + let out = live_columns .iter() // Can be `None` - if the column is e.g. a hive column, or the row index column. .filter_map(|x| projected_arrow_schema.index_of(x)) .collect::>(); - out.sort_unstable(); - out.dedup(); // There is at least one non-predicate column, or pre-filtering was // explicitly requested (only useful for testing). (out.len() < projected_arrow_schema.len() || matches!(self.options.parallel, ParallelStrategy::Prefiltered)) .then_some(out) - }); + }) + .flatten(); use_prefiltered &= v.is_some();