From 76b5d0e9267d7608b82c5e66f671f6b2f6685dd1 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Mon, 12 Feb 2024 07:50:40 -0800 Subject: [PATCH 01/38] Remove deprecated ExchangeSource::request(maxBytes) API (#8727) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/8727 Reviewed By: Yuhta Differential Revision: D53662612 fbshipit-source-id: c7f6b1e1a2dc9bb26d1db58afb5bf0a69037333c --- velox/exec/ExchangeSource.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/velox/exec/ExchangeSource.h b/velox/exec/ExchangeSource.h index 59a3a339f5d6..8cee990d8c02 100644 --- a/velox/exec/ExchangeSource.h +++ b/velox/exec/ExchangeSource.h @@ -59,14 +59,6 @@ class ExchangeSource : public std::enable_shared_from_this { return requestPending_; } - /// Requests the producer to generate up to 'maxBytes' more data. - /// Returns a future that completes when producer responds either with 'data' - /// or with a message indicating that all data has been already produced or - /// data will take more time to produce. - virtual ContinueFuture request(uint32_t /*maxBytes*/) { - VELOX_NYI(); - } - struct Response { /// Size of the response in bytes. Zero means response didn't contain any /// data. From 7830d641d3a6a69e0dd75d84c3a77a065d5e7d9a Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Mon, 12 Feb 2024 09:16:35 -0800 Subject: [PATCH 02/38] Support UNKNOWN type in exporting to Arrow array (#8724) Summary: In arrow, "n" is used to denote NullType which is UNKNOWN type in velox. See [arrow code link](https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/bridge.cc#L318). Pull Request resolved: https://github.com/facebookincubator/velox/pull/8724 Reviewed By: Yuhta Differential Revision: D53663645 Pulled By: mbasmanova fbshipit-source-id: 67afdb5027ee5a236be5c22f05c889ddfb0cf9ac --- velox/vector/arrow/Bridge.cpp | 5 +++++ .../arrow/tests/ArrowBridgeSchemaTest.cpp | 21 ++++++------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/velox/vector/arrow/Bridge.cpp b/velox/vector/arrow/Bridge.cpp index 601cbc08e847..6522ab9f18fc 100644 --- a/velox/vector/arrow/Bridge.cpp +++ b/velox/vector/arrow/Bridge.cpp @@ -248,6 +248,8 @@ const char* exportArrowFormatStr( return "u"; // utf-8 string case TypeKind::VARBINARY: return "z"; // binary + case TypeKind::UNKNOWN: + return "n"; // NullType case TypeKind::TIMESTAMP: return "ttn"; // time64 [nanoseconds] @@ -598,6 +600,7 @@ void exportFlat( case TypeKind::REAL: case TypeKind::DOUBLE: case TypeKind::TIMESTAMP: + case TypeKind::UNKNOWN: exportValues(vec, rows, out, pool, holder); break; case TypeKind::VARCHAR: @@ -940,6 +943,8 @@ TypePtr importFromArrowImpl( return REAL(); case 'g': return DOUBLE(); + case 'n': + return UNKNOWN(); // Map both utf-8 and large utf-8 string to varchar. 
case 'u':
diff --git a/velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp b/velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp
index 8def65ce8e8e..a880d93f1a97 100644
--- a/velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp
+++ b/velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp
@@ -195,6 +195,8 @@ TEST_F(ArrowBridgeSchemaExportTest, scalar) {
   testScalarType(DECIMAL(10, 4), "d:10,4");
   testScalarType(DECIMAL(20, 15), "d:20,15");
+
+  testScalarType(UNKNOWN(), "n");
 }

 TEST_F(ArrowBridgeSchemaExportTest, nested) {
@@ -238,24 +240,14 @@ TEST_F(ArrowBridgeSchemaExportTest, constant) {
   testConstant(DOUBLE(), "g");
   testConstant(VARCHAR(), "u");
   testConstant(DATE(), "tdD");
+  testConstant(UNKNOWN(), "n");

   testConstant(ARRAY(INTEGER()), "+l");
+  testConstant(ARRAY(UNKNOWN()), "+l");
   testConstant(MAP(BOOLEAN(), REAL()), "+m");
+  testConstant(MAP(UNKNOWN(), REAL()), "+m");
   testConstant(ROW({TIMESTAMP(), DOUBLE()}), "+s");
-}
-
-TEST_F(ArrowBridgeSchemaExportTest, unsupported) {
-  // Try some combination of unsupported types to ensure there's no crash or
-  // memory leak in failure scenarios.
-  EXPECT_THROW(testScalarType(UNKNOWN(), ""), VeloxException);
-
-  EXPECT_THROW(testScalarType(ARRAY(UNKNOWN()), ""), VeloxException);
-  EXPECT_THROW(testScalarType(MAP(UNKNOWN(), INTEGER()), ""), VeloxException);
-  EXPECT_THROW(testScalarType(MAP(BIGINT(), UNKNOWN()), ""), VeloxException);
-
-  EXPECT_THROW(testScalarType(ROW({BIGINT(), UNKNOWN()}), ""), VeloxException);
-  EXPECT_THROW(
-      testScalarType(ROW({BIGINT(), REAL(), UNKNOWN()}), ""), VeloxException);
+  testConstant(ROW({UNKNOWN(), UNKNOWN()}), "+s");
 }

 class ArrowBridgeSchemaImportTest : public ArrowBridgeSchemaExportTest {
@@ -395,7 +387,6 @@ TEST_F(ArrowBridgeSchemaImportTest, complexTypes) {
 }

 TEST_F(ArrowBridgeSchemaImportTest, unsupported) {
-  EXPECT_THROW(testSchemaImport("n"), VeloxUserError);
   EXPECT_THROW(testSchemaImport("C"), VeloxUserError);
   EXPECT_THROW(testSchemaImport("S"), VeloxUserError);
   EXPECT_THROW(testSchemaImport("I"), VeloxUserError);

From c51abe36b7fba1142a9110129209b9a80442cbd1 Mon Sep 17 00:00:00 2001
From: Wei He
Date: Mon, 12 Feb 2024 10:19:35 -0800
Subject: [PATCH 03/38] Fix incorrect result of first_value/last_value
 functions (#8626)

Summary:
Pull Request resolved: https://github.com/facebookincubator/velox/pull/8626

The first_value and last_value functions produce incorrect results when used
with IGNORE NULLS and when the frame doesn't start from the first row. In this
situation, FirstLastValueFunction::setRowNumbersIgnoreNulls() attempts to find
the index of the first non-null value from `leastFrame` to
`leastFrame + frameSize`. But the index found is relative to `leastFrame`, so
setRowNumbersIgnoreNulls() should add leastFrame to the index before returning
it. For example, if leastFrame is 5 and the first non-null value within the
frame sits at partition row 7, findFirstBit returns 2, so first_value would
incorrectly read row 2 instead of row 7.

This diff fixes https://github.com/facebookincubator/velox/issues/8427.

Reviewed By: kgpai

Differential Revision: D53295212

fbshipit-source-id: 614f41b5ced7059aad475dca1967a02ba1c4beec
---
 .../prestosql/window/FirstLastValue.cpp       |  6 +++--
 .../prestosql/window/tests/NthValueTest.cpp   | 26 +++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/velox/functions/prestosql/window/FirstLastValue.cpp b/velox/functions/prestosql/window/FirstLastValue.cpp
index bab12e5d01cd..eaec4db98703 100644
--- a/velox/functions/prestosql/window/FirstLastValue.cpp
+++ b/velox/functions/prestosql/window/FirstLastValue.cpp
@@ -127,11 +127,13 @@ class FirstLastValueFunction : public exec::WindowFunction {
       // The function returns null for this case.
-1 correctly maps to // kNullRow as expected for rowNumbers_ extraction. if constexpr (TValue == ValueType::kFirst) { - rowNumbers_[i] = bits::findFirstBit( + auto position = bits::findFirstBit( rawNonNulls, frameStart - leastFrame, frameEnd - leastFrame + 1); + rowNumbers_[i] = (position == -1) ? -1 : position + leastFrame; } else { - rowNumbers_[i] = bits::findLastBit( + auto position = bits::findLastBit( rawNonNulls, frameStart - leastFrame, frameEnd - leastFrame + 1); + rowNumbers_[i] = (position == -1) ? -1 : position + leastFrame; } }); } diff --git a/velox/functions/prestosql/window/tests/NthValueTest.cpp b/velox/functions/prestosql/window/tests/NthValueTest.cpp index 18715361c8fa..c27c690fabcc 100644 --- a/velox/functions/prestosql/window/tests/NthValueTest.cpp +++ b/velox/functions/prestosql/window/tests/NthValueTest.cpp @@ -335,6 +335,32 @@ TEST_F(NthValueTest, ignoreNulls) { } } +TEST_F(NthValueTest, frameStartsFromFollowing) { + auto input = makeRowVector({ + makeNullableFlatVector({1, std::nullopt, 2}), + makeFlatVector({false, false, false}), + makeFlatVector({1, 2, 3}), + }); + auto expected = makeRowVector( + {makeNullableFlatVector({1, std::nullopt, 2}), + makeFlatVector({false, false, false}), + makeFlatVector({1, 2, 3}), + makeNullableFlatVector({2, 2, std::nullopt})}); + + WindowTestBase::testWindowFunction( + {input}, + "first_value(c0 IGNORE NULLS)", + "partition by c1 order by c2", + "rows between 1 following and unbounded following", + expected); + WindowTestBase::testWindowFunction( + {input}, + "last_value(c0 IGNORE NULLS)", + "partition by c1 order by c2", + "rows between 1 following and unbounded following", + expected); +} + // These tests are added since DuckDB has issues with // CURRENT ROW frames. These tests will be replaced by DuckDB based // tests after it is upgraded to v0.8. From 320f578ce7a7128b75cd92502488a92d11fe4995 Mon Sep 17 00:00:00 2001 From: "Schierbeck, Cody" Date: Mon, 12 Feb 2024 10:53:38 -0800 Subject: [PATCH 04/38] Created an expression runner for Spark functions (#8341) Summary: velox_expression_runner_test is only linked to velox_functions_prestosql. Added a spark_expression_runner_test that is linked to velox_functions_spark to allow for running SparkSQL functions. See comments for examples. Pull Request resolved: https://github.com/facebookincubator/velox/pull/8341 Reviewed By: Yuhta Differential Revision: D53357054 Pulled By: kgpai fbshipit-source-id: f9cf562775839d1e5b95397905896faba8f940da --- velox/docs/develop/testing/fuzzer.rst | 2 ++ velox/expression/tests/CMakeLists.txt | 5 +-- .../expression/tests/ExpressionRunnerTest.cpp | 34 ++++++++++++++++--- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/velox/docs/develop/testing/fuzzer.rst b/velox/docs/develop/testing/fuzzer.rst index cb883f69e4b1..57ee0e968310 100644 --- a/velox/docs/develop/testing/fuzzer.rst +++ b/velox/docs/develop/testing/fuzzer.rst @@ -235,6 +235,8 @@ ExpressionRunner supports the following flags: * ``--sql_path`` path to expression SQL that was created by the Fuzzer +* ``--registry`` function registry to use for evaluating expression. One of "presto" (default) or "spark". + * ``--complex_constant_path`` optional path to complex constants that aren't accurately expressable in SQL (Array, Map, Structs, ...). This is used with SQL file to reproduce the exact expression, not needed when the expression doesn't contain complex constants. 
* ``--lazy_column_list_path`` optional path for the file stored on-disk which contains a vector of column indices that specify which columns of the input row vector should be wrapped in lazy. This is used when the failing test included input columns that were lazy vector.
diff --git a/velox/expression/tests/CMakeLists.txt b/velox/expression/tests/CMakeLists.txt
index fb5fab232c86..ded7d6aa82c5 100644
--- a/velox/expression/tests/CMakeLists.txt
+++ b/velox/expression/tests/CMakeLists.txt
@@ -112,8 +112,9 @@ target_link_libraries(
   gtest_main)

 add_library(velox_expression_runner ExpressionRunner.cpp)
-target_link_libraries(velox_expression_runner velox_expression_verifier
-                      velox_functions_prestosql velox_parse_parser gtest)
+target_link_libraries(
+  velox_expression_runner velox_expression_verifier velox_functions_prestosql
+  velox_functions_spark velox_parse_parser gtest)

 add_executable(velox_expression_runner_test ExpressionRunnerTest.cpp)
 target_link_libraries(
diff --git a/velox/expression/tests/ExpressionRunnerTest.cpp b/velox/expression/tests/ExpressionRunnerTest.cpp
index 08e976af4a57..c6e06d79141d 100644
--- a/velox/expression/tests/ExpressionRunnerTest.cpp
+++ b/velox/expression/tests/ExpressionRunnerTest.cpp
@@ -22,6 +22,7 @@
 #include "velox/expression/tests/ExpressionVerifier.h"
 #include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
 #include "velox/functions/prestosql/registration/RegistrationFunctions.h"
+#include "velox/functions/sparksql/Register.h"
 #include "velox/vector/VectorSaver.h"

 using namespace facebook::velox;
@@ -52,6 +53,12 @@ DEFINE_string(
     "flag are mutually exclusive. If both are specified, --sql is used and "
     "--sql_path is ignored.");

+DEFINE_string(
+    registry,
+    "presto",
+    "Function registry to use for expression evaluation. Currently supported values are "
+    "presto and spark. Default is presto.");
+
 DEFINE_string(
     result_path,
     "",
@@ -83,16 +90,35 @@ static bool validateMode(const char* flagName, const std::string& value) {
   static const std::unordered_set<std::string> kModes = {
       "common", "simplified", "verify", "query"};
   if (kModes.count(value) != 1) {
-    std::cout << "Invalid value for --" << flagName << ": " << value << ". ";
-    std::cout << "Valid values are: " << folly::join(", ", kModes) << "."
+    std::cerr << "Invalid value for --" << flagName << ": " << value << ". ";
+    std::cerr << "Valid values are: " << folly::join(", ", kModes) << "."
               << std::endl;
     return false;
   }

   return true;
 }

+static bool validateRegistry(const char* flagName, const std::string& value) {
+  static const std::unordered_set<std::string> kRegistries = {
+      "presto", "spark"};
+  if (kRegistries.count(value) != 1) {
+    std::cerr << "Invalid value for --" << flagName << ": " << value << ". ";
+    std::cerr << "Valid values are: " << folly::join(", ", kRegistries) << "."
+              << std::endl;
+    return false;
+  }
+
+  if (value == "spark") {
+    functions::sparksql::registerFunctions("");
+  } else if (value == "presto") {
+    functions::prestosql::registerAllScalarFunctions();
+  }
+  return true;
+}
+
 DEFINE_validator(mode, &validateMode);
+DEFINE_validator(registry, &validateRegistry);

 DEFINE_int32(
     num_rows,
@@ -172,7 +198,7 @@ int main(int argc, char** argv) {
   }

   if (FLAGS_sql.empty() && FLAGS_sql_path.empty()) {
-    std::cout << "One of --sql or --sql_path flags must be set." << std::endl;
+    std::cerr << "One of --sql or --sql_path flags must be set."
<< std::endl;
     exit(1);
   }

@@ -182,8 +208,6 @@ int main(int argc, char** argv) {
     VELOX_CHECK(!sql.empty());
   }

-  functions::prestosql::registerAllScalarFunctions();
-  aggregate::prestosql::registerAllAggregateFunctions();
   test::ExpressionRunner::run(
       FLAGS_input_path,
       sql,

From 080b7857139ca647e90a5cee81344f920a1be84d Mon Sep 17 00:00:00 2001
From: Wei He
Date: Mon, 12 Feb 2024 10:58:33 -0800
Subject: [PATCH 05/38] Fix min_by/max_by(x, y, n) (#8566)

Summary:
Pull Request resolved: https://github.com/facebookincubator/velox/pull/8566

Same as the bug in min/max(x, n) fixed in
https://github.com/facebookincubator/velox/pull/8311, min_by/max_by(x, y, n)
also breaks the assumption of incremental window aggregation because their
extractValues() methods have a side effect of clearing the accumulator. This
diff fixes this issue by making the extractValues() methods of
min_by/max_by(x, y, n) not clear the accumulators.

Presto's min_by/max_by have the same bug
(https://github.com/prestodb/presto/issues/21653), so this fix will make
Velox's min_by/max_by behave differently from Presto when used in Window
operation, until https://github.com/prestodb/presto/issues/21653 is fixed.

This diff fixes https://github.com/facebookincubator/velox/issues/8138.

Reviewed By: bikramSingh91

Differential Revision: D53139892

fbshipit-source-id: 1323f22196e22554c0d880d20584a4ee4059b64c
---
 velox/docs/develop/aggregate-functions.rst    |  10 +-
 .../aggregates/MinMaxByAggregates.cpp         | 292 ++++++++++--------
 .../tests/MinMaxByAggregationTest.cpp         |  92 +++++-
 3 files changed, 256 insertions(+), 138 deletions(-)

diff --git a/velox/docs/develop/aggregate-functions.rst b/velox/docs/develop/aggregate-functions.rst
index a67f0a4e7e8a..572c7446e16a 100644
--- a/velox/docs/develop/aggregate-functions.rst
+++ b/velox/docs/develop/aggregate-functions.rst
@@ -283,6 +283,9 @@ initialize all accumulators.
 The author can also optionally define a `destroy` function that is called when
 *this* accumulator object is destructed.

+Notice that `writeIntermediateResult` and `writeFinalResult` are expected to not
+modify contents in the accumulator.
+
 addInput
 """"""""

@@ -365,6 +368,9 @@ behavior.
 On the other hand, the C++ function signatures of `addInput`, `combine`,
 `writeIntermediateResult`, and `writeFinalResult` are different.

+Same as the case for default-null behavior, `writeIntermediateResult` and
+`writeFinalResult` are expected to not modify contents in the accumulator.
+
 addInput
 """"""""

@@ -605,6 +611,7 @@ After implementing the addRawInput() method, we proceed to adding logic for extr
 .. code-block:: c++

   // Extracts partial results (used for partial and intermediate aggregations).
+  // This method is expected to not modify contents in accumulators.
   // @param groups Pointers to the start of the group rows.
   // @param numGroups Number of groups to extract results from.
   // @param result The result vector to store the results in.
@@ -625,7 +632,8 @@ Next, we implement the extractValues() method that extracts final results from t
 .. code-block:: c++

-  // Extracts final results (used for final and single aggregations).
+  // Extracts final results (used for final and single aggregations). This method
+  // is expected to not modify contents in accumulators.
   // @param groups Pointers to the start of the group rows.
   // @param numGroups Number of groups to extract results from.
   // @param result The result vector to store the results in.
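The MinMaxByAggregates.cpp changes below enforce this contract by replacing destructive priority-queue pops with a sort-then-restore pass over a heap. A minimal sketch of that pattern, with `Pair`, `Compare`, and `Consume` as illustrative stand-in names rather than Velox types:

  #include <algorithm>
  #include <vector>

  // Emit the accumulated values in ascending order without consuming the
  // heap, so the same accumulator can keep absorbing rows afterwards.
  template <typename Pair, typename Compare, typename Consume>
  void extractWithoutClearing(std::vector<Pair>& heap, Compare cmp, Consume consume) {
    // sort_heap orders the vector ascending by 'cmp' but breaks the heap property.
    std::sort_heap(heap.begin(), heap.end(), cmp);
    for (const auto& pair : heap) {
      consume(pair); // copy out; never pop or free
    }
    // Rebuild the heap invariant so later compareAndAdd calls still work.
    std::make_heap(heap.begin(), heap.end(), cmp);
  }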
diff --git a/velox/functions/prestosql/aggregates/MinMaxByAggregates.cpp b/velox/functions/prestosql/aggregates/MinMaxByAggregates.cpp index 877134ceff88..0c81d61204b8 100644 --- a/velox/functions/prestosql/aggregates/MinMaxByAggregates.cpp +++ b/velox/functions/prestosql/aggregates/MinMaxByAggregates.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include "velox/functions/lib/aggregates/MinMaxByAggregatesBase.h" #include "velox/functions/lib/aggregates/ValueSet.h" #include "velox/functions/prestosql/aggregates/AggregateNames.h" @@ -89,19 +90,18 @@ struct MinMaxByNAccumulator { int64_t n{0}; using Pair = std::pair>; - using Queue = - std::priority_queue>, Compare>; - Queue topPairs; + using Heap = std::vector>; + Heap heapValues; explicit MinMaxByNAccumulator(HashStringAllocator* allocator) - : topPairs{Compare{}, StlAllocator(allocator)} {} + : heapValues{StlAllocator(allocator)} {} int64_t getN() const { return n; } size_t size() const { - return topPairs.size(); + return heapValues.size(); } void checkAndSetN(DecodedVector& decodedN, vector_size_t row) { @@ -126,61 +126,64 @@ struct MinMaxByNAccumulator { void compareAndAdd(C comparison, std::optional value, Compare& comparator) { - if (topPairs.size() < n) { - topPairs.push({comparison, value}); + if (heapValues.size() < n) { + heapValues.push_back({comparison, value}); + std::push_heap(heapValues.begin(), heapValues.end(), comparator); } else { - const auto& topPair = topPairs.top(); + const auto& topPair = heapValues.front(); if (comparator.compare(comparison, topPair)) { - topPairs.pop(); - topPairs.push({comparison, value}); + std::pop_heap(heapValues.begin(), heapValues.end(), comparator); + heapValues.back() = std::make_pair(comparison, value); + std::push_heap(heapValues.begin(), heapValues.end(), comparator); } } } - /// Moves all values from 'topPairs' into 'rawValues' and 'rawValueNulls' - /// buffers. The queue of 'topPairs' will be empty after this call. + /// Extract all values from 'heapValues' into 'rawValues' and 'rawValueNulls' + /// buffers. The heap remains unchanged after the call. void extractValues( TRawValue* rawValues, uint64_t* rawValueNulls, - vector_size_t offset) { - const vector_size_t size = topPairs.size(); - for (auto i = size - 1; i >= 0; --i) { - const auto& topPair = topPairs.top(); + vector_size_t offset, + Compare& comparator) { + std::sort_heap(heapValues.begin(), heapValues.end(), comparator); + // Add heap elements to rawValues in ascending order. + for (int64_t i = 0; i < heapValues.size(); ++i) { + const auto& pair = heapValues[i]; const auto index = offset + i; - - const bool valueIsNull = !topPair.second.has_value(); + const bool valueIsNull = !pair.second.has_value(); bits::setNull(rawValueNulls, index, valueIsNull); if (!valueIsNull) { - RawValueExtractor::extract(rawValues, index, topPair.second.value()); + RawValueExtractor::extract(rawValues, index, pair.second.value()); } - - topPairs.pop(); } + std::make_heap(heapValues.begin(), heapValues.end(), comparator); } - /// Moves all pairs of (comparison, value) from 'topPairs' into - /// 'rawComparisons', 'rawValues' and 'rawValueNulls' buffers. The queue of - /// 'topPairs' will be empty after this call. + /// Moves all pairs of (comparison, value) from 'heapValues' into + /// 'rawComparisons', 'rawValues' and 'rawValueNulls' buffers. The heap + /// remains unchanged after the call. 
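+  /// Requires the same 'comparator' that compareAndAdd uses, since the heap
+  /// is sorted and then rebuilt with it.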
void extractPairs( TRawComparison* rawComparisons, TRawValue* rawValues, uint64_t* rawValueNulls, - vector_size_t offset) { - const vector_size_t size = topPairs.size(); - for (auto i = size - 1; i >= 0; --i) { - const auto& topPair = topPairs.top(); + vector_size_t offset, + Compare& comparator) { + std::sort_heap(heapValues.begin(), heapValues.end(), comparator); + // Add heap elements to rawComparisons and rawValues in ascending order. + for (int64_t i = 0; i < heapValues.size(); ++i) { + const auto& pair = heapValues[i]; const auto index = offset + i; - RawValueExtractor::extract(rawComparisons, index, topPair.first); + RawValueExtractor::extract(rawComparisons, index, pair.first); - const bool valueIsNull = !topPair.second.has_value(); + const bool valueIsNull = !pair.second.has_value(); bits::setNull(rawValueNulls, index, valueIsNull); if (!valueIsNull) { - RawValueExtractor::extract(rawValues, index, topPair.second.value()); + RawValueExtractor::extract(rawValues, index, pair.second.value()); } - - topPairs.pop(); } + std::make_heap(heapValues.begin(), heapValues.end(), comparator); } }; @@ -199,15 +202,18 @@ struct Extractor { void extractValues( MinMaxByNAccumulator* accumulator, - vector_size_t offset) { - accumulator->extractValues(rawValues, rawValueNulls, offset); + vector_size_t offset, + Compare& comparator) { + accumulator->extractValues(rawValues, rawValueNulls, offset, comparator); } void extractPairs( MinMaxByNAccumulator* accumulator, TRawComparison* rawComparisons, - vector_size_t offset) { - accumulator->extractPairs(rawComparisons, rawValues, rawValueNulls, offset); + vector_size_t offset, + Compare& comparator) { + accumulator->extractPairs( + rawComparisons, rawValues, rawValueNulls, offset, comparator); } }; @@ -221,10 +227,8 @@ struct MinMaxByNStringViewAccumulator { : base{allocator}, valueSet{allocator} {} ~MinMaxByNStringViewAccumulator() { - while (!base.topPairs.empty()) { - auto& pair = base.topPairs.top(); - freePair(pair); - base.topPairs.pop(); + for (auto i = 0; i < base.heapValues.size(); ++i) { + freePair(base.heapValues[i]); } } @@ -242,47 +246,52 @@ struct MinMaxByNStringViewAccumulator { void compareAndAdd(C comparison, std::optional value, Compare& comparator) { - if (base.topPairs.size() < base.n) { - addToAccumulator(comparison, value); + if (base.heapValues.size() < base.n) { + addToAccumulator(comparison, value, comparator); } else { - const auto& topPair = base.topPairs.top(); + const auto& topPair = base.heapValues.front(); if (comparator.compare(comparison, topPair)) { - freePair(topPair); - base.topPairs.pop(); - addToAccumulator(comparison, value); + std::pop_heap( + base.heapValues.begin(), base.heapValues.end(), comparator); + base.heapValues.pop_back(); + addToAccumulator(comparison, value, comparator); } } } - /// Moves all values from 'topPairs' into 'values' - /// buffers. The queue of 'topPairs' will be empty after this call. - void extractValues(FlatVector& values, vector_size_t offset) { - const vector_size_t size = base.topPairs.size(); - for (auto i = size - 1; i >= 0; --i) { - const auto& pair = base.topPairs.top(); + /// Extract all values from 'heapValues' into 'rawValues' and 'rawValueNulls' + /// buffers. The heap remains unchanged after the call. + void extractValues( + FlatVector& values, + vector_size_t offset, + Compare& comparator) { + std::sort_heap(base.heapValues.begin(), base.heapValues.end(), comparator); + // Add heap elements to values in ascending order. 
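+    // Values are copied into the output vector; the backing strings stay
+    // owned by 'valueSet' and are freed only in the destructor, keeping
+    // repeated extractions over the same accumulator safe.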
+ for (int64_t i = 0; i < base.heapValues.size(); ++i) { + const auto& pair = base.heapValues[i]; extractValue(pair, values, offset + i); - freePair(pair); - base.topPairs.pop(); } + std::make_heap(base.heapValues.begin(), base.heapValues.end(), comparator); } - /// Moves all pairs of (comparison, value) from 'topPairs' into - /// 'rawComparisons' buffer and 'values' vector. The queue of - /// 'topPairs' will be empty after this call. + /// Moves all pairs of (comparison, value) from 'heapValues' into + /// 'rawComparisons', 'rawValues' and 'rawValueNulls' buffers. The heap + /// remains unchanged after the call. void extractPairs( FlatVector& compares, FlatVector& values, - vector_size_t offset) { - const vector_size_t size = base.topPairs.size(); - for (auto i = size - 1; i >= 0; --i) { - const auto& topPair = base.topPairs.top(); + vector_size_t offset, + Compare& comparator) { + std::sort_heap(base.heapValues.begin(), base.heapValues.end(), comparator); + // Add heap elements to compares and values in ascending order. + for (int64_t i = 0; i < base.heapValues.size(); ++i) { + const auto& pair = base.heapValues[i]; const auto index = offset + i; - extractCompare(topPair, compares, index); - extractValue(topPair, values, index); - freePair(topPair); - base.topPairs.pop(); + extractCompare(pair, compares, index); + extractValue(pair, values, index); } + std::make_heap(base.heapValues.begin(), base.heapValues.end(), comparator); } private: @@ -296,48 +305,50 @@ struct MinMaxByNStringViewAccumulator { return valueSet.write(*value); } - void addToAccumulator(C comparison, std::optional value) { + void + addToAccumulator(C comparison, std::optional value, Compare& comparator) { if constexpr ( std::is_same_v && std::is_same_v) { - base.topPairs.push({valueSet.write(comparison), writeString(value)}); + base.heapValues.push_back( + std::make_pair(valueSet.write(comparison), writeString(value))); } else if constexpr (std::is_same_v) { - base.topPairs.push({comparison, writeString(value)}); + base.heapValues.push_back(std::make_pair(comparison, writeString(value))); } else { static_assert( std::is_same_v, "At least one of V and C must be StringView."); - base.topPairs.push({valueSet.write(comparison), value}); + base.heapValues.push_back( + std::make_pair(valueSet.write(comparison), value)); } + std::push_heap(base.heapValues.begin(), base.heapValues.end(), comparator); } - void freePair(typename BaseType::Queue::const_reference topPair) { + void freePair(typename BaseType::Heap::const_reference pair) { if constexpr (std::is_same_v) { - valueSet.free(topPair.first); + valueSet.free(pair.first); } if constexpr (std::is_same_v) { - if (topPair.second.has_value()) { - valueSet.free(*topPair.second); + if (pair.second.has_value()) { + valueSet.free(*pair.second); } } } - void extractValue( - const Pair& topPair, - FlatVector& values, - vector_size_t index) { - const bool valueIsNull = !topPair.second.has_value(); + void + extractValue(const Pair& pair, FlatVector& values, vector_size_t index) { + const bool valueIsNull = !pair.second.has_value(); values.setNull(index, valueIsNull); if (!valueIsNull) { - values.set(index, topPair.second.value()); + values.set(index, pair.second.value()); } } void extractCompare( - const Pair& topPair, + const Pair& pair, FlatVector& compares, vector_size_t index) { compares.setNull(index, false); - compares.set(index, topPair.first); + compares.set(index, pair.first); } }; @@ -353,15 +364,17 @@ struct StringViewExtractor { void extractValues( 
MinMaxByNStringViewAccumulator* accumulator, - vector_size_t offset) { - accumulator->extractValues(values, offset); + vector_size_t offset, + Compare& comparator) { + accumulator->extractValues(values, offset, comparator); } void extractPairs( MinMaxByNStringViewAccumulator* accumulator, - vector_size_t offset) { + vector_size_t offset, + Compare& comparator) { VELOX_DCHECK_NOT_NULL(compares); - accumulator->extractPairs(*compares, values, offset); + accumulator->extractPairs(*compares, values, offset, comparator); } }; @@ -379,10 +392,8 @@ struct MinMaxByNComplexTypeAccumulator { : base{allocator}, valueSet{allocator} {} ~MinMaxByNComplexTypeAccumulator() { - while (!base.topPairs.empty()) { - auto& pair = base.topPairs.top(); - freePair(pair); - base.topPairs.pop(); + for (auto i = 0; i < base.heapValues.size(); ++i) { + freePair(base.heapValues[i]); } } @@ -403,62 +414,69 @@ struct MinMaxByNComplexTypeAccumulator { DecodedVector& decoded, vector_size_t index, Compare& comparator) { - if (base.topPairs.size() < base.n) { + if (base.heapValues.size() < base.n) { auto position = writeComplex(decoded, index); - addToAccumulator(comparison, position); + addToAccumulator(comparison, position, comparator); } else { - const auto& topPair = base.topPairs.top(); + const auto& topPair = base.heapValues.front(); if (comparator.compare(comparison, topPair)) { - freePair(topPair); - base.topPairs.pop(); - + std::pop_heap( + base.heapValues.begin(), base.heapValues.end(), comparator); auto position = writeComplex(decoded, index); - addToAccumulator(comparison, position); + base.heapValues.pop_back(); + addToAccumulator(comparison, position, comparator); } } } - /// Moves all values from 'topPairs' into 'values' vector. The queue of - /// 'topPairs' will be empty after this call. - void extractValues(BaseVector& values, vector_size_t offset) { - const vector_size_t size = base.topPairs.size(); - for (auto i = size - 1; i >= 0; --i) { - const auto& pair = base.topPairs.top(); + /// Extract all values from 'heapValues' into 'rawValues' and 'rawValueNulls' + /// buffers. The heap remains unchanged after the call. + void + extractValues(BaseVector& values, vector_size_t offset, Compare& comparator) { + std::sort_heap(base.heapValues.begin(), base.heapValues.end(), comparator); + // Add heap elements to values in ascending order. + for (int64_t i = 0; i < base.heapValues.size(); ++i) { + const auto& pair = base.heapValues[i]; extractValue(pair, values, offset + i); - freePair(pair); - base.topPairs.pop(); } + std::make_heap(base.heapValues.begin(), base.heapValues.end(), comparator); } - /// Moves all pairs of (comparison, value) from 'topPairs' into - /// 'rawComparisons' buffer and 'values' vector. The queue of - /// 'topPairs' will be empty after this call. + /// Moves all pairs of (comparison, value) from 'heapValues' into + /// 'rawComparisons', 'rawValues' and 'rawValueNulls' buffers. The heap + /// remains unchanged after the call. void extractPairs( FlatVector& compares, BaseVector& values, - vector_size_t offset) { - const vector_size_t size = base.topPairs.size(); - for (auto i = size - 1; i >= 0; --i) { - const auto& topPair = base.topPairs.top(); + vector_size_t offset, + Compare& comparator) { + std::sort_heap(base.heapValues.begin(), base.heapValues.end(), comparator); + // Add heap elements to compares and values in ascending order. 
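+    // As in extractValues above, reading a position from 'valueSet' does not
+    // free it; entries are released only when the accumulator is destroyed.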
+ for (int64_t i = 0; i < base.heapValues.size(); ++i) { + const auto& pair = base.heapValues[i]; const auto index = offset + i; - extractCompare(topPair, compares, index); - extractValue(topPair, values, index); - freePair(topPair); - base.topPairs.pop(); + extractCompare(pair, compares, index); + extractValue(pair, values, index); } + std::make_heap(base.heapValues.begin(), base.heapValues.end(), comparator); } private: using V = HashStringAllocator::Position; using Pair = typename MinMaxByNAccumulator::Pair; - void addToAccumulator(C comparison, const std::optional& position) { + void addToAccumulator( + C comparison, + const std::optional& position, + Compare& comparator) { if constexpr (std::is_same_v) { - base.topPairs.push({valueSet.write(comparison), position}); + base.heapValues.push_back( + std::make_pair(valueSet.write(comparison), position)); } else { - base.topPairs.push({comparison, position}); + base.heapValues.push_back(std::make_pair(comparison, position)); } + std::push_heap(base.heapValues.begin(), base.heapValues.end(), comparator); } std::optional writeComplex( @@ -472,31 +490,30 @@ struct MinMaxByNComplexTypeAccumulator { return position; } - void freePair(typename BaseType::Queue::const_reference topPair) { + void freePair(typename BaseType::Heap::const_reference pair) { if constexpr (std::is_same_v) { - valueSet.free(topPair.first); + valueSet.free(pair.first); } - if (topPair.second.has_value()) { - valueSet.free(topPair.second->header); + if (pair.second.has_value()) { + valueSet.free(pair.second->header); } } - void - extractValue(const Pair& topPair, BaseVector& values, vector_size_t index) { - const bool valueIsNull = !topPair.second.has_value(); + void extractValue(const Pair& pair, BaseVector& values, vector_size_t index) { + const bool valueIsNull = !pair.second.has_value(); values.setNull(index, valueIsNull); if (!valueIsNull) { - auto position = topPair.second.value(); + auto position = pair.second.value(); valueSet.read(&values, index, position.header); } } void extractCompare( - const Pair& topPair, + const Pair& pair, FlatVector& compares, vector_size_t index) { compares.setNull(index, false); - compares.set(index, topPair.first); + compares.set(index, pair.first); } }; // namespace @@ -511,15 +528,17 @@ struct ComplexTypeExtractor { void extractValues( MinMaxByNComplexTypeAccumulator* accumulator, - vector_size_t offset) { - accumulator->extractValues(values, offset); + vector_size_t offset, + Compare& comparator) { + accumulator->extractValues(values, offset, comparator); } void extractPairs( MinMaxByNComplexTypeAccumulator* accumulator, - vector_size_t offset) { + vector_size_t offset, + Compare& comparator) { VELOX_DCHECK_NOT_NULL(compares); - accumulator->extractPairs(*compares, values, offset); + accumulator->extractPairs(*compares, values, offset, comparator); } }; @@ -658,7 +677,7 @@ class MinMaxByNAggregate : public exec::Aggregate { rawOffsets[i] = offset; rawSizes[i] = size; - extractor->extractValues(accumulator, offset); + extractor->extractValues(accumulator, offset, comparator_); offset += size; } @@ -720,9 +739,10 @@ class MinMaxByNAggregate : public exec::Aggregate { if constexpr ( std::is_same_v || std::is_same_v || std::is_same_v) { - extractor->extractPairs(accumulator, offset); + extractor->extractPairs(accumulator, offset, comparator_); } else { - extractor->extractPairs(accumulator, rawComparisons, offset); + extractor->extractPairs( + accumulator, rawComparisons, offset, comparator_); } offset += size; diff --git 
a/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp b/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp index 5ac1386527e6..ecc17ad1d7ec 100644 --- a/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp @@ -1370,7 +1370,6 @@ class MinMaxByNTest : public AggregationTestBase { AggregationTestBase::SetUp(); AggregationTestBase::allowInputShuffle(); AggregationTestBase::enableTestStreaming(); - AggregationTestBase::disableTestIncremental(); } }; @@ -1764,6 +1763,10 @@ TEST_F(MinMaxByNTest, sortedGroupBy) { } TEST_F(MinMaxByNTest, variableN) { + // Tests below check the error behavior on invalid inputs, so testIncremental + // is not needed for these cases. + AggregationTestBase::disableTestIncremental(); + auto data = makeRowVector({ makeFlatVector({1, 2, 3, 4, 5, 6, 7}), makeFlatVector({77, 66, 55, 44, 33, 22, 11}), @@ -1819,6 +1822,8 @@ TEST_F(MinMaxByNTest, variableN) { VELOX_ASSERT_THROW( AssertQueryBuilder(plan).copyResults(pool()), "third argument of max_by/min_by must be a constant for all rows in a group"); + + AggregationTestBase::enableTestIncremental(); } TEST_F(MinMaxByNTest, globalRow) { @@ -2111,5 +2116,90 @@ TEST_F(MinMaxByNTest, stringComparison) { } } +TEST_F(MinMaxByNTest, incrementalWindow) { + // Test that min_by(x, x, 10) and max_by(x, x, 10) produce correct results + // when used in window operation with incremental frames. + std::vector inputs = { + makeFlatVector({1, 2}), + makeFlatVector({"1"_sv, "2"_sv}), + makeArrayVector({{"1"_sv}, {"2"_sv}}), + makeFlatVector({Timestamp(0, 0), Timestamp(0, 1)}), + makeFlatVector({10, 10}), + makeFlatVector({false, false}), + makeFlatVector({0, 1})}; + auto data = makeRowVector(inputs); + auto result = inputs; + + // Test primitive type. + { + auto plan = + PlanBuilder() + .values({data}) + .window( + {"max_by(c0, c0, c4) over (partition by c5 order by c6 asc)"}) + .planNode(); + + result.push_back(makeArrayVector({{1}, {2, 1}})); + AssertQueryBuilder(plan).assertResults(makeRowVector(result)); + + plan = + PlanBuilder() + .values({data}) + .window( + {"min_by(c0, c0, c4) over (partition by c5 order by c6 asc)"}) + .planNode(); + result.back() = makeArrayVector({{1}, {1, 2}}); + AssertQueryBuilder(plan).assertResults(makeRowVector(result)); + } + + // Test varchar type. + { + auto plan = + PlanBuilder() + .values({data}) + .window( + {"max_by(c1, c1, c4) over (partition by c5 order by c6 asc)"}) + .planNode(); + + result.back() = makeArrayVector({{"1"_sv}, {"2"_sv, "1"_sv}}); + AssertQueryBuilder(plan).assertResults(makeRowVector(result)); + + plan = + PlanBuilder() + .values({data}) + .window( + {"min_by(c1, c1, c4) over (partition by c5 order by c6 asc)"}) + .planNode(); + + result.back() = makeArrayVector({{"1"_sv}, {"1"_sv, "2"_sv}}); + AssertQueryBuilder(plan).assertResults(makeRowVector(result)); + } + + // Test complex type. 
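+  // With frames growing one row at a time, the second row's frame holds both
+  // input rows, ordered by the timestamp comparison key c3.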
+ { + auto plan = + PlanBuilder() + .values({data}) + .window( + {"max_by(c2, c3, c4) over (partition by c5 order by c6 asc)"}) + .planNode(); + + result.back() = makeNullableNestedArrayVector( + {{{{{"1"_sv}}}}, {{{{"2"_sv}}, {{"1"_sv}}}}}); + AssertQueryBuilder(plan).assertResults(makeRowVector(result)); + + plan = + PlanBuilder() + .values({data}) + .window( + {"min_by(c2, c3, c4) over (partition by c5 order by c6 asc)"}) + .planNode(); + + result.back() = makeNullableNestedArrayVector( + {{{{{"1"_sv}}}}, {{{{"1"_sv}}, {{"2"_sv}}}}}); + AssertQueryBuilder(plan).assertResults(makeRowVector(result)); + } +} + } // namespace } // namespace facebook::velox::aggregate::test From a3a57cbf5a80ef9c5b31460f9f0fb35bb110afbe Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 12 Feb 2024 11:51:36 -0800 Subject: [PATCH 06/38] Add date_from_unix_date Spark function (#8672) Summary: Spark docs - https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.date_from_unix_date.html Fixes https://github.com/facebookincubator/velox/issues/8671 Pull Request resolved: https://github.com/facebookincubator/velox/pull/8672 Reviewed By: bikramSingh91 Differential Revision: D53569359 Pulled By: mbasmanova fbshipit-source-id: 71677ee9e222641d04cb4f798949e643ee40376e --- velox/docs/functions/spark/datetime.rst | 7 ++++++ velox/functions/sparksql/DateTimeFunctions.h | 9 ++++++++ velox/functions/sparksql/Register.cpp | 3 +++ .../sparksql/tests/DateTimeFunctionsTest.cpp | 22 +++++++++++++++++++ 4 files changed, 41 insertions(+) diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst index 138d3ce68a95..3ac392536524 100644 --- a/velox/docs/functions/spark/datetime.rst +++ b/velox/docs/functions/spark/datetime.rst @@ -34,6 +34,13 @@ These functions support TIMESTAMP and DATE input types. deducted from ``start_date``. Supported types for ``num_days`` are: TINYINT, SMALLINT, INTEGER. +.. spark:function:: date_from_unix_date(integer) -> date + + Creates date from the number of days since 1970-01-01 in either direction. Returns null when input is null. + + SELECT date_from_unix_date(1); -- '1970-01-02' + SELECT date_from_unix_date(-1); -- '1969-12-31' + .. spark:function:: date_sub(start_date, num_days) -> date Returns the date that is ``num_days`` before ``start_date``. 
According to the inputs, diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 7d6c184eb1bb..94a7f21317e5 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -360,6 +360,15 @@ struct LastDayFunction { } }; +template +struct DateFromUnixDateFunction { + VELOX_DEFINE_FUNCTION_TYPES(T); + + FOLLY_ALWAYS_INLINE void call(out_type& result, const int32_t& value) { + result = value; + } +}; + template struct DateAddFunction { VELOX_DEFINE_FUNCTION_TYPES(T); diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp index 7b028f6af978..a24c0bf215ae 100644 --- a/velox/functions/sparksql/Register.cpp +++ b/velox/functions/sparksql/Register.cpp @@ -292,6 +292,9 @@ void registerFunctions(const std::string& prefix) { registerFunction({prefix + "date_add"}); registerFunction({prefix + "date_add"}); + registerFunction( + {prefix + "date_from_unix_date"}); + registerFunction({prefix + "date_sub"}); registerFunction({prefix + "date_sub"}); registerFunction({prefix + "date_sub"}); diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index 66562e3c735b..b7cfbe4d694e 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -245,6 +245,28 @@ TEST_F(DateTimeFunctionsTest, lastDay) { EXPECT_EQ(lastDayFunc(std::nullopt), std::nullopt); } +TEST_F(DateTimeFunctionsTest, dateFromUnixDate) { + const auto dateFromUnixDate = [&](std::optional value) { + return evaluateOnce("date_from_unix_date(c0)", value); + }; + + // Basic tests + EXPECT_EQ(parseDate("1970-01-01"), dateFromUnixDate(0)); + EXPECT_EQ(parseDate("1970-01-02"), dateFromUnixDate(1)); + EXPECT_EQ(parseDate("1969-12-31"), dateFromUnixDate(-1)); + EXPECT_EQ(parseDate("1970-02-01"), dateFromUnixDate(31)); + EXPECT_EQ(parseDate("1971-01-31"), dateFromUnixDate(395)); + EXPECT_EQ(parseDate("1971-01-01"), dateFromUnixDate(365)); + + // Leap year tests + EXPECT_EQ(parseDate("1972-02-29"), dateFromUnixDate(365 + 365 + 30 + 29)); + EXPECT_EQ(parseDate("1971-03-01"), dateFromUnixDate(365 + 30 + 28 + 1)); + + // Min and max value tests + EXPECT_EQ(parseDate("5881580-07-11"), dateFromUnixDate(kMax)); + EXPECT_EQ(parseDate("-5877641-06-23"), dateFromUnixDate(kMin)); +} + TEST_F(DateTimeFunctionsTest, dateAdd) { const auto dateAdd = [&](const std::string& dateStr, std::optional value) { From 7227ff82291670874cb151d13d60bb3a1a748617 Mon Sep 17 00:00:00 2001 From: Wei He Date: Mon, 12 Feb 2024 13:28:11 -0800 Subject: [PATCH 07/38] Fix arbitrary() to always return the first non-null value in Window operation (#8640) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/8640 The implementation of arbitrary() intends to always return the first non-null value, but it doesn't for complex-typed inputs when used in Window operation with incremental frames. This is because ArbitraryFunction::addSingleGroupRawInput() still updates the accumulator even if the accumulator already has a value. This diff fixes this issue by making ArbitraryFunction::addSingleGroupRawInput() return immediately if accumulator already has a value. This diff fixes https://github.com/facebookincubator/velox/issues/8593. 
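In sketch form, the fix is an early-return guard at the top of the method (simplified here; the exact code is in the diff below, and `SingleValueAccumulator` names the accumulator type used by the non-numeric path):

  void addSingleGroupRawInput(
      char* group,
      const SelectivityVector& rows,
      const std::vector<VectorPtr>& args,
      bool /*unused*/) {
    auto* accumulator = value<SingleValueAccumulator>(group);
    if (accumulator->hasValue()) {
      return; // first non-null value already captured; later rows must not overwrite it
    }
    // ... decode args[0] and store the first non-null row, as before ...
  }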
Reviewed By: kgpai Differential Revision: D53328253 fbshipit-source-id: 803261dce0ec1fc52187947b1f9316dfd814c3fd --- .../aggregates/ArbitraryAggregate.cpp | 6 +++- .../aggregates/tests/ArbitraryTest.cpp | 35 +++++++++++++++++++ .../prestosql/aggregates/tests/CMakeLists.txt | 1 + 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/velox/functions/prestosql/aggregates/ArbitraryAggregate.cpp b/velox/functions/prestosql/aggregates/ArbitraryAggregate.cpp index fd7197430ba0..6bab95ba7f71 100644 --- a/velox/functions/prestosql/aggregates/ArbitraryAggregate.cpp +++ b/velox/functions/prestosql/aggregates/ArbitraryAggregate.cpp @@ -226,6 +226,11 @@ class NonNumericArbitrary : public exec::Aggregate { const SelectivityVector& rows, const std::vector& args, bool /*unused*/) override { + auto* accumulator = value(group); + if (accumulator->hasValue()) { + return; + } + DecodedVector decoded(*args[0], rows, true); if (decoded.isConstantMapping() && decoded.isNullAt(0)) { // nothing to do; all values are nulls @@ -234,7 +239,6 @@ class NonNumericArbitrary : public exec::Aggregate { const auto* indices = decoded.indices(); const auto* baseVector = decoded.base(); - auto* accumulator = value(group); // Find the first non-null value. rows.testSelected([&](vector_size_t i) { if (!decoded.isNullAt(i)) { diff --git a/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp b/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp index fcab97ce63db..93082cce97a5 100644 --- a/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/ArbitraryTest.cpp @@ -16,9 +16,11 @@ #include "velox/exec/tests/utils/PlanBuilder.h" #include "velox/functions/lib/aggregates/tests/utils/AggregationTestBase.h" +#include "velox/functions/lib/window/tests/WindowTestBase.h" using namespace facebook::velox::exec::test; using namespace facebook::velox::functions::aggregate::test; +using namespace facebook::velox::window::test; namespace facebook::velox::aggregate::test { @@ -367,5 +369,38 @@ TEST_F(ArbitraryTest, interval) { testAggregations({data}, {}, {"arbitrary(c2)"}, "SELECT null"); } +class ArbitraryWindowTest : public WindowTestBase {}; + +TEST_F(ArbitraryWindowTest, basic) { + auto data = makeRowVector( + {makeFlatVector({1, 2, 3, 4, 5}), + makeArrayVector({{1.0}, {2.0}, {3.0}, {4.0}, {5.0}}), + makeFlatVector({false, false, false, false, false})}); + + auto expected = makeRowVector( + {makeFlatVector({1, 2, 3, 4, 5}), + makeArrayVector({{1.0}, {2.0}, {3.0}, {4.0}, {5.0}}), + makeFlatVector({false, false, false, false, false}), + makeFlatVector({1, 1, 1, 1, 1})}); + window::test::WindowTestBase::testWindowFunction( + {data}, + "arbitrary(c0)", + "partition by c2 order by c0", + "range between unbounded preceding and current row", + expected); + + expected = makeRowVector( + {makeFlatVector({1, 2, 3, 4, 5}), + makeArrayVector({{1.0}, {2.0}, {3.0}, {4.0}, {5.0}}), + makeFlatVector({false, false, false, false, false}), + makeArrayVector({{1.0}, {1.0}, {1.0}, {1.0}, {1.0}})}); + window::test::WindowTestBase::testWindowFunction( + {data}, + "arbitrary(c1)", + "partition by c2 order by c0", + "range between unbounded preceding and current row", + expected); +} + } // namespace } // namespace facebook::velox::aggregate::test diff --git a/velox/functions/prestosql/aggregates/tests/CMakeLists.txt b/velox/functions/prestosql/aggregates/tests/CMakeLists.txt index 51b0bebd8f37..0ded4a46fe9f 100644 --- a/velox/functions/prestosql/aggregates/tests/CMakeLists.txt +++ 
b/velox/functions/prestosql/aggregates/tests/CMakeLists.txt @@ -64,6 +64,7 @@ target_link_libraries( velox_file velox_functions_aggregates velox_functions_aggregates_test_lib + velox_functions_window_test_lib velox_functions_test_lib velox_functions_prestosql velox_functions_lib From 42b10d9432f1f211e9641d327282f1dfc4dc2325 Mon Sep 17 00:00:00 2001 From: Sergey Pershin Date: Mon, 12 Feb 2024 17:13:19 -0800 Subject: [PATCH 08/38] Fix crash in parseSerdeParameters() (#8730) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/8730 We simply didn't check `nullStringIt` for being invalid before using it. Reviewed By: gggrace14 Differential Revision: D53676033 fbshipit-source-id: aea451e7995be84a06d86ae17eee39c26747e04b --- velox/connectors/hive/HiveConnectorUtil.cpp | 4 +- velox/connectors/hive/tests/CMakeLists.txt | 1 + .../hive/tests/HiveConnectorUtilTest.cpp | 205 ++++++++++++++++++ velox/dwio/common/Options.h | 7 + 4 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 velox/connectors/hive/tests/HiveConnectorUtilTest.cpp diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index 464d7560691a..a0bc7fbd046e 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -418,7 +418,9 @@ std::unique_ptr parseSerdeParameters( } auto serDeOptions = std::make_unique( fieldDelim, collectionDelim, mapKeyDelim); - serDeOptions->nullString = nullStringIt->second; + if (nullStringIt != tableParameters.end()) { + serDeOptions->nullString = nullStringIt->second; + } return serDeOptions; } diff --git a/velox/connectors/hive/tests/CMakeLists.txt b/velox/connectors/hive/tests/CMakeLists.txt index c98db338002e..f84b2eb8ccfc 100644 --- a/velox/connectors/hive/tests/CMakeLists.txt +++ b/velox/connectors/hive/tests/CMakeLists.txt @@ -18,6 +18,7 @@ add_executable( FileHandleTest.cpp HivePartitionUtilTest.cpp HiveConnectorTest.cpp + HiveConnectorUtilTest.cpp HiveConnectorSerDeTest.cpp PartitionIdGeneratorTest.cpp TableHandleTest.cpp diff --git a/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp new file mode 100644 index 000000000000..5cd33e13d975 --- /dev/null +++ b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp @@ -0,0 +1,205 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "velox/exec/tests/utils/HiveConnectorTestBase.h" + +#include "velox/connectors/hive/HiveConfig.h" +#include "velox/connectors/hive/HiveConnectorSplit.h" +#include "velox/connectors/hive/HiveConnectorUtil.h" +#include "velox/connectors/hive/TableHandle.h" +#include "velox/core/Config.h" + +namespace facebook::velox::connector { + +using namespace dwio::common; + +class HiveConnectorUtilTest : public exec::test::HiveConnectorTestBase { + protected: + static bool compareSerDeOptions( + const SerDeOptions& l, + const SerDeOptions& r) { + return l.isEscaped == r.isEscaped && l.escapeChar == r.escapeChar && + l.lastColumnTakesRest == r.lastColumnTakesRest && + l.nullString == r.nullString && l.separators == r.separators; + } + + std::shared_ptr pool_ = + memory::memoryManager()->addLeafPool(); +}; + +TEST_F(HiveConnectorUtilTest, configureReaderOptions) { + core::MemConfig sessionProperties; + auto hiveConfig = + std::make_shared(std::make_shared()); + const std::unordered_map> + partitionKeys; + const std::unordered_map customSplitInfo; + + // Dynamic parameters. + dwio::common::ReaderOptions readerOptions(pool_.get()); + FileFormat fileFormat{FileFormat::DWRF}; + std::unordered_map tableParameters; + std::unordered_map serdeParameters; + SerDeOptions expectedSerDe; + + auto createTableHandle = [&]() { + return std::make_shared( + "testConnectorId", + "testTable", + false, + hive::SubfieldFilters{}, + nullptr, + nullptr, + tableParameters); + }; + + auto createSplit = [&]() { + return std::make_shared( + "testConnectorId", + "/tmp/", + fileFormat, + 0UL, + std::numeric_limits::max(), + partitionKeys, + std::nullopt, + customSplitInfo, + nullptr, + serdeParameters); + }; + + auto performConfigure = [&]() { + auto tableHandle = createTableHandle(); + auto split = createSplit(); + configureReaderOptions( + readerOptions, hiveConfig, &sessionProperties, tableHandle, split); + }; + + auto clearDynamicParameters = [&](FileFormat newFileFormat) { + readerOptions = dwio::common::ReaderOptions(pool_.get()); + fileFormat = newFileFormat; + tableParameters.clear(); + serdeParameters.clear(); + expectedSerDe = SerDeOptions{}; + }; + + // Default. + performConfigure(); + EXPECT_EQ(readerOptions.getFileFormat(), fileFormat); + EXPECT_TRUE( + compareSerDeOptions(readerOptions.getSerDeOptions(), expectedSerDe)); + EXPECT_EQ(readerOptions.maxCoalesceBytes(), hiveConfig->maxCoalescedBytes()); + EXPECT_EQ( + readerOptions.maxCoalesceDistance(), + hiveConfig->maxCoalescedDistanceBytes()); + EXPECT_EQ( + readerOptions.isFileColumnNamesReadAsLowerCase(), + hiveConfig->isFileColumnNamesReadAsLowerCase(&sessionProperties)); + EXPECT_EQ( + readerOptions.isUseColumnNamesForColumnMapping(), + hiveConfig->isOrcUseColumnNames(&sessionProperties)); + EXPECT_EQ( + readerOptions.getFooterEstimatedSize(), + hiveConfig->footerEstimatedSize()); + EXPECT_EQ( + readerOptions.getFilePreloadThreshold(), + hiveConfig->filePreloadThreshold()); + + // Modify field delimiter and change the file format. + clearDynamicParameters(FileFormat::TEXT); + serdeParameters[SerDeOptions::kFieldDelim] = '\t'; + expectedSerDe.separators[size_t(SerDeSeparator::FIELD_DELIM)] = '\t'; + performConfigure(); + EXPECT_EQ(readerOptions.getFileFormat(), fileFormat); + EXPECT_TRUE( + compareSerDeOptions(readerOptions.getSerDeOptions(), expectedSerDe)); + + // Modify collection delimiter. 
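+  // Only the collection delimiter changes here; the field and map-key
+  // delimiters keep their defaults.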
+  clearDynamicParameters(FileFormat::TEXT);
+  serdeParameters[SerDeOptions::kCollectionDelim] = '=';
+  expectedSerDe.separators[size_t(SerDeSeparator::COLLECTION_DELIM)] = '=';
+  performConfigure();
+  EXPECT_TRUE(
+      compareSerDeOptions(readerOptions.getSerDeOptions(), expectedSerDe));
+
+  // Modify map key delimiter.
+  clearDynamicParameters(FileFormat::TEXT);
+  serdeParameters[SerDeOptions::kMapKeyDelim] = '&';
+  expectedSerDe.separators[size_t(SerDeSeparator::MAP_KEY_DELIM)] = '&';
+  performConfigure();
+  EXPECT_TRUE(
+      compareSerDeOptions(readerOptions.getSerDeOptions(), expectedSerDe));
+
+  // Modify null string.
+  clearDynamicParameters(FileFormat::TEXT);
+  tableParameters[TableParameter::kSerializationNullFormat] = "x-x";
+  expectedSerDe.nullString = "x-x";
+  performConfigure();
+  EXPECT_TRUE(
+      compareSerDeOptions(readerOptions.getSerDeOptions(), expectedSerDe));
+
+  // Modify all previous together.
+  clearDynamicParameters(FileFormat::TEXT);
+  serdeParameters[SerDeOptions::kFieldDelim] = '~';
+  expectedSerDe.separators[size_t(SerDeSeparator::FIELD_DELIM)] = '~';
+  serdeParameters[SerDeOptions::kCollectionDelim] = '$';
+  expectedSerDe.separators[size_t(SerDeSeparator::COLLECTION_DELIM)] = '$';
+  serdeParameters[SerDeOptions::kMapKeyDelim] = '*';
+  expectedSerDe.separators[size_t(SerDeSeparator::MAP_KEY_DELIM)] = '*';
+  tableParameters[TableParameter::kSerializationNullFormat] = "";
+  expectedSerDe.nullString = "";
+  performConfigure();
+  EXPECT_TRUE(
+      compareSerDeOptions(readerOptions.getSerDeOptions(), expectedSerDe));
+
+  // Tests other custom reader options.
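+  // Each HiveConfig property set below is expected to flow through
+  // configureReaderOptions into the matching ReaderOptions getter; the
+  // values are arbitrary but distinct from the defaults.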
+ clearDynamicParameters(FileFormat::TEXT); + std::unordered_map customHiveConfigProps; + customHiveConfigProps[hive::HiveConfig::kMaxCoalescedBytes] = "129"; + customHiveConfigProps[hive::HiveConfig::kMaxCoalescedDistanceBytes] = "513"; + customHiveConfigProps[hive::HiveConfig::kFileColumnNamesReadAsLowerCase] = + "true"; + customHiveConfigProps[hive::HiveConfig::kOrcUseColumnNames] = "true"; + customHiveConfigProps[hive::HiveConfig::kFooterEstimatedSize] = "1111"; + customHiveConfigProps[hive::HiveConfig::kFilePreloadThreshold] = "9999"; + hiveConfig = std::make_shared( + std::make_shared(customHiveConfigProps)); + performConfigure(); + EXPECT_EQ(readerOptions.maxCoalesceBytes(), hiveConfig->maxCoalescedBytes()); + EXPECT_EQ( + readerOptions.maxCoalesceDistance(), + hiveConfig->maxCoalescedDistanceBytes()); + EXPECT_EQ( + readerOptions.isFileColumnNamesReadAsLowerCase(), + hiveConfig->isFileColumnNamesReadAsLowerCase(&sessionProperties)); + EXPECT_EQ( + readerOptions.isUseColumnNamesForColumnMapping(), + hiveConfig->isOrcUseColumnNames(&sessionProperties)); + EXPECT_EQ( + readerOptions.getFooterEstimatedSize(), + hiveConfig->footerEstimatedSize()); + EXPECT_EQ( + readerOptions.getFilePreloadThreshold(), + hiveConfig->filePreloadThreshold()); +} + +}; // namespace facebook::velox::connector diff --git a/velox/dwio/common/Options.h b/velox/dwio/common/Options.h index c154f8f4a6e0..92674f5a8fa2 100644 --- a/velox/dwio/common/Options.h +++ b/velox/dwio/common/Options.h @@ -93,7 +93,14 @@ class SerDeOptions { }; struct TableParameter { + /// If present in the table parameters, the option is passed to the row reader + /// to instruct it to skip the number of rows from the current position. Used + /// to skip the column header row(s). static constexpr const char* kSkipHeaderLineCount = "skip.header.line.count"; + /// If present in the table parameters, the option overrides the default value + /// of the SerDeOptions::nullString. It causes any field read from the file + /// (usually of the TEXT format) to be considered NULL if it is equal to this + /// string. 
  static constexpr const char* kSerializationNullFormat =
      "serialization.null.format";
};

From aba702c05221e7ce8a64550542cc2791b6fb8d8a Mon Sep 17 00:00:00 2001
From: Zhenyuan Zhao
Date: Tue, 13 Feb 2024 09:16:21 -0800
Subject: [PATCH 09/38] Return whether registration succeeded in the custom
 opaque path (#8716)

Summary:
Pull Request resolved: https://github.com/facebookincubator/velox/pull/8716

This makes it easier for callers to check and decide whether they need to
register serialization hooks.

Reviewed By: pedroerp

Differential Revision: D53614803

fbshipit-source-id: a493bbf59ef6c82e5459279328ad7f530de86cbd
---
 velox/type/OpaqueCustomTypes.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/velox/type/OpaqueCustomTypes.h b/velox/type/OpaqueCustomTypes.h
index fdb42d3d94a0..dacf0cfb87b7 100644
--- a/velox/type/OpaqueCustomTypes.h
+++ b/velox/type/OpaqueCustomTypes.h
@@ -33,8 +33,8 @@ class CastOperator;
 template 
 class OpaqueCustomTypeRegister {
  public:
-  static void registerType() {
-    facebook::velox::registerCustomType(
+  static bool registerType() {
+    return facebook::velox::registerCustomType(
         customTypeName, std::make_unique());
   }

From b0eeef9177f3b79c010b243f9b2e4bd4d11120f3 Mon Sep 17 00:00:00 2001
From: "Schierbeck, Cody"
Date: Tue, 13 Feb 2024 11:17:59 -0800
Subject: [PATCH 10/38] Introduce cappedByteLength to help with indexing UTF-8
 strings (#8637)

Summary:
UTF strings may contain multi-byte characters that make character-based
indexing inaccurate. This PR introduces the functions
stringImpl::cappedByteLength and stringCore::cappedByteLengthUnicode to help
with indexing UTF strings that may contain multi-byte characters.

Pull Request resolved: https://github.com/facebookincubator/velox/pull/8637

Reviewed By: pedroerp

Differential Revision: D53627624

Pulled By: kgpai

fbshipit-source-id: 2f28a7d1bb81c1a5e875e7b8a6f300f1fc9fbb16
---
 velox/functions/lib/string/StringCore.h       | 27 ++++++
 velox/functions/lib/string/StringImpl.h       | 15 ++-
 .../lib/string/tests/StringImplTest.cpp       | 94 +++++++++++++++++++
 3 files changed, 135 insertions(+), 1 deletion(-)

diff --git a/velox/functions/lib/string/StringCore.h b/velox/functions/lib/string/StringCore.h
index ba988b06c4af..8fdcc4c61892 100644
--- a/velox/functions/lib/string/StringCore.h
+++ b/velox/functions/lib/string/StringCore.h
@@ -264,6 +264,33 @@ cappedLengthUnicode(const char* input, size_t size, size_t maxChars) {
   return numChars;
 }
 
+///
+/// Return a capped length in bytes (controlled by maxChars) of a unicode
+/// string. The returned length may be greater than maxChars if there are
+/// multi-byte characters present in the input string.
+///
+/// This method is used to help with indexing unicode strings by byte position.
+/// It is used to find the byte position of the Nth character in a string.
+///
+/// @param input input buffer that hold the string
+/// @param size size of input buffer
+/// @param maxChars stop counting characters if the string is longer
+/// than this value
+/// @return the number of bytes represented by the input utf8 string up to
+/// maxChars
+///
+FOLLY_ALWAYS_INLINE int64_t
+cappedByteLengthUnicode(const char* input, size_t size, int64_t maxChars) {
+  size_t utf8Position = 0;
+  size_t numCharacters = 0;
+  while (utf8Position < size && numCharacters < maxChars) {
+    auto charSize = utf8proc_char_length(input + utf8Position);
+    utf8Position += UNLIKELY(charSize < 0) ? 1 : charSize;
+    numCharacters++;
+  }
+  return utf8Position;
+}
+
 /// Returns the start byte index of the Nth instance of subString in
 /// string. Search starts from startPosition. Positions start with 0. If not
 /// found, -1 is returned. To facilitate finding overlapping strings, the
diff --git a/velox/functions/lib/string/StringImpl.h b/velox/functions/lib/string/StringImpl.h
index 871f3bffd194..73b6a4366162 100644
--- a/velox/functions/lib/string/StringImpl.h
+++ b/velox/functions/lib/string/StringImpl.h
@@ -111,7 +111,7 @@ FOLLY_ALWAYS_INLINE int64_t length(const T& input) {
   }
 }
 
-/// Return a capped length(controlled by maxLength) of a string.
+/// Return a capped length in characters (controlled by maxLength) of a string.
 /// The returned length is not greater than maxLength.
 template <bool isAscii, typename T>
 FOLLY_ALWAYS_INLINE int64_t cappedLength(const T& input, size_t maxLength) {
@@ -122,6 +122,19 @@ FOLLY_ALWAYS_INLINE int64_t cappedLength(const T& input, size_t maxLength) {
   }
 }
 
+/// Return a capped length in bytes (controlled by maxCharacters) of a string.
+/// The returned length may be greater than maxCharacters if there are
+/// multi-byte characters present in the input string.
+template <bool isAscii, typename TString>
+FOLLY_ALWAYS_INLINE int64_t
+cappedByteLength(const TString& input, size_t maxCharacters) {
+  if constexpr (isAscii) {
+    return input.size() > maxCharacters ? maxCharacters : input.size();
+  } else {
+    return cappedByteLengthUnicode(input.data(), input.size(), maxCharacters);
+  }
+}
+
 /// Write the Unicode codePoint as string to the output string. The function
 /// behavior is undefined when code point it invalid. Implements the logic of
 /// presto chr function.
diff --git a/velox/functions/lib/string/tests/StringImplTest.cpp b/velox/functions/lib/string/tests/StringImplTest.cpp
index 258eb6f37053..883949e33c3a 100644
--- a/velox/functions/lib/string/tests/StringImplTest.cpp
+++ b/velox/functions/lib/string/tests/StringImplTest.cpp
@@ -196,6 +196,100 @@ TEST_F(StringImplTest, cappedLength) {
   ASSERT_EQ(cappedLength(input, 7), 5);
 }
 
+TEST_F(StringImplTest, cappedUnicodeBytes) {
+  // Test the functions' use case of indexing UTF strings.
+ std::string stringInput = "\xF4\x90\x80\x80Hello"; + ASSERT_EQ('H', stringInput[cappedByteLength(stringInput, 2) - 1]); + ASSERT_EQ('e', stringInput[cappedByteLength(stringInput, 3) - 1]); + ASSERT_EQ('l', stringInput[cappedByteLength(stringInput, 4) - 1]); + ASSERT_EQ('l', stringInput[cappedByteLength(stringInput, 5) - 1]); + ASSERT_EQ('o', stringInput[cappedByteLength(stringInput, 6) - 1]); + ASSERT_EQ('o', stringInput[cappedByteLength(stringInput, 7) - 1]); + + // Multi-byte chars + stringInput = "♫¡Singing is fun!♫"; + auto sPos = cappedByteLength(stringInput, 2); + auto exPos = cappedByteLength(stringInput, 17); + ASSERT_EQ("Singing is fun!♫", stringInput.substr(sPos)); + ASSERT_EQ("♫¡Singing is fun!", stringInput.substr(0, exPos)); + ASSERT_EQ("Singing is fun!", stringInput.substr(sPos, exPos - sPos)); + + stringInput = std::string("abcd"); + auto stringViewInput = std::string_view(stringInput); + ASSERT_EQ(cappedByteLength(stringInput, 1), 1); + ASSERT_EQ(cappedByteLength(stringInput, 2), 2); + ASSERT_EQ(cappedByteLength(stringInput, 3), 3); + ASSERT_EQ(cappedByteLength(stringInput, 4), 4); + ASSERT_EQ(cappedByteLength(stringInput, 5), 4); + ASSERT_EQ(cappedByteLength(stringInput, 6), 4); + + ASSERT_EQ(cappedByteLength(stringViewInput, 1), 1); + ASSERT_EQ(cappedByteLength(stringViewInput, 2), 2); + ASSERT_EQ(cappedByteLength(stringViewInput, 3), 3); + ASSERT_EQ(cappedByteLength(stringViewInput, 4), 4); + ASSERT_EQ(cappedByteLength(stringViewInput, 5), 4); + ASSERT_EQ(cappedByteLength(stringViewInput, 6), 4); + + stringInput = std::string("你好a世界"); + stringViewInput = std::string_view(stringInput); + ASSERT_EQ(cappedByteLength(stringInput, 1), 3); + ASSERT_EQ(cappedByteLength(stringInput, 2), 6); + ASSERT_EQ(cappedByteLength(stringInput, 3), 7); + ASSERT_EQ(cappedByteLength(stringInput, 4), 10); + ASSERT_EQ(cappedByteLength(stringInput, 5), 13); + ASSERT_EQ(cappedByteLength(stringInput, 6), 13); + + ASSERT_EQ(cappedByteLength(stringViewInput, 1), 3); + ASSERT_EQ(cappedByteLength(stringViewInput, 2), 6); + ASSERT_EQ(cappedByteLength(stringViewInput, 3), 7); + ASSERT_EQ(cappedByteLength(stringViewInput, 4), 10); + ASSERT_EQ(cappedByteLength(stringViewInput, 5), 13); + ASSERT_EQ(cappedByteLength(stringViewInput, 6), 13); + + stringInput = std::string("\x80"); + stringViewInput = std::string_view(stringInput); + ASSERT_EQ(cappedByteLength(stringInput, 1), 1); + ASSERT_EQ(cappedByteLength(stringInput, 2), 1); + ASSERT_EQ(cappedByteLength(stringInput, 3), 1); + ASSERT_EQ(cappedByteLength(stringInput, 4), 1); + ASSERT_EQ(cappedByteLength(stringInput, 5), 1); + ASSERT_EQ(cappedByteLength(stringInput, 6), 1); + + ASSERT_EQ(cappedByteLength(stringViewInput, 1), 1); + ASSERT_EQ(cappedByteLength(stringViewInput, 2), 1); + ASSERT_EQ(cappedByteLength(stringViewInput, 3), 1); + ASSERT_EQ(cappedByteLength(stringViewInput, 4), 1); + ASSERT_EQ(cappedByteLength(stringViewInput, 5), 1); + ASSERT_EQ(cappedByteLength(stringViewInput, 6), 1); + + stringInput.resize(2); + // Create corrupt data below. 
+ char16_t c = u'\u04FF'; + stringInput[0] = (char)c; + stringInput[1] = (char)c; + + ASSERT_EQ(cappedByteLength(stringInput, 1), 1); + + stringInput.resize(4); + c = u'\u04F4'; + char16_t c2 = u'\u048F'; + char16_t c3 = u'\u04BF'; + stringInput[0] = (char)c; + stringInput[1] = (char)c2; + stringInput[2] = (char)c3; + stringInput[3] = (char)c3; + + stringViewInput = std::string_view(stringInput); + ASSERT_EQ(cappedByteLength(stringInput, 1), 4); + ASSERT_EQ(cappedByteLength(stringInput, 2), 4); + ASSERT_EQ(cappedByteLength(stringInput, 3), 4); + + ASSERT_EQ(cappedByteLength(stringViewInput, 1), 4); + ASSERT_EQ(cappedByteLength(stringViewInput, 2), 4); + ASSERT_EQ(cappedByteLength(stringViewInput, 3), 4); +} + TEST_F(StringImplTest, badUnicodeLength) { ASSERT_EQ(0, length(std::string(""))); ASSERT_EQ(2, length(std::string("ab"))); From 76a5fd0ce2f01748e184b946ae39af93a8ae8f18 Mon Sep 17 00:00:00 2001 From: Bikramjeet Vig Date: Tue, 13 Feb 2024 15:26:54 -0800 Subject: [PATCH 11/38] Add FieldReference benchmark Summary: Adds a benchmark for FieldReference specifically for chained dereferences at different level. ``` ============================================================================ [...]hmarks/ExpressionBenchmarkBuilder.cpp relative time/iter iters/s ============================================================================ dereference_nullfree##1LevelThenFlat 275.83us 3.63K dereference_nullfree##1LevelThenComplex 419.07us 2.39K dereference_nullfree##2LevelThenFlat 431.62us 2.32K dereference_nullfree##2LevelThenComplex 599.98us 1.67K dereference_nullfree##3LevelThenFlat 632.73us 1.58K dereference_nullfree##3LevelThenComplex 777.87us 1.29K dereference_nullfree##4LevelThenFlat 746.93us 1.34K dereference_nullfree##4LevelThenComplex 867.19us 1.15K dereference_nulls##1LevelThenFlat 3.85ms 259.90 dereference_nulls##1LevelThenComplex 38.80ms 25.77 dereference_nulls##2LevelThenFlat 13.55ms 73.80 dereference_nulls##2LevelThenComplex 42.46ms 23.55 dereference_nulls##3LevelThenFlat 17.10ms 58.47 dereference_nulls##3LevelThenComplex 43.49ms 22.99 dereference_nulls##4LevelThenFlat 19.87ms 50.32 dereference_nulls##4LevelThenComplex 45.43ms 22.01 ``` Reviewed By: Yuhta Differential Revision: D53683303 fbshipit-source-id: 48b39c3dc4bcca6f2a4bd1249fd43fb8fc4e2492 --- .../prestosql/benchmarks/CMakeLists.txt | 5 ++ .../benchmarks/FieldReferenceBenchmark.cpp | 89 +++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 velox/functions/prestosql/benchmarks/FieldReferenceBenchmark.cpp diff --git a/velox/functions/prestosql/benchmarks/CMakeLists.txt b/velox/functions/prestosql/benchmarks/CMakeLists.txt index f7364c1d2287..e794460ef0d9 100644 --- a/velox/functions/prestosql/benchmarks/CMakeLists.txt +++ b/velox/functions/prestosql/benchmarks/CMakeLists.txt @@ -46,6 +46,11 @@ add_executable(velox_functions_prestosql_benchmarks_array_sum target_link_libraries(velox_functions_prestosql_benchmarks_array_sum ${BENCHMARK_DEPENDENCIES}) +add_executable(velox_functions_prestosql_benchmarks_field_reference + FieldReferenceBenchmark.cpp) +target_link_libraries(velox_functions_prestosql_benchmarks_field_reference + ${BENCHMARK_DEPENDENCIES}) + add_executable(velox_functions_prestosql_benchmarks_width_bucket WidthBucketBenchmark.cpp) target_link_libraries(velox_functions_prestosql_benchmarks_width_bucket diff --git a/velox/functions/prestosql/benchmarks/FieldReferenceBenchmark.cpp b/velox/functions/prestosql/benchmarks/FieldReferenceBenchmark.cpp new file mode 100644 index 000000000000..0abc546258ea 
--- /dev/null
+++ b/velox/functions/prestosql/benchmarks/FieldReferenceBenchmark.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <folly/Benchmark.h>
+#include <folly/init/Init.h>
+
+#include "velox/benchmarks/ExpressionBenchmarkBuilder.h"
+#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
+#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
+#include "velox/vector/fuzzer/VectorFuzzer.h"
+
+using namespace facebook::velox;
+using namespace facebook::velox::exec;
+using namespace facebook::velox::functions;
+
+std::vector<std::string> getColumnNames(int children) {
+  std::vector<std::string> result;
+  for (int i = 0; i < children; ++i) {
+    result.push_back(fmt::format("{}{}", 'c', i));
+  }
+  return result;
+}
+
+RowTypePtr getRowColumnType(FuzzerGenerator& rng, int children, int level) {
+  VELOX_CHECK_GE(level, 1);
+  VELOX_CHECK_GE(children, 3);
+  std::vector<TypePtr> result;
+  result.push_back(ARRAY(INTEGER()));
+  result.push_back(INTEGER());
+  if (level > 1) {
+    result.push_back(getRowColumnType(rng, children, level - 1));
+  } else {
+    result.push_back(randType(rng, 2));
+  }
+  for (int i = 0; i < children - 3; ++i) {
+    result.push_back(randType(rng, 2));
+  }
+  return ROW(getColumnNames(children), std::move(result));
+}
+
+int main(int argc, char** argv) {
+  folly::Init init{&argc, &argv};
+
+  ExpressionBenchmarkBuilder benchmarkBuilder;
+  FuzzerGenerator rng;
+
+  auto createSet = [&](bool withNulls, RowTypePtr& inputType) {
+    benchmarkBuilder
+        .addBenchmarkSet(
+            fmt::format("dereference_{}", withNulls ? "nulls" : "nullfree"),
+            inputType)
+        .withFuzzerOptions(
+            {.vectorSize = 1000, .nullRatio = withNulls ? 0.2 : 0})
+        .addExpression("1LevelThenFlat", "(c0).c1")
+        .addExpression("1LevelThenComplex", "(c0).c0")
+        .addExpression("2LevelThenFlat", "(c0).c2.c1")
+        .addExpression("2LevelThenComplex", "(c0).c2.c0")
+        .addExpression("3LevelThenFlat", "(c0).c2.c2.c1")
+        .addExpression("3LevelThenComplex", "(c0).c2.c2.c0")
+        .addExpression("4LevelThenFlat", "(c0).c2.c2.c2.c1")
+        .addExpression("4LevelThenComplex", "(c0).c2.c2.c2.c0");
+  };
+
+  // Create a nested row column of depth 4. Each level has 50 columns. Each ROW
+  // at depth n will have the first three columns as ARRAY(INTEGER()), INTEGER()
+  // and ROW {of depth 4-n}, respectively. The third column of the deepest ROW,
+  // however, can be any type.
+ auto inputType = ROW({"c0"}, {getRowColumnType(rng, 50, 4)}); + + createSet(true, inputType); + createSet(false, inputType); + + benchmarkBuilder.registerBenchmarks(); + + folly::runBenchmarks(); + return 0; +} From f0583e76be95d865074ccd3ad04343b9f03e3d2f Mon Sep 17 00:00:00 2001 From: Pedro Pedreira Date: Tue, 13 Feb 2024 15:49:47 -0800 Subject: [PATCH 12/38] Add `VELOX_BUILD_MINIMAL_WITH_DWIO` compilation option (#8682) Summary: `VELOX_BUILD_MINIMAL_WITH_DWIO` allows developers using Velox to compile only dwio (in addition to Velox minimal), but without pulling all other dependencies and internal libraries (exec, connectors, parser, aggregates, storage adapters, etc). Pull Request resolved: https://github.com/facebookincubator/velox/pull/8682 Reviewed By: Yuhta Differential Revision: D53729171 Pulled By: pedroerp fbshipit-source-id: 417e8cc4fc2b512ec658fe76a38477d7e30026f8 --- CMakeLists.txt | 23 ++++++++++++++++++----- Makefile | 18 +++++++++++++++++- velox/CMakeLists.txt | 4 ++-- velox/codegen/CMakeLists.txt | 2 +- velox/dwio/common/CMakeLists.txt | 1 - 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c7dc7d568d3..e2099787a969 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,11 @@ option( VELOX_BUILD_MINIMAL "Build a minimal set of components only. This will override other build options." OFF) +option( + VELOX_BUILD_MINIMAL_WITH_DWIO + "Build a minimal set of components, including DWIO (file format readers/writers). + This will override other build options." + OFF) # option() always creates a BOOL variable so we have to use a normal cache # variable with STRING type for this option. @@ -96,6 +101,7 @@ option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF) option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF) option(VELOX_ENABLE_REMOTE_FUNCTIONS "Enable remote function support" OFF) option(VELOX_ENABLE_CCACHE "Use ccache if installed." ON) +option(VELOX_ENABLE_CODEGEN_SUPPORT "Enable experimental codegen support." OFF) option(VELOX_BUILD_TEST_UTILS "Builds Velox test utilities" OFF) option(VELOX_BUILD_PYTHON_PACKAGE "Builds Velox Python bindings" OFF) @@ -125,7 +131,7 @@ if(${VELOX_BUILD_MINIMAL}) set(VELOX_ENABLE_GCS OFF) set(VELOX_ENABLE_ABFS OFF) set(VELOX_ENABLE_SUBSTRAIT OFF) - set(VELOX_CODEGEN_SUPPORT OFF) + set(VELOX_ENABLE_CODEGEN_SUPPORT OFF) endif() if(${VELOX_BUILD_TESTING}) @@ -175,7 +181,7 @@ if(${VELOX_BUILD_PYTHON_PACKAGE}) set(VELOX_ENABLE_GCS OFF) set(VELOX_ENABLE_ABFS OFF) set(VELOX_ENABLE_SUBSTRAIT OFF) - set(VELOX_CODEGEN_SUPPORT OFF) + set(VELOX_ENABLE_CODEGEN_SUPPORT OFF) set(VELOX_ENABLE_BENCHMARKS_BASIC OFF) set(VELOX_ENABLE_BENCHMARKS OFF) endif() @@ -257,7 +263,7 @@ if(VELOX_ENABLE_PARQUET) endif() # define processor variable for conditional compilation -if(${VELOX_CODEGEN_SUPPORT}) +if(${VELOX_ENABLE_CODEGEN_SUPPORT}) add_compile_definitions(CODEGEN_ENABLED=1) endif() @@ -420,7 +426,10 @@ endif() set_source(fmt) resolve_dependency(fmt 9.0.0) -if(NOT ${VELOX_BUILD_MINIMAL}) +if(${VELOX_BUILD_MINIMAL_WITH_DWIO} OR ${VELOX_ENABLE_HIVE_CONNECTOR}) + # DWIO needs all sorts of stream compression libraries. + # + # TODO: make these optional and pluggable. find_package(ZLIB REQUIRED) find_package(lz4 REQUIRED) find_package(lzo2 REQUIRED) @@ -467,7 +476,11 @@ else() set(FOLLY_BENCHMARK Folly::follybenchmark) endif() -if(NOT ${VELOX_BUILD_MINIMAL}) +# DWIO (ORC/DWRF), Substrait and experimental/codegen depend on protobuf. 
+if(${VELOX_BUILD_MINIMAL_WITH_DWIO} + OR ${VELOX_ENABLE_HIVE_CONNECTOR} + OR ${VELOX_ENABLE_SUBSTRAIT} + OR ${VELOX_ENABLE_CODEGEN_SUPPORT}) # Locate or build protobuf. set_source(Protobuf) resolve_dependency(Protobuf 3.21 EXACT) diff --git a/Makefile b/Makefile index 794d876b41c9..82de59432436 100644 --- a/Makefile +++ b/Makefile @@ -98,10 +98,26 @@ release: #: Build the release version $(MAKE) cmake BUILD_DIR=release BUILD_TYPE=Release && \ $(MAKE) build BUILD_DIR=release -min_debug: #: Minimal build with debugging symbols +minimal_debug: #: Minimal build with debugging symbols $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_BUILD_MINIMAL=ON" $(MAKE) build BUILD_DIR=debug +min_debug: minimal_debug + +minimal: #: Minimal build + $(MAKE) cmake BUILD_DIR=release BUILD_TYPE=release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_BUILD_MINIMAL=ON" + $(MAKE) build BUILD_DIR=release + +dwio: #: Minimal build with dwio enabled. + $(MAKE) cmake BUILD_DIR=release BUILD_TYPE=release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} \ + -DVELOX_BUILD_MINIMAL_WITH_DWIO=ON" + $(MAKE) build BUILD_DIR=release + +dwio_debug: #: Minimal build with dwio debugging symbols. + $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} \ + -DVELOX_BUILD_MINIMAL_WITH_DWIO=ON" + $(MAKE) build BUILD_DIR=debug + benchmarks-basic-build: $(MAKE) release EXTRA_CMAKE_FLAGS=" ${EXTRA_CMAKE_FLAGS} \ -DVELOX_BUILD_TESTING=OFF \ diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt index cd18d344e809..be53ad8beeb9 100644 --- a/velox/CMakeLists.txt +++ b/velox/CMakeLists.txt @@ -44,7 +44,7 @@ if(${VELOX_ENABLE_PARSE}) endif() # hive connector depends on dwio -if(${VELOX_ENABLE_HIVE_CONNECTOR}) +if(${VELOX_BUILD_MINIMAL_WITH_DWIO} OR ${VELOX_ENABLE_HIVE_CONNECTOR}) add_subdirectory(dwio) endif() @@ -65,7 +65,7 @@ if(${VELOX_ENABLE_DUCKDB}) add_subdirectory(duckdb) endif() -if(${VELOX_CODEGEN_SUPPORT}) +if(${VELOX_ENABLE_CODEGEN_SUPPORT}) add_subdirectory(experimental/codegen) endif() diff --git a/velox/codegen/CMakeLists.txt b/velox/codegen/CMakeLists.txt index e54a0d133c07..37e7d2a34389 100644 --- a/velox/codegen/CMakeLists.txt +++ b/velox/codegen/CMakeLists.txt @@ -13,7 +13,7 @@ # limitations under the License. add_library(velox_codegen Codegen.cpp) -if(${VELOX_CODEGEN_SUPPORT}) +if(${VELOX_ENABLE_CODEGEN_SUPPORT}) target_link_libraries(velox_codegen velox_experimental_codegen) else() target_link_libraries(velox_codegen velox_core velox_exec velox_expression diff --git a/velox/dwio/common/CMakeLists.txt b/velox/dwio/common/CMakeLists.txt index 8334de75e0f5..25e8bb56a104 100644 --- a/velox/dwio/common/CMakeLists.txt +++ b/velox/dwio/common/CMakeLists.txt @@ -74,7 +74,6 @@ target_link_libraries( velox_exception velox_expression velox_memory - velox_exec Boost::regex Folly::folly glog::glog) From dec4c446f92806fb4a3671cd19da81526a695ff8 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Tue, 13 Feb 2024 16:18:19 -0800 Subject: [PATCH 13/38] Fix 'out of range in dynamic array' error in Task::toJson (#8735) Summary: Task::toJson used to create folly::dynamic::array for drivers and access non-existing elements via [index]. That resulted in 'out of range in dynamic array' errors. Fix is to use folly::dynamic::object. 
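As a standalone illustration (an editorial sketch, not part of the original
patch), the snippet below reproduces the failure mode with plain folly:
writing past the end of a `folly::dynamic` array throws, while a
`folly::dynamic` object accepts sparse keys. The exception message matches
the error quoted above.

```cpp
#include <folly/dynamic.h>
#include <folly/json.h>
#include <iostream>

int main() {
  // Sparse writes into a dynamic array are rejected: index 1 doesn't exist
  // yet, so operator[] throws std::out_of_range with the message
  // "out of range in dynamic array" -- the same error hit when a null driver
  // slot caused an index to be skipped.
  folly::dynamic drivers = folly::dynamic::array;
  try {
    drivers[1] = "driver-1";
  } catch (const std::out_of_range& e) {
    std::cout << e.what() << std::endl;
  }

  // A dynamic object keyed by the driver index accepts sparse entries.
  folly::dynamic driversObj = folly::dynamic::object;
  driversObj["1"] = "driver-1";
  std::cout << folly::toJson(driversObj) << std::endl; // {"1":"driver-1"}
  return 0;
}
```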
Fixes https://github.com/prestodb/presto/issues/21917 Pull Request resolved: https://github.com/facebookincubator/velox/pull/8735 Reviewed By: spershin Differential Revision: D53727317 Pulled By: mbasmanova fbshipit-source-id: afcfd490fd44d67b78c0b70b3f557a179fddd123 --- velox/exec/Task.cpp | 11 +++++---- velox/exec/tests/TaskTest.cpp | 42 +++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/velox/exec/Task.cpp b/velox/exec/Task.cpp index fd5addd7af67..364ba829fb4e 100644 --- a/velox/exec/Task.cpp +++ b/velox/exec/Task.cpp @@ -2154,14 +2154,13 @@ folly::dynamic Task::toJson() const { obj["plan"] = planFragment_.planNode->toString(true, true); } - folly::dynamic driverObj = folly::dynamic::array; - int index = 0; - for (auto& driver : drivers_) { - if (driver) { - driverObj[index++] = driver->toJson(); + folly::dynamic drivers = folly::dynamic::object; + for (auto i = 0; i < drivers_.size(); ++i) { + if (drivers_[i] != nullptr) { + drivers[i] = drivers_[i]->toJson(); } } - obj["drivers"] = driverObj; + obj["drivers"] = drivers; if (auto buffers = bufferManager_.lock()) { if (auto buffer = buffers->getBufferIfExists(taskId_)) { diff --git a/velox/exec/tests/TaskTest.cpp b/velox/exec/tests/TaskTest.cpp index c64252719084..37c9e5618395 100644 --- a/velox/exec/tests/TaskTest.cpp +++ b/velox/exec/tests/TaskTest.cpp @@ -502,14 +502,7 @@ class TaskTest : public HiveConnectorTestBase { } }; -TEST_F(TaskTest, wrongPlanNodeForSplit) { - auto connectorSplit = std::make_shared( - "test", - "file:/tmp/abc", - facebook::velox::dwio::common::FileFormat::DWRF, - 0, - 100); - +TEST_F(TaskTest, toJson) { auto plan = PlanBuilder() .tableScan(ROW({"a", "b"}, {INTEGER(), DOUBLE()})) .project({"a * a", "b + b"}) @@ -525,11 +518,42 @@ TEST_F(TaskTest, wrongPlanNodeForSplit) { task->toString(), "{Task task-1 (task-1)Plan: -- Project\n\n drivers:\n"); ASSERT_EQ( folly::toPrettyJson(task->toJson()), - "{\n \"concurrentSplitGroups\": 1,\n \"drivers\": [],\n \"exchangeClientByPlanNode\": {},\n \"groupedPartitionedOutput\": false,\n \"id\": \"task-1\",\n \"noMoreOutputBuffers\": false,\n \"numDriversPerSplitGroup\": 0,\n \"numDriversUngrouped\": 0,\n \"numFinishedDrivers\": 0,\n \"numRunningDrivers\": 0,\n \"numRunningSplitGroups\": 0,\n \"numThreads\": 0,\n \"numTotalDrivers\": 0,\n \"onThreadSince\": \"0\",\n \"partitionedOutputConsumed\": false,\n \"pauseRequested\": false,\n \"plan\": \"-- Project[expressions: (p0:INTEGER, multiply(ROW[\\\"a\\\"],ROW[\\\"a\\\"])), (p1:DOUBLE, plus(ROW[\\\"b\\\"],ROW[\\\"b\\\"]))] -> p0:INTEGER, p1:DOUBLE\\n -- TableScan[table: hive_table] -> a:INTEGER, b:DOUBLE\\n\",\n \"shortId\": \"task-1\",\n \"state\": \"Running\",\n \"terminateRequested\": false\n}"); + "{\n \"concurrentSplitGroups\": 1,\n \"drivers\": {},\n \"exchangeClientByPlanNode\": {},\n \"groupedPartitionedOutput\": false,\n \"id\": \"task-1\",\n \"noMoreOutputBuffers\": false,\n \"numDriversPerSplitGroup\": 0,\n \"numDriversUngrouped\": 0,\n \"numFinishedDrivers\": 0,\n \"numRunningDrivers\": 0,\n \"numRunningSplitGroups\": 0,\n \"numThreads\": 0,\n \"numTotalDrivers\": 0,\n \"onThreadSince\": \"0\",\n \"partitionedOutputConsumed\": false,\n \"pauseRequested\": false,\n \"plan\": \"-- Project[expressions: (p0:INTEGER, multiply(ROW[\\\"a\\\"],ROW[\\\"a\\\"])), (p1:DOUBLE, plus(ROW[\\\"b\\\"],ROW[\\\"b\\\"]))] -> p0:INTEGER, p1:DOUBLE\\n -- TableScan[table: hive_table] -> a:INTEGER, b:DOUBLE\\n\",\n \"shortId\": \"task-1\",\n \"state\": \"Running\",\n \"terminateRequested\": 
false\n}"); ASSERT_EQ( folly::toPrettyJson(task->toShortJson()), "{\n \"id\": \"task-1\",\n \"numFinishedDrivers\": 0,\n \"numRunningDrivers\": 0,\n \"numThreads\": 0,\n \"numTotalDrivers\": 0,\n \"pauseRequested\": false,\n \"shortId\": \"task-1\",\n \"state\": \"Running\",\n \"terminateRequested\": false\n}"); + task->start(2); + + ASSERT_NO_THROW(task->toJson()); + ASSERT_NO_THROW(task->toShortJson()); + + task->noMoreSplits("0"); + waitForTaskCompletion(task.get()); + + ASSERT_NO_THROW(task->toJson()); + ASSERT_NO_THROW(task->toShortJson()); +} + +TEST_F(TaskTest, wrongPlanNodeForSplit) { + auto connectorSplit = std::make_shared( + "test", + "file:/tmp/abc", + facebook::velox::dwio::common::FileFormat::DWRF, + 0, + 100); + + auto plan = PlanBuilder() + .tableScan(ROW({"a", "b"}, {INTEGER(), DOUBLE()})) + .project({"a * a", "b + b"}) + .planFragment(); + + auto task = Task::create( + "task-1", + std::move(plan), + 0, + std::make_shared(driverExecutor_.get())); + // Add split for the source node. task->addSplit("0", exec::Split(folly::copy(connectorSplit))); From 793222bf59a88f2539ce0e8d42d881e47e24511a Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Tue, 13 Feb 2024 17:18:10 -0800 Subject: [PATCH 14/38] Report rawInputPositions stat for MergeExchange (#8742) Summary: Source operators are expected to report rawInputBytes and rawInputPositions. MergeExchange didn't report rawInputPositions. Prestissimo uses 'rawInputPositions' to show number of rows processed by a task in the coordinator UI. When this stat is missing, the UI shows zero. Pull Request resolved: https://github.com/facebookincubator/velox/pull/8742 Reviewed By: Yuhta Differential Revision: D53730491 Pulled By: mbasmanova fbshipit-source-id: 7747f57521549e33e4a297535c5f3170c65512a2 --- velox/exec/MergeSource.cpp | 1 + velox/exec/tests/MultiFragmentTest.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/velox/exec/MergeSource.cpp b/velox/exec/MergeSource.cpp index 36904506f8bd..08bb79b8acac 100644 --- a/velox/exec/MergeSource.cpp +++ b/velox/exec/MergeSource.cpp @@ -167,6 +167,7 @@ class MergeExchangeSource : public MergeSource { auto lockedStats = mergeExchange_->stats().wlock(); lockedStats->addInputVector(data->estimateFlatSize(), data->size()); + lockedStats->rawInputPositions += data->size(); } // Since VectorStreamGroup::read() may cause inputStream to be at end, diff --git a/velox/exec/tests/MultiFragmentTest.cpp b/velox/exec/tests/MultiFragmentTest.cpp index 52f539138dea..32957ad8154e 100644 --- a/velox/exec/tests/MultiFragmentTest.cpp +++ b/velox/exec/tests/MultiFragmentTest.cpp @@ -404,8 +404,10 @@ TEST_F(MultiFragmentTest, mergeExchange) { } auto finalSortTaskId = makeTaskId("orderby", tasks.size()); + core::PlanNodeId mergeExchangeId; auto finalSortPlan = PlanBuilder() .mergeExchange(outputType, {"c0"}) + .capturePlanNodeId(mergeExchangeId) .partitionedOutput({}, 1) .planNode(); @@ -421,6 +423,15 @@ TEST_F(MultiFragmentTest, mergeExchange) { for (auto& task : tasks) { ASSERT_TRUE(waitForTaskCompletion(task.get())) << task->taskId(); } + + const auto finalSortStats = toPlanStats(task->taskStats()); + const auto& mergeExchangeStats = finalSortStats.at(mergeExchangeId); + + EXPECT_EQ(20'000, mergeExchangeStats.inputRows); + EXPECT_EQ(20'000, mergeExchangeStats.rawInputRows); + + EXPECT_LT(0, mergeExchangeStats.inputBytes); + EXPECT_LT(0, mergeExchangeStats.rawInputBytes); } // Test reordering and dropping columns in PartitionedOutput operator. 
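Editorial note on the patch above: `addInputVector` only bumps the
decoded-input counters, so the raw-input row count has to be incremented
explicitly at the call site. A minimal sketch of that bookkeeping, using a
simplified, hypothetical stand-in for the stats class (the real OperatorStats
has many more counters and is accessed through a synchronized `wlock()`):

```cpp
#include <cstdint>
#include <iostream>

// Simplified stand-in for the operator stats structure (name hypothetical).
struct MockOperatorStats {
  int64_t inputBytes{0};
  int64_t inputPositions{0};
  int64_t rawInputPositions{0};

  // Only the decoded-input counters are updated here, which is why the fix
  // adds a separate rawInputPositions increment at the call site.
  void addInputVector(int64_t bytes, int64_t rows) {
    inputBytes += bytes;
    inputPositions += rows;
  }
};

int main() {
  MockOperatorStats stats;
  const int64_t batchBytes = 4096;
  const int64_t batchRows = 1024;

  stats.addInputVector(batchBytes, batchRows);
  stats.rawInputPositions += batchRows; // the line the patch adds

  std::cout << "rawInputPositions = " << stats.rawInputPositions << std::endl;
  return 0;
}
```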
From 5d1d2a3fa5055b1ff14f2b5154cfd9570ce35984 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Tue, 13 Feb 2024 17:20:26 -0800 Subject: [PATCH 15/38] Fix install_conda in setup-ubuntu.sh to consider the CPU architecture (#8706) Summary: The function install_conda in setup_ubuntu.sh always downloads the conda package corresponding to the CPU architecture x86_64. Fix the function to download the conda package based on the actual CPU architecture (x86_64 or aarch64). Resolves https://github.com/facebookincubator/velox/issues/8453 Pull Request resolved: https://github.com/facebookincubator/velox/pull/8706 Reviewed By: Yuhta Differential Revision: D53727378 Pulled By: mbasmanova fbshipit-source-id: 5e2e076dd3a67f3de115dcc64c682b4cbf59fd34 --- scripts/setup-ubuntu.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index 69760cf85ec0..60b37c3b2ad5 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -119,11 +119,20 @@ function install_fbthrift { function install_conda { mkdir -p conda && cd conda - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh + ARCH=$(uname -m) + + if [ "$ARCH" != "x86_64" ] && [ "$ARCH" != "aarch64" ]; then + echo "Unsupported architecture: $ARCH" + exit 1 + fi + + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$ARCH.sh + MINICONDA_PATH=/opt/miniconda-for-velox - bash Miniconda3-latest-Linux-x86_64.sh -b -p $MINICONDA_PATH + bash Miniconda3-latest-Linux-$ARCH.sh -b -p $MINICONDA_PATH } + function install_velox_deps { run_and_time install_fmt run_and_time install_folly From 701c95da4c13ec88dcfac3e9780f09bb9386a5b5 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Wed, 14 Feb 2024 07:11:22 -0800 Subject: [PATCH 16/38] Fix task id used to create ExchangeClient for MergeExchangeSource (#8743) Summary: ExchangeClient's constructor takes task ID of the owning task, the task that receives and processes the data coming from the exchange. MergeExchangeSource used to create ExchangeClient using task ID of the remote task, the task the data is being pulled from. ExchangeClient uses task ID only for logging, hence, there is no hard failure when using the wrong task ID. Pull Request resolved: https://github.com/facebookincubator/velox/pull/8743 Reviewed By: Yuhta Differential Revision: D53741039 Pulled By: mbasmanova fbshipit-source-id: d2213fe5330c28e9ec98261bbd86a4aba8c4f58f --- velox/exec/MergeSource.cpp | 2 +- velox/exec/Operator.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/velox/exec/MergeSource.cpp b/velox/exec/MergeSource.cpp index 08bb79b8acac..0ba8b6463d69 100644 --- a/velox/exec/MergeSource.cpp +++ b/velox/exec/MergeSource.cpp @@ -125,7 +125,7 @@ class MergeExchangeSource : public MergeSource { folly::Executor* executor) : mergeExchange_(mergeExchange), client_(std::make_shared( - taskId, + mergeExchange->taskId(), destination, maxQueuedBytes, pool, diff --git a/velox/exec/Operator.h b/velox/exec/Operator.h index 189f209dd864..030e06fbc764 100644 --- a/velox/exec/Operator.h +++ b/velox/exec/Operator.h @@ -519,6 +519,10 @@ class Operator : public BaseRuntimeStatWriter { return operatorCtx_->operatorType(); } + const std::string& taskId() const { + return operatorCtx_->taskId(); + } + /// Registers 'translator' for mapping user defined PlanNode subclass /// instances to user-defined Operators. 
static void registerOperator(std::unique_ptr translator); From 0fabd2463ee818b64d8bcf165089f63eb7d7ae43 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Wed, 14 Feb 2024 07:12:54 -0800 Subject: [PATCH 17/38] Drop support for TIMESTAMP input from dayofweek Spark function (#8746) Summary: In Spark `dayofweek` doesn't directly accept TIMESTAMP input. For TIMESTAMP input, Spark will add a cast expression to convert it to date type, so only DATE type can be considered in Velox. Also removing alias `dow`. Spark function doc - https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.dayofweek.html Addresses https://github.com/facebookincubator/velox/issues/8736 Pull Request resolved: https://github.com/facebookincubator/velox/pull/8746 Reviewed By: Yuhta Differential Revision: D53748021 Pulled By: mbasmanova fbshipit-source-id: cee7f7a7a216085485f043451616da508a2cec58 --- velox/docs/functions/spark/datetime.rst | 11 +-- velox/functions/sparksql/DateTimeFunctions.h | 8 +- velox/functions/sparksql/Register.cpp | 5 +- .../sparksql/tests/DateTimeFunctionsTest.cpp | 84 +++++-------------- 4 files changed, 24 insertions(+), 84 deletions(-) diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst index 3ac392536524..bcdae4c2c637 100644 --- a/velox/docs/functions/spark/datetime.rst +++ b/velox/docs/functions/spark/datetime.rst @@ -71,17 +71,12 @@ These functions support TIMESTAMP and DATE input types. SELECT dayofyear('2016-04-09'); -- 100 -.. spark:function:: dayofweek(date/timestamp) -> integer +.. spark:function:: dayofweek(date) -> integer - Returns the day of the week for date/timestamp (1 = Sunday, 2 = Monday, ..., 7 = Saturday). - We can use `dow` as alias for :: + Returns the day of the week for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). SELECT dayofweek('2009-07-30'); -- 5 - SELECT dayofweek('2023-08-22 11:23:00.100'); -- 3 - -.. spark:function:: dow(x) -> integer - - This is an alias for :spark:func:`dayofweek`. + SELECT dayofweek('2023-08-22'); -- 3 .. 
spark:function:: from_unixtime(unixTime, format) -> string diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 94a7f21317e5..f2b452f7cb7d 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -396,7 +396,7 @@ struct DateSubFunction { }; template -struct DayOfWeekFunction : public InitSessionTimezone { +struct DayOfWeekFunction { VELOX_DEFINE_FUNCTION_TYPES(T); // 1 = Sunday, 2 = Monday, ..., 7 = Saturday @@ -404,12 +404,6 @@ struct DayOfWeekFunction : public InitSessionTimezone { return time.tm_wday + 1; } - FOLLY_ALWAYS_INLINE void call( - int32_t& result, - const arg_type& timestamp) { - result = getDayOfWeek(getDateTime(timestamp, this->timeZone_)); - } - FOLLY_ALWAYS_INLINE void call(int32_t& result, const arg_type& date) { result = getDayOfWeek(getDateTime(date)); } diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp index a24c0bf215ae..cf31e9512d74 100644 --- a/velox/functions/sparksql/Register.cpp +++ b/velox/functions/sparksql/Register.cpp @@ -304,10 +304,7 @@ void registerFunctions(const std::string& prefix) { registerFunction( {prefix + "doy", prefix + "dayofyear"}); - registerFunction( - {prefix + "dow", prefix + "dayofweek"}); - registerFunction( - {prefix + "dow", prefix + "dayofweek"}); + registerFunction({prefix + "dayofweek"}); registerFunction({prefix + "quarter"}); diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index b7cfbe4d694e..df0067060b71 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -427,73 +427,27 @@ TEST_F(DateTimeFunctionsTest, dayOfMonth) { } TEST_F(DateTimeFunctionsTest, dayOfWeekDate) { - const auto dayOfWeek = [&](std::optional date, - const std::string& func) { - return evaluateOnce( - fmt::format("{}(c0)", func), {date}, {DATE()}); + const auto dayOfWeek = [&](std::optional date) { + return evaluateOnce("dayofweek(c0)", {date}, {DATE()}); }; - for (const auto& func : {"dayofweek", "dow"}) { - EXPECT_EQ(std::nullopt, dayOfWeek(std::nullopt, func)); - EXPECT_EQ(5, dayOfWeek(0, func)); - EXPECT_EQ(4, dayOfWeek(-1, func)); - EXPECT_EQ(7, dayOfWeek(-40, func)); - EXPECT_EQ(5, dayOfWeek(parseDate("2009-07-30"), func)); - EXPECT_EQ(1, dayOfWeek(parseDate("2023-08-20"), func)); - EXPECT_EQ(2, dayOfWeek(parseDate("2023-08-21"), func)); - EXPECT_EQ(3, dayOfWeek(parseDate("2023-08-22"), func)); - EXPECT_EQ(4, dayOfWeek(parseDate("2023-08-23"), func)); - EXPECT_EQ(5, dayOfWeek(parseDate("2023-08-24"), func)); - EXPECT_EQ(6, dayOfWeek(parseDate("2023-08-25"), func)); - EXPECT_EQ(7, dayOfWeek(parseDate("2023-08-26"), func)); - EXPECT_EQ(1, dayOfWeek(parseDate("2023-08-27"), func)); - - // test cases from spark's DateExpressionSuite. 
-    EXPECT_EQ(6, dayOfWeek(util::fromDateString("2011-05-06"), func));
-  }
-}
-
-TEST_F(DateTimeFunctionsTest, dayofWeekTs) {
-  const auto dayOfWeek = [&](std::optional date,
-                             const std::string& func) {
-    return evaluateOnce(fmt::format("{}(c0)", func), date);
-  };
-
-  for (const auto& func : {"dayofweek", "dow"}) {
-    EXPECT_EQ(5, dayOfWeek(Timestamp(0, 0), func));
-    EXPECT_EQ(4, dayOfWeek(Timestamp(-1, 0), func));
-    EXPECT_EQ(
-        1,
-        dayOfWeek(util::fromTimestampString("2023-08-20 20:23:00.001"), func));
-    EXPECT_EQ(
-        2,
-        dayOfWeek(util::fromTimestampString("2023-08-21 21:23:00.030"), func));
-    EXPECT_EQ(
-        3,
-        dayOfWeek(util::fromTimestampString("2023-08-22 11:23:00.100"), func));
-    EXPECT_EQ(
-        4,
-        dayOfWeek(util::fromTimestampString("2023-08-23 22:23:00.030"), func));
-    EXPECT_EQ(
-        5,
-        dayOfWeek(util::fromTimestampString("2023-08-24 15:23:00.000"), func));
-    EXPECT_EQ(
-        6,
-        dayOfWeek(util::fromTimestampString("2023-08-25 03:23:04.000"), func));
-    EXPECT_EQ(
-        7,
-        dayOfWeek(util::fromTimestampString("2023-08-26 01:03:00.300"), func));
-    EXPECT_EQ(
-        1,
-        dayOfWeek(util::fromTimestampString("2023-08-27 01:13:00.000"), func));
-    // test cases from spark's DateExpressionSuite.
-    EXPECT_EQ(
-        4, dayOfWeek(util::fromTimestampString("2015-04-08 13:10:15"), func));
-    EXPECT_EQ(
-        7, dayOfWeek(util::fromTimestampString("2017-05-27 13:10:15"), func));
-    EXPECT_EQ(
-        6, dayOfWeek(util::fromTimestampString("1582-10-15 13:10:15"), func));
-  }
+  EXPECT_EQ(std::nullopt, dayOfWeek(std::nullopt));
+  EXPECT_EQ(5, dayOfWeek(0));
+  EXPECT_EQ(4, dayOfWeek(-1));
+  EXPECT_EQ(7, dayOfWeek(-40));
+  EXPECT_EQ(5, dayOfWeek(parseDate("2009-07-30")));
+  EXPECT_EQ(1, dayOfWeek(parseDate("2023-08-20")));
+  EXPECT_EQ(2, dayOfWeek(parseDate("2023-08-21")));
+  EXPECT_EQ(3, dayOfWeek(parseDate("2023-08-22")));
+  EXPECT_EQ(4, dayOfWeek(parseDate("2023-08-23")));
+  EXPECT_EQ(5, dayOfWeek(parseDate("2023-08-24")));
+  EXPECT_EQ(6, dayOfWeek(parseDate("2023-08-25")));
+  EXPECT_EQ(7, dayOfWeek(parseDate("2023-08-26")));
+  EXPECT_EQ(1, dayOfWeek(parseDate("2023-08-27")));
+  EXPECT_EQ(6, dayOfWeek(util::fromDateString("2011-05-06")));
+  EXPECT_EQ(4, dayOfWeek(util::fromDateString("2015-04-08")));
+  EXPECT_EQ(7, dayOfWeek(util::fromDateString("2017-05-27")));
+  EXPECT_EQ(6, dayOfWeek(util::fromDateString("1582-10-15")));
 }
 
 TEST_F(DateTimeFunctionsTest, dateDiffDate) {

From afe819e807100262c8b056efa3defb29606daf6e Mon Sep 17 00:00:00 2001
From: yingsu00
Date: Wed, 14 Feb 2024 07:26:14 -0800
Subject: [PATCH 18/38] Support reading Iceberg positional delete files (#7847)

Summary:
In this PR we introduce the IcebergSplitReader, which supports reading Iceberg
splits with positional delete files.

Pull Request resolved: https://github.com/facebookincubator/velox/pull/7847

Reviewed By: mbasmanova

Differential Revision: D53591413

Pulled By: Yuhta

fbshipit-source-id: 8b8b5c0487ae1d1ecc24e0ca5def6d62df34eee1
---
 velox/connectors/hive/CMakeLists.txt          |  30 +-
 velox/connectors/hive/FileHandle.h            |   5 -
 velox/connectors/hive/HiveConnectorUtil.cpp   |  22 +-
 velox/connectors/hive/HiveConnectorUtil.h     |   8 +
 velox/connectors/hive/SplitReader.cpp         |  40 ++-
 velox/connectors/hive/iceberg/CMakeLists.txt  |  28 ++
 .../hive/iceberg/IcebergDeleteFile.h          |  69 +++++
 .../hive/iceberg/IcebergMetadataColumns.h     |  55 ++++
 .../connectors/hive/iceberg/IcebergSplit.cpp  |  69 +++++
 velox/connectors/hive/iceberg/IcebergSplit.h  |  56 ++++
 .../hive/iceberg/IcebergSplitReader.cpp       | 113 +++++++
 .../hive/iceberg/IcebergSplitReader.h         |  62 ++++
.../iceberg/PositionalDeleteFileReader.cpp | 243 +++++++++++++++ .../hive/iceberg/PositionalDeleteFileReader.h | 86 ++++++ .../hive/iceberg/tests/CMakeLists.txt | 34 +++ .../hive/iceberg/tests/IcebergReadTest.cpp | 280 ++++++++++++++++++ 16 files changed, 1167 insertions(+), 33 deletions(-) create mode 100644 velox/connectors/hive/iceberg/CMakeLists.txt create mode 100644 velox/connectors/hive/iceberg/IcebergDeleteFile.h create mode 100644 velox/connectors/hive/iceberg/IcebergMetadataColumns.h create mode 100644 velox/connectors/hive/iceberg/IcebergSplit.cpp create mode 100644 velox/connectors/hive/iceberg/IcebergSplit.h create mode 100644 velox/connectors/hive/iceberg/IcebergSplitReader.cpp create mode 100644 velox/connectors/hive/iceberg/IcebergSplitReader.h create mode 100644 velox/connectors/hive/iceberg/PositionalDeleteFileReader.cpp create mode 100644 velox/connectors/hive/iceberg/PositionalDeleteFileReader.h create mode 100644 velox/connectors/hive/iceberg/tests/CMakeLists.txt create mode 100644 velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp diff --git a/velox/connectors/hive/CMakeLists.txt b/velox/connectors/hive/CMakeLists.txt index 265a35b54f80..f8f60c41c4e2 100644 --- a/velox/connectors/hive/CMakeLists.txt +++ b/velox/connectors/hive/CMakeLists.txt @@ -13,9 +13,10 @@ # limitations under the License. add_library(velox_hive_config OBJECT HiveConfig.cpp) - target_link_libraries(velox_hive_config velox_exception) +add_subdirectory(iceberg) + add_library( velox_hive_connector OBJECT FileHandle.cpp @@ -31,19 +32,20 @@ add_library( target_link_libraries( velox_hive_connector - velox_common_io - velox_connector - velox_dwio_catalog_fbhive - velox_dwio_dwrf_reader - velox_dwio_dwrf_writer - velox_dwio_parquet_reader - velox_dwio_parquet_writer - velox_file - velox_hive_partition_function - velox_s3fs - velox_hdfs - velox_gcs - velox_abfs) + PUBLIC velox_hive_iceberg_splitreader + PRIVATE velox_common_io + velox_connector + velox_dwio_catalog_fbhive + velox_dwio_dwrf_reader + velox_dwio_dwrf_writer + velox_dwio_parquet_reader + velox_dwio_parquet_writer + velox_file + velox_hive_partition_function + velox_s3fs + velox_hdfs + velox_gcs + velox_abfs) add_library(velox_hive_partition_function HivePartitionFunction.cpp) diff --git a/velox/connectors/hive/FileHandle.h b/velox/connectors/hive/FileHandle.h index 15edd9d2ac2f..6fb6853d7544 100644 --- a/velox/connectors/hive/FileHandle.h +++ b/velox/connectors/hive/FileHandle.h @@ -25,14 +25,9 @@ #pragma once -#include -#include -#include - #include "velox/common/caching/CachedFactory.h" #include "velox/common/caching/FileIds.h" #include "velox/common/file/File.h" -#include "velox/dwio/common/InputStream.h" namespace facebook::velox { diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index a0bc7fbd046e..95b47de314a0 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -430,13 +430,29 @@ void configureReaderOptions( const Config* sessionProperties, const std::shared_ptr& hiveTableHandle, const std::shared_ptr& hiveSplit) { + configureReaderOptions( + readerOptions, + hiveConfig, + sessionProperties, + hiveTableHandle->dataColumns(), + hiveSplit, + hiveTableHandle->tableParameters()); +} + +void configureReaderOptions( + dwio::common::ReaderOptions& readerOptions, + const std::shared_ptr& hiveConfig, + const Config* sessionProperties, + const RowTypePtr& fileSchema, + const std::shared_ptr& hiveSplit, + const std::unordered_map& 
tableParameters) { readerOptions.setMaxCoalesceBytes(hiveConfig->maxCoalescedBytes()); readerOptions.setMaxCoalesceDistance(hiveConfig->maxCoalescedDistanceBytes()); readerOptions.setFileColumnNamesReadAsLowerCase( hiveConfig->isFileColumnNamesReadAsLowerCase(sessionProperties)); readerOptions.setUseColumnNamesForColumnMapping( hiveConfig->isOrcUseColumnNames(sessionProperties)); - readerOptions.setFileSchema(hiveTableHandle->dataColumns()); + readerOptions.setFileSchema(fileSchema); readerOptions.setFooterEstimatedSize(hiveConfig->footerEstimatedSize()); readerOptions.setFilePreloadThreshold(hiveConfig->filePreloadThreshold()); @@ -447,8 +463,8 @@ void configureReaderOptions( dwio::common::toString(readerOptions.getFileFormat()), dwio::common::toString(hiveSplit->fileFormat)); } else { - auto serDeOptions = parseSerdeParameters( - hiveSplit->serdeParameters, hiveTableHandle->tableParameters()); + auto serDeOptions = + parseSerdeParameters(hiveSplit->serdeParameters, tableParameters); if (serDeOptions) { readerOptions.setSerDeOptions(*serDeOptions); } diff --git a/velox/connectors/hive/HiveConnectorUtil.h b/velox/connectors/hive/HiveConnectorUtil.h index 67426bef78ca..329295b133d4 100644 --- a/velox/connectors/hive/HiveConnectorUtil.h +++ b/velox/connectors/hive/HiveConnectorUtil.h @@ -61,6 +61,14 @@ void configureReaderOptions( const std::shared_ptr& hiveTableHandle, const std::shared_ptr& hiveSplit); +void configureReaderOptions( + dwio::common::ReaderOptions& readerOptions, + const std::shared_ptr& hiveConfig, + const Config* sessionProperties, + const RowTypePtr& fileSchema, + const std::shared_ptr& hiveSplit, + const std::unordered_map& tableParameters = {}); + void configureRowReaderOptions( dwio::common::RowReaderOptions& rowReaderOptions, const std::unordered_map& tableParameters, diff --git a/velox/connectors/hive/SplitReader.cpp b/velox/connectors/hive/SplitReader.cpp index 92376e566d38..6395d5d5f8bb 100644 --- a/velox/connectors/hive/SplitReader.cpp +++ b/velox/connectors/hive/SplitReader.cpp @@ -21,6 +21,8 @@ #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/connectors/hive/HiveConnectorUtil.h" #include "velox/connectors/hive/TableHandle.h" +#include "velox/connectors/hive/iceberg/IcebergSplitReader.h" +#include "velox/dwio/common/CachedBufferedInput.h" #include "velox/dwio/common/ReaderFactory.h" namespace facebook::velox::connector::hive { @@ -38,17 +40,33 @@ std::unique_ptr SplitReader::create( const ConnectorQueryCtx* connectorQueryCtx, const std::shared_ptr& hiveConfig, const std::shared_ptr& ioStats) { - return std::make_unique( - hiveSplit, - hiveTableHandle, - scanSpec, - readerOutputType, - partitionKeys, - fileHandleFactory, - executor, - connectorQueryCtx, - hiveConfig, - ioStats); + // Create the SplitReader based on hiveSplit->customSplitInfo["table_format"] + if (hiveSplit->customSplitInfo.count("table_format") > 0 && + hiveSplit->customSplitInfo["table_format"] == "hive-iceberg") { + return std::make_unique( + hiveSplit, + hiveTableHandle, + scanSpec, + readerOutputType, + partitionKeys, + fileHandleFactory, + executor, + connectorQueryCtx, + hiveConfig, + ioStats); + } else { + return std::make_unique( + hiveSplit, + hiveTableHandle, + scanSpec, + readerOutputType, + partitionKeys, + fileHandleFactory, + executor, + connectorQueryCtx, + hiveConfig, + ioStats); + } } SplitReader::SplitReader( diff --git a/velox/connectors/hive/iceberg/CMakeLists.txt b/velox/connectors/hive/iceberg/CMakeLists.txt new file mode 100644 index 
000000000000..726ca63e31f3 --- /dev/null +++ b/velox/connectors/hive/iceberg/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_library( + velox_hive_iceberg_splitreader IcebergSplitReader.cpp IcebergSplit.cpp + PositionalDeleteFileReader.cpp) + +target_link_libraries( + velox_hive_iceberg_splitreader + Folly::folly + gflags::gflags + glog::glog + gtest + gtest_main + xsimd) + +add_subdirectory(tests) diff --git a/velox/connectors/hive/iceberg/IcebergDeleteFile.h b/velox/connectors/hive/iceberg/IcebergDeleteFile.h new file mode 100644 index 000000000000..2f9206dfc264 --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergDeleteFile.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include "velox/dwio/common/Options.h" + +namespace facebook::velox::connector::hive::iceberg { + +enum class FileContent { + kData, + kPositionalDeletes, + kEqualityDeletes, +}; + +struct IcebergDeleteFile { + FileContent content; + const std::string filePath; + dwio::common::FileFormat fileFormat; + uint64_t recordCount; + uint64_t fileSizeInBytes; + // The field ids for the delete columns for equality delete files + std::vector equalityFieldIds; + // The lower bounds of the in-file positions for the deleted rows, identified + // by each column's field id. E.g. The deleted rows for a column with field id + // 1 is in range [10, 50], where 10 and 50 are the deleted row positions in + // the data file, then lowerBounds would contain entry <1, "10"> + std::unordered_map lowerBounds; + // The upper bounds of the in-file positions for the deleted rows, identified + // by each column's field id. E.g. 
The deleted rows for a column with field id + // 1 is in range [10, 50], then upperBounds will contain entry <1, "50"> + std::unordered_map upperBounds; + + IcebergDeleteFile( + FileContent _content, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _recordCount, + uint64_t _fileSizeInBytes, + std::vector _equalityFieldIds = {}, + std::unordered_map _lowerBounds = {}, + std::unordered_map _upperBounds = {}) + : content(_content), + filePath(_filePath), + fileFormat(_fileFormat), + recordCount(_recordCount), + fileSizeInBytes(_fileSizeInBytes), + equalityFieldIds(_equalityFieldIds), + lowerBounds(_lowerBounds), + upperBounds(_upperBounds) {} +}; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergMetadataColumns.h b/velox/connectors/hive/iceberg/IcebergMetadataColumns.h new file mode 100644 index 000000000000..4cbf2a7862b3 --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergMetadataColumns.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include "velox/type/Type.h" + +namespace facebook::velox::connector::hive::iceberg { + +struct IcebergMetadataColumn { + int id; + std::string name; + std::shared_ptr type; + std::string doc; + + IcebergMetadataColumn( + int _id, + const std::string& _name, + std::shared_ptr _type, + const std::string& _doc) + : id(_id), name(_name), type(_type), doc(_doc) {} + + static std::shared_ptr icebergDeleteFilePathColumn() { + return std::make_shared( + 2147483546, + "file_path", + VARCHAR(), + "Path of a file in which a deleted row is stored"); + } + + static std::shared_ptr icebergDeletePosColumn() { + return std::make_shared( + 2147483545, + "pos", + BIGINT(), + "Ordinal position of a deleted row in the data file"); + } +}; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergSplit.cpp b/velox/connectors/hive/iceberg/IcebergSplit.cpp new file mode 100644 index 000000000000..7fa9a52f2c69 --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergSplit.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/hive/iceberg/IcebergSplit.h" + +#include "velox/connectors/hive/iceberg/IcebergDeleteFile.h" + +namespace facebook::velox::connector::hive::iceberg { + +HiveIcebergSplit::HiveIcebergSplit( + const std::string& _connectorId, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _start, + uint64_t _length, + const std::unordered_map>& + _partitionKeys, + std::optional _tableBucketNumber, + const std::unordered_map& _customSplitInfo, + const std::shared_ptr& _extraFileInfo) + : HiveConnectorSplit( + _connectorId, + _filePath, + _fileFormat, + _start, + _length, + _partitionKeys, + _tableBucketNumber) { + // TODO: Deserialize _extraFileInfo to get deleteFiles; +} + +// For tests only +HiveIcebergSplit::HiveIcebergSplit( + const std::string& _connectorId, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _start, + uint64_t _length, + const std::unordered_map>& + _partitionKeys, + std::optional _tableBucketNumber, + const std::unordered_map& _customSplitInfo, + const std::shared_ptr& _extraFileInfo, + std::vector _deletes) + : HiveConnectorSplit( + _connectorId, + _filePath, + _fileFormat, + _start, + _length, + _partitionKeys, + _tableBucketNumber, + _customSplitInfo, + _extraFileInfo), + deleteFiles(_deletes) {} +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergSplit.h b/velox/connectors/hive/iceberg/IcebergSplit.h new file mode 100644 index 000000000000..37b8c3c3eb36 --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergSplit.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include "velox/connectors/hive/HiveConnectorSplit.h" + +namespace facebook::velox::connector::hive::iceberg { + +class IcebergDeleteFile; + +struct HiveIcebergSplit : public connector::hive::HiveConnectorSplit { + std::vector deleteFiles; + + HiveIcebergSplit( + const std::string& connectorId, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _start = 0, + uint64_t _length = std::numeric_limits::max(), + const std::unordered_map>& + _partitionKeys = {}, + std::optional _tableBucketNumber = std::nullopt, + const std::unordered_map& _customSplitInfo = {}, + const std::shared_ptr& _extraFileInfo = {}); + + // For tests only + HiveIcebergSplit( + const std::string& connectorId, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _start = 0, + uint64_t _length = std::numeric_limits::max(), + const std::unordered_map>& + _partitionKeys = {}, + std::optional _tableBucketNumber = std::nullopt, + const std::unordered_map& _customSplitInfo = {}, + const std::shared_ptr& _extraFileInfo = {}, + std::vector deletes = {}); +}; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergSplitReader.cpp b/velox/connectors/hive/iceberg/IcebergSplitReader.cpp new file mode 100644 index 000000000000..fa65c41043e4 --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergSplitReader.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/hive/iceberg/IcebergSplitReader.h" + +#include "velox/connectors/hive/iceberg/IcebergDeleteFile.h" +#include "velox/connectors/hive/iceberg/IcebergSplit.h" +#include "velox/dwio/common/BufferUtil.h" +#include "velox/dwio/common/Mutation.h" +#include "velox/dwio/common/Reader.h" + +using namespace facebook::velox::dwio::common; + +namespace facebook::velox::connector::hive::iceberg { + +IcebergSplitReader::IcebergSplitReader( + std::shared_ptr hiveSplit, + std::shared_ptr hiveTableHandle, + std::shared_ptr scanSpec, + const RowTypePtr readerOutputType, + std::unordered_map>* + partitionKeys, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr hiveConfig, + std::shared_ptr ioStats) + : SplitReader( + hiveSplit, + hiveTableHandle, + scanSpec, + readerOutputType, + partitionKeys, + fileHandleFactory, + executor, + connectorQueryCtx, + hiveConfig, + ioStats) {} + +void IcebergSplitReader::prepareSplit( + std::shared_ptr metadataFilter, + dwio::common::RuntimeStatistics& runtimeStats) { + SplitReader::prepareSplit(metadataFilter, runtimeStats); + baseReadOffset_ = 0; + positionalDeleteFileReaders_.clear(); + splitOffset_ = baseRowReader_->nextRowNumber(); + + // TODO: Deserialize the std::vector deleteFiles. For now + // we assume it's already deserialized. 
+ std::shared_ptr icebergSplit = + std::dynamic_pointer_cast(hiveSplit_); + + const auto& deleteFiles = icebergSplit->deleteFiles; + for (const auto& deleteFile : deleteFiles) { + positionalDeleteFileReaders_.push_back( + std::make_unique( + deleteFile, + hiveSplit_->filePath, + fileHandleFactory_, + connectorQueryCtx_, + executor_, + hiveConfig_, + ioStats_, + runtimeStats, + splitOffset_, + hiveSplit_->connectorId)); + } +} + +uint64_t IcebergSplitReader::next(int64_t size, VectorPtr& output) { + Mutation mutation; + mutation.deletedRows = nullptr; + + if (!positionalDeleteFileReaders_.empty()) { + auto numBytes = bits::nbytes(size); + dwio::common::ensureCapacity( + deleteBitmap_, numBytes, connectorQueryCtx_->memoryPool()); + std::memset((void*)deleteBitmap_->as(), 0L, numBytes); + + for (auto iter = positionalDeleteFileReaders_.begin(); + iter != positionalDeleteFileReaders_.end(); + iter++) { + (*iter)->readDeletePositions( + baseReadOffset_, size, deleteBitmap_->asMutable()); + if ((*iter)->endOfFile()) { + iter = positionalDeleteFileReaders_.erase(iter); + } + } + + deleteBitmap_->setSize(numBytes); + mutation.deletedRows = deleteBitmap_->as(); + } + + auto rowsScanned = baseRowReader_->next(size, output, &mutation); + baseReadOffset_ += rowsScanned; + + return rowsScanned; +} + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/IcebergSplitReader.h b/velox/connectors/hive/iceberg/IcebergSplitReader.h new file mode 100644 index 000000000000..5c5552369735 --- /dev/null +++ b/velox/connectors/hive/iceberg/IcebergSplitReader.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "velox/connectors/Connector.h" +#include "velox/connectors/hive/SplitReader.h" +#include "velox/connectors/hive/iceberg/PositionalDeleteFileReader.h" + +namespace facebook::velox::connector::hive::iceberg { + +class IcebergDeleteFile; + +class IcebergSplitReader : public SplitReader { + public: + IcebergSplitReader( + std::shared_ptr hiveSplit, + std::shared_ptr hiveTableHandle, + std::shared_ptr scanSpec, + const RowTypePtr readerOutputType, + std::unordered_map>* + partitionKeys, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr hiveConfig, + std::shared_ptr ioStats); + + ~IcebergSplitReader() override = default; + + void prepareSplit( + std::shared_ptr metadataFilter, + dwio::common::RuntimeStatistics& runtimeStats) override; + + uint64_t next(int64_t size, VectorPtr& output) override; + + private: + // The read offset to the beginning of the split in number of rows for the + // current batch for the base data file + uint64_t baseReadOffset_; + + // The file position for the first row in the split + uint64_t splitOffset_; + + std::list> + positionalDeleteFileReaders_; + BufferPtr deleteBitmap_; +}; +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/PositionalDeleteFileReader.cpp b/velox/connectors/hive/iceberg/PositionalDeleteFileReader.cpp new file mode 100644 index 000000000000..b87007fb9804 --- /dev/null +++ b/velox/connectors/hive/iceberg/PositionalDeleteFileReader.cpp @@ -0,0 +1,243 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/hive/iceberg/PositionalDeleteFileReader.h" + +#include "velox/connectors/hive/HiveConnectorUtil.h" +#include "velox/connectors/hive/TableHandle.h" +#include "velox/connectors/hive/iceberg/IcebergDeleteFile.h" +#include "velox/connectors/hive/iceberg/IcebergMetadataColumns.h" +#include "velox/dwio/common/ReaderFactory.h" + +namespace facebook::velox::connector::hive::iceberg { + +PositionalDeleteFileReader::PositionalDeleteFileReader( + const IcebergDeleteFile& deleteFile, + const std::string& baseFilePath, + FileHandleFactory* fileHandleFactory, + const ConnectorQueryCtx* connectorQueryCtx, + folly::Executor* executor, + const std::shared_ptr hiveConfig, + std::shared_ptr ioStats, + dwio::common::RuntimeStatistics& runtimeStats, + uint64_t splitOffset, + const std::string& connectorId) + : deleteFile_(deleteFile), + baseFilePath_(baseFilePath), + fileHandleFactory_(fileHandleFactory), + executor_(executor), + connectorQueryCtx_(connectorQueryCtx), + hiveConfig_(hiveConfig), + ioStats_(ioStats), + pool_(connectorQueryCtx->memoryPool()), + filePathColumn_(IcebergMetadataColumn::icebergDeleteFilePathColumn()), + posColumn_(IcebergMetadataColumn::icebergDeletePosColumn()), + splitOffset_(splitOffset), + deleteSplit_(nullptr), + deleteRowReader_(nullptr), + deletePositionsOutput_(nullptr), + deletePositionsOffset_(0), + endOfFile_(false) { + VELOX_CHECK(deleteFile_.content == FileContent::kPositionalDeletes); + + if (deleteFile_.recordCount == 0) { + return; + } + + // TODO: check if the lowerbounds and upperbounds in deleteFile overlap with + // this batch. If not, no need to proceed. + + // Create the ScanSpec for this delete file + auto scanSpec = std::make_shared(""); + scanSpec->addField(posColumn_->name, 0); + auto* pathSpec = + scanSpec->getOrCreateChild(common::Subfield(filePathColumn_->name)); + pathSpec->setFilter(std::make_unique( + std::vector({baseFilePath_}), false)); + + // Create the file schema (in RowType) and split that will be used by readers + std::vector deleteColumnNames( + {filePathColumn_->name, posColumn_->name}); + std::vector> deleteColumnTypes( + {filePathColumn_->type, posColumn_->type}); + RowTypePtr deleteFileSchema = + ROW(std::move(deleteColumnNames), std::move(deleteColumnTypes)); + + deleteSplit_ = std::make_shared( + connectorId, + deleteFile_.filePath, + deleteFile_.fileFormat, + 0, + deleteFile_.fileSizeInBytes); + + // Create the Reader and RowReader + + dwio::common::ReaderOptions deleteReaderOpts(pool_); + configureReaderOptions( + deleteReaderOpts, + hiveConfig_, + connectorQueryCtx_->sessionProperties(), + deleteFileSchema, + deleteSplit_); + + auto deleteFileHandle = + fileHandleFactory_->generate(deleteFile_.filePath).second; + auto deleteFileInput = createBufferedInput( + *deleteFileHandle, + deleteReaderOpts, + connectorQueryCtx_, + ioStats_, + executor_); + + auto deleteReader = + dwio::common::getReaderFactory(deleteReaderOpts.getFileFormat()) + ->createReader(std::move(deleteFileInput), deleteReaderOpts); + + // Check if the whole delete file split can be skipped. This could happen when + // 1) the delete file doesn't contain the base file that is being read; 2) The + // delete file does not contain the positions in the current batch for the + // base file. 
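+  // Note: testFilters() below evaluates the file path filter in 'scanSpec'
+  // against the delete file's metadata, so a delete file whose statistics
+  // rule out 'baseFilePath_' is skipped without reading any of its rows.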
+  if (!testFilters(
+          scanSpec.get(),
+          deleteReader.get(),
+          deleteSplit_->filePath,
+          deleteSplit_->partitionKeys,
+          {})) {
+    ++runtimeStats.skippedSplits;
+    runtimeStats.skippedSplitBytes += deleteSplit_->length;
+    deleteSplit_.reset();
+    return;
+  }
+
+  dwio::common::RowReaderOptions deleteRowReaderOpts;
+  configureRowReaderOptions(
+      deleteRowReaderOpts,
+      {},
+      scanSpec,
+      nullptr,
+      deleteFileSchema,
+      deleteSplit_);
+
+  deleteRowReader_ = deleteReader->createRowReader(deleteRowReaderOpts);
+}
+
+void PositionalDeleteFileReader::readDeletePositions(
+    uint64_t baseReadOffset,
+    uint64_t size,
+    int8_t* deleteBitmap) {
+  // Read delete positions up to the last row number of the current batch. For
+  // the same base file, the delete positions are stored in ascending order
+  // within a delete file.
+  int64_t rowNumberUpperBound = splitOffset_ + baseReadOffset + size;
+
+  // First consume any delete positions left over from the last batch.
+  if (deletePositionsOutput_ &&
+      deletePositionsOffset_ < deletePositionsOutput_->size()) {
+    updateDeleteBitmap(
+        std::dynamic_pointer_cast(deletePositionsOutput_)
+            ->childAt(0),
+        baseReadOffset,
+        rowNumberUpperBound,
+        deleteBitmap);
+
+    if (readFinishedForBatch(rowNumberUpperBound)) {
+      return;
+    }
+  }
+
+  if (!deleteRowReader_ || !deleteSplit_) {
+    return;
+  }
+
+  // Read the new delete positions for this batch into deletePositionsOutput_
+  // and update the delete bitmap.
+  RowTypePtr outputRowType = ROW({posColumn_->name}, {posColumn_->type});
+  if (!deletePositionsOutput_) {
+    deletePositionsOutput_ = BaseVector::create(outputRowType, 0, pool_);
+  }
+
+  while (!readFinishedForBatch(rowNumberUpperBound)) {
+    auto rowsScanned = deleteRowReader_->next(size, deletePositionsOutput_);
+    if (rowsScanned > 0) {
+      VELOX_CHECK(
+          !deletePositionsOutput_->mayHaveNulls(),
+          "Iceberg delete file pos column cannot have nulls");
+
+      auto numDeletedRows = deletePositionsOutput_->size();
+      if (numDeletedRows > 0) {
+        deletePositionsOutput_->loadedVector();
+        deletePositionsOffset_ = 0;
+
+        updateDeleteBitmap(
+            std::dynamic_pointer_cast(deletePositionsOutput_)
+                ->childAt(0),
+            baseReadOffset,
+            rowNumberUpperBound,
+            deleteBitmap);
+      }
+    } else {
+      // Reached the end of the file.
+      endOfFile_ = true;
+      deleteSplit_.reset();
+      return;
+    }
+  }
+}
+
+bool PositionalDeleteFileReader::endOfFile() {
+  return endOfFile_;
+}
+
+void PositionalDeleteFileReader::updateDeleteBitmap(
+    VectorPtr deletePositionsVector,
+    uint64_t baseReadOffset,
+    int64_t rowNumberUpperBound,
+    int8_t* deleteBitmap) {
+  // Convert the positions in the file into bit positions relative to the
+  // start of the current batch.
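+  // For example, with splitOffset_ = 10000 and baseReadOffset = 512, a file
+  // position 10600 maps to bit 10600 - (10000 + 512) = 88 of the current
+  // batch's delete bitmap.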
+ const int64_t* deletePositions = + deletePositionsVector->as>()->rawValues(); + int64_t offset = baseReadOffset + splitOffset_; + while (deletePositionsOffset_ < deletePositionsVector->size() && + deletePositions[deletePositionsOffset_] < rowNumberUpperBound) { + bits::setBit( + deleteBitmap, deletePositions[deletePositionsOffset_] - offset); + deletePositionsOffset_++; + } +} + +bool PositionalDeleteFileReader::readFinishedForBatch( + int64_t rowNumberUpperBound) { + VELOX_CHECK_NOT_NULL(deletePositionsOutput_); + + auto deletePositionsVector = + std::dynamic_pointer_cast(deletePositionsOutput_)->childAt(0); + const int64_t* deletePositions = + deletePositionsVector->as>()->rawValues(); + + if (deletePositionsOutput_->size() != 0 && + deletePositionsOffset_ < deletePositionsVector->size() && + deletePositions[deletePositionsOffset_] >= rowNumberUpperBound) { + return true; + } + return false; +} + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/PositionalDeleteFileReader.h b/velox/connectors/hive/iceberg/PositionalDeleteFileReader.h new file mode 100644 index 000000000000..f6a1ddebcdb0 --- /dev/null +++ b/velox/connectors/hive/iceberg/PositionalDeleteFileReader.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include "velox/connectors/Connector.h" +#include "velox/connectors/hive/FileHandle.h" +#include "velox/connectors/hive/HiveConfig.h" +#include "velox/connectors/hive/HiveConnectorSplit.h" +#include "velox/dwio/common/Reader.h" + +namespace facebook::velox::connector::hive::iceberg { + +class IcebergDeleteFile; +class IcebergMetadataColumn; + +using SubfieldFilters = + std::unordered_map>; + +class PositionalDeleteFileReader { + public: + PositionalDeleteFileReader( + const IcebergDeleteFile& deleteFile, + const std::string& baseFilePath, + FileHandleFactory* fileHandleFactory, + const ConnectorQueryCtx* connectorQueryCtx, + folly::Executor* executor, + const std::shared_ptr hiveConfig, + std::shared_ptr ioStats, + dwio::common::RuntimeStatistics& runtimeStats, + uint64_t splitOffset, + const std::string& connectorId); + + void readDeletePositions( + uint64_t baseReadOffset, + uint64_t size, + int8_t* deleteBitmap); + + bool endOfFile(); + + private: + void updateDeleteBitmap( + VectorPtr deletePositionsVector, + uint64_t baseReadOffset, + int64_t rowNumberUpperBound, + int8_t* deleteBitmap); + + bool readFinishedForBatch(int64_t rowNumberUpperBound); + + const IcebergDeleteFile& deleteFile_; + const std::string& baseFilePath_; + FileHandleFactory* const fileHandleFactory_; + folly::Executor* const executor_; + const ConnectorQueryCtx* const connectorQueryCtx_; + const std::shared_ptr hiveConfig_; + std::shared_ptr ioStats_; + memory::MemoryPool* const pool_; + + std::shared_ptr filePathColumn_; + std::shared_ptr posColumn_; + uint64_t splitOffset_; + + std::shared_ptr deleteSplit_; + std::unique_ptr deleteRowReader_; + VectorPtr deletePositionsOutput_; + uint64_t deletePositionsOffset_; + bool endOfFile_; +}; + +} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/tests/CMakeLists.txt b/velox/connectors/hive/iceberg/tests/CMakeLists.txt new file mode 100644 index 000000000000..63603c724ec2 --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/CMakeLists.txt @@ -0,0 +1,34 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +if(NOT VELOX_DISABLE_GOOGLETEST) + + add_executable(velox_hive_iceberg_test IcebergReadTest.cpp) + add_test(velox_hive_iceberg_test velox_hive_iceberg_test) + + target_link_libraries( + velox_hive_iceberg_test + velox_hive_connector + velox_hive_iceberg_splitreader + velox_hive_partition_function + velox_dwio_common_exception + velox_dwio_common_test_utils + velox_dwio_dwrf_proto + velox_vector_test_lib + velox_exec + velox_exec_test_lib + Folly::folly + gtest + gtest_main) + +endif() diff --git a/velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp b/velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp new file mode 100644 index 000000000000..79443c73b3ce --- /dev/null +++ b/velox/connectors/hive/iceberg/tests/IcebergReadTest.cpp @@ -0,0 +1,280 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/common/file/FileSystems.h" +#include "velox/connectors/hive/HiveConnectorSplit.h" +#include "velox/connectors/hive/iceberg/IcebergDeleteFile.h" +#include "velox/connectors/hive/iceberg/IcebergMetadataColumns.h" +#include "velox/connectors/hive/iceberg/IcebergSplit.h" +#include "velox/exec/PlanNodeStats.h" +#include "velox/exec/tests/utils/HiveConnectorTestBase.h" +#include "velox/exec/tests/utils/PlanBuilder.h" + +#include + +using namespace facebook::velox::exec::test; +using namespace facebook::velox::exec; +using namespace facebook::velox::dwio::common; +using namespace facebook::velox::test; + +namespace facebook::velox::connector::hive::iceberg { + +class HiveIcebergTest : public HiveConnectorTestBase { + public: + void assertPositionalDeletes( + const std::vector& deleteRows, + bool multipleBaseFiles = false) { + assertPositionalDeletes( + deleteRows, + "SELECT * FROM tmp WHERE c0 NOT IN (" + makeNotInList(deleteRows) + ")", + multipleBaseFiles); + } + void assertPositionalDeletes( + const std::vector& deleteRows, + std::string duckdbSql, + bool multipleBaseFiles = false) { + std::shared_ptr dataFilePath = writeDataFile(rowCount); + + std::mt19937 gen{0}; + int64_t numDeleteRowsBefore = + multipleBaseFiles ? folly::Random::rand32(0, 1000, gen) : 0; + int64_t numDeleteRowsAfter = + multipleBaseFiles ? 
folly::Random::rand32(0, 1000, gen) : 0; + std::shared_ptr deleteFilePath = writePositionDeleteFile( + dataFilePath->path, + deleteRows, + numDeleteRowsBefore, + numDeleteRowsAfter); + + IcebergDeleteFile deleteFile( + FileContent::kPositionalDeletes, + deleteFilePath->path, + fileFomat_, + deleteRows.size() + numDeleteRowsBefore + numDeleteRowsAfter, + testing::internal::GetFileSize( + std::fopen(deleteFilePath->path.c_str(), "r"))); + + auto icebergSplit = makeIcebergSplit(dataFilePath->path, {deleteFile}); + + auto plan = tableScanNode(); + auto task = OperatorTestBase::assertQuery(plan, {icebergSplit}, duckdbSql); + + auto planStats = toPlanStats(task->taskStats()); + auto scanNodeId = plan->id(); + auto it = planStats.find(scanNodeId); + ASSERT_TRUE(it != planStats.end()); + ASSERT_TRUE(it->second.peakMemoryBytes > 0); + } + + std::vector makeRandomDeleteRows(int32_t maxRowNumber) { + std::mt19937 gen{0}; + std::vector deleteRows; + for (int i = 0; i < maxRowNumber; i++) { + if (folly::Random::rand32(0, 10, gen) > 8) { + deleteRows.push_back(i); + } + } + return deleteRows; + } + + std::vector makeSequenceRows(int32_t maxRowNumber) { + std::vector deleteRows; + deleteRows.resize(maxRowNumber); + std::iota(deleteRows.begin(), deleteRows.end(), 0); + return deleteRows; + } + + const static int rowCount = 20000; + + private: + std::shared_ptr makeIcebergSplit( + const std::string& dataFilePath, + const std::vector& deleteFiles = {}) { + std::unordered_map> partitionKeys; + std::unordered_map customSplitInfo; + customSplitInfo["table_format"] = "hive-iceberg"; + + auto file = filesystems::getFileSystem(dataFilePath, nullptr) + ->openFileForRead(dataFilePath); + const int64_t fileSize = file->size(); + + return std::make_shared( + kHiveConnectorId, + dataFilePath, + fileFomat_, + 0, + fileSize, + partitionKeys, + std::nullopt, + customSplitInfo, + nullptr, + deleteFiles); + } + + std::vector makeVectors(int32_t count, int32_t rowsPerVector) { + std::vector vectors; + + for (int i = 0; i < count; i++) { + auto data = makeSequenceRows(rowsPerVector); + VectorPtr c0 = vectorMaker_.flatVector(data); + vectors.push_back(makeRowVector({"c0"}, {c0})); + } + + return vectors; + } + + std::shared_ptr writeDataFile(uint64_t numRows) { + auto dataVectors = makeVectors(1, numRows); + + auto dataFilePath = TempFilePath::create(); + writeToFile(dataFilePath->path, dataVectors); + createDuckDbTable(dataVectors); + return dataFilePath; + } + + std::shared_ptr writePositionDeleteFile( + const std::string& dataFilePath, + const std::vector& deleteRows, + int64_t numRowsBefore = 0, + int64_t numRowsAfter = 0) { + // if containsMultipleDataFiles == true, we will write rows for other base + // files before and after the target base file + uint32_t numDeleteRows = numRowsBefore + deleteRows.size() + numRowsAfter; + + std::string dataFilePathBefore = dataFilePath + "_before"; + std::string dataFilePathAfter = dataFilePath + "_after"; + + auto filePathVector = + vectorMaker_.flatVector(numDeleteRows, [&](auto row) { + if (row < numRowsBefore) { + return StringView(dataFilePathBefore); + } else if ( + row >= numRowsBefore && row < deleteRows.size() + numRowsBefore) { + return StringView(dataFilePath); + } else if ( + row >= deleteRows.size() + numRowsBefore && row < numDeleteRows) { + return StringView(dataFilePathAfter); + } else { + return StringView(); + } + }); + + std::vector deleteRowsVec; + deleteRowsVec.reserve(numDeleteRows); + + if (numRowsBefore > 0) { + auto rowsBefore = 
makeSequenceRows(numRowsBefore); + deleteRowsVec.insert( + deleteRowsVec.end(), rowsBefore.begin(), rowsBefore.end()); + } + deleteRowsVec.insert( + deleteRowsVec.end(), deleteRows.begin(), deleteRows.end()); + if (numRowsAfter > 0) { + auto rowsAfter = makeSequenceRows(numRowsAfter); + deleteRowsVec.insert( + deleteRowsVec.end(), rowsAfter.begin(), rowsAfter.end()); + } + + auto deletePositionsVector = + vectorMaker_.flatVector(deleteRowsVec); + RowVectorPtr deleteFileVectors = makeRowVector( + {pathColumn_->name, posColumn_->name}, + {filePathVector, deletePositionsVector}); + + auto deleteFilePath = TempFilePath::create(); + writeToFile(deleteFilePath->path, deleteFileVectors); + + return deleteFilePath; + } + + std::string makeNotInList(const std::vector& deleteRows) { + if (deleteRows.empty()) { + return ""; + } + + return std::accumulate( + deleteRows.begin() + 1, + deleteRows.end(), + std::to_string(deleteRows[0]), + [](const std::string& a, int64_t b) { + return a + ", " + std::to_string(b); + }); + } + + std::shared_ptr assertQuery( + const core::PlanNodePtr& plan, + std::shared_ptr dataFilePath, + const std::vector& deleteFiles, + const std::string& duckDbSql) { + auto icebergSplit = makeIcebergSplit(dataFilePath->path, deleteFiles); + return OperatorTestBase::assertQuery(plan, {icebergSplit}, duckDbSql); + } + + core::PlanNodePtr tableScanNode() { + return PlanBuilder(pool_.get()).tableScan(rowType_).planNode(); + } + + private: + dwio::common::FileFormat fileFomat_{dwio::common::FileFormat::DWRF}; + RowTypePtr rowType_{ROW({"c0"}, {BIGINT()})}; + std::shared_ptr pathColumn_ = + IcebergMetadataColumn::icebergDeleteFilePathColumn(); + std::shared_ptr posColumn_ = + IcebergMetadataColumn::icebergDeletePosColumn(); +}; + +TEST_F(HiveIcebergTest, positionalDeletesSingleBaseFile) { + folly::SingletonVault::singleton()->registrationComplete(); + + // Delete row 0, 1, 2, 3 from the first batch out of two. + assertPositionalDeletes({0, 1, 2, 3}); + // Delete the first and last row in each batch (10000 rows per batch) + assertPositionalDeletes({0, 9999, 10000, 19999}); + // Delete several rows in the second batch (10000 rows per batch) + assertPositionalDeletes({10000, 10002, 19999}); + // Delete random rows + assertPositionalDeletes(makeRandomDeleteRows(rowCount)); + // Delete 0 rows + assertPositionalDeletes({}, "SELECT * FROM tmp", false); + // Delete all rows + assertPositionalDeletes( + makeSequenceRows(rowCount), "SELECT * FROM tmp WHERE 1 = 0", false); + // Delete rows that don't exist + assertPositionalDeletes({20000, 29999}); +} + +// The positional delete file contains rows from multiple base files +TEST_F(HiveIcebergTest, positionalDeletesMultipleBaseFiles) { + folly::SingletonVault::singleton()->registrationComplete(); + + // Delete row 0, 1, 2, 3 from the first batch out of two. 
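+  // With multipleBaseFiles = true, the delete file also contains positions
+  // for synthetic "_before" and "_after" base files; the reader must filter
+  // these out via the file path column.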
+  assertPositionalDeletes({0, 1, 2, 3}, true);
+  // Delete the first and last row in each batch (10000 rows per batch)
+  assertPositionalDeletes({0, 9999, 10000, 19999}, true);
+  // Delete several rows in the second batch (10000 rows per batch)
+  assertPositionalDeletes({10000, 10002, 19999}, true);
+  // Delete random rows
+  assertPositionalDeletes(makeRandomDeleteRows(rowCount), true);
+  // Delete 0 rows
+  assertPositionalDeletes({}, "SELECT * FROM tmp", true);
+  // Delete all rows
+  assertPositionalDeletes(
+      makeSequenceRows(rowCount), "SELECT * FROM tmp WHERE 1 = 0", true);
+  // Delete rows that don't exist
+  assertPositionalDeletes({20000, 29999}, true);
+}
+
+} // namespace facebook::velox::connector::hive::iceberg

From 021b2298e0cf8dd3f9820d90475b7a4be589db97 Mon Sep 17 00:00:00 2001
From: zhli1142015
Date: Wed, 14 Feb 2024 07:53:05 -0800
Subject: [PATCH 19/38] Remove duplicate line in arrayGroupProbe (#8745)

Summary:
Line 584 duplicates line 583; we should not set the result twice.

Pull Request resolved: https://github.com/facebookincubator/velox/pull/8745

Reviewed By: Yuhta

Differential Revision: D53758874

Pulled By: mbasmanova

fbshipit-source-id: 5bb6a74aa948fab47253a69f1dc29efdc87aed9c
---
 velox/exec/HashTable.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/velox/exec/HashTable.cpp b/velox/exec/HashTable.cpp
index d09b3bc041f4..44cd1e82e273 100644
--- a/velox/exec/HashTable.cpp
+++ b/velox/exec/HashTable.cpp
@@ -580,8 +580,7 @@ void HashTable::arrayGroupProbe(HashLookup& lookup) {
     if (UNLIKELY(!group)) {
       group = insertEntry(lookup, index, row);
     }
-    groups[row] = group;
-    lookup.hits[row] = group; // NOLINT
+    groups[row] = group; // NOLINT
   }
 }

From 2eaf155c522e6bc2a6b498ed0d86482a5ff11847 Mon Sep 17 00:00:00 2001
From: Kevin Wilfong
Date: Wed, 14 Feb 2024 10:25:55 -0800
Subject: [PATCH 20/38] Remove the notion of encodings from
 PrestoIterativeVectorSerializer (#8606)

Summary:
Pull Request resolved: https://github.com/facebookincubator/velox/pull/8606

PrestoBatchVectorSerializer supports maintaining encodings while serializing through the BatchVectorSerializer interface. We no longer need it in PrestoIterativeVectorSerializer, where it was a little dangerous (e.g., append could be called multiple times before flush, breaking serialization).
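For callers migrating off the removed API, a minimal usage sketch of the replacement path (illustrative only; 'pool', 'out', and 'rowVector' are assumed to be a memory pool pointer, an OutputStream pointer, and a RowVectorPtr):

    // Hypothetical sketch, not part of this diff.
    serializer::presto::PrestoVectorSerde serde;
    auto batchSerializer = serde.createBatchSerializer(pool, /*options=*/nullptr);
    // PrestoBatchVectorSerializer inspects the vector's encodings itself, so
    // no explicit encoding list needs to be passed in.
    batchSerializer->serialize(rowVector, out);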
Reviewed By: bikramSingh91 Differential Revision: D53202362 fbshipit-source-id: 9a67f972af8db37fb114b69eef9771254744211c --- velox/serializers/PrestoSerializer.cpp | 56 ------------------- velox/serializers/PrestoSerializer.h | 22 -------- .../tests/PrestoSerializerTest.cpp | 44 --------------- 3 files changed, 122 deletions(-) diff --git a/velox/serializers/PrestoSerializer.cpp b/velox/serializers/PrestoSerializer.cpp index 149a70948fc5..bc9843174699 100644 --- a/velox/serializers/PrestoSerializer.cpp +++ b/velox/serializers/PrestoSerializer.cpp @@ -3320,7 +3320,6 @@ class PrestoIterativeVectorSerializer : public IterativeVectorSerializer { public: PrestoIterativeVectorSerializer( const RowTypePtr& rowType, - std::vector encodings, int32_t numRows, StreamArena* streamArena, bool useLosslessTimestamp, @@ -3332,40 +3331,10 @@ class PrestoIterativeVectorSerializer : public IterativeVectorSerializer { streams_.resize(numTypes); for (int i = 0; i < numTypes; ++i) { - std::optional encoding = std::nullopt; - if (i < encodings.size()) { - encoding = encodings[i]; - } streams_[i] = std::make_unique( types[i], - encoding, std::nullopt, - streamArena, - numRows, - useLosslessTimestamp); - } - } - - // Constructor that takes a row vector instead of only the types. This is - // different because then we know exactly how each vector is encoded - // (recursively). - PrestoIterativeVectorSerializer( - const RowVectorPtr& rowVector, - StreamArena* streamArena, - bool useLosslessTimestamp, - common::CompressionKind compressionKind) - : streamArena_(streamArena), - codec_(common::compressionKindToCodec(compressionKind)) { - auto numRows = rowVector->size(); - auto rowType = rowVector->type(); - auto numChildren = rowVector->childrenSize(); - streams_.resize(numChildren); - - for (int i = 0; i < numChildren; i++) { - streams_[i] = std::make_unique( - rowType->childAt(i), std::nullopt, - rowVector->childAt(i), streamArena, numRows, useLosslessTimestamp); @@ -3420,16 +3389,6 @@ class PrestoIterativeVectorSerializer : public IterativeVectorSerializer { flushStreams(streams_, numRows_, *streamArena_, *codec_, out); } - void flushEncoded(const RowVectorPtr& vector, OutputStream* out) { - VELOX_CHECK_EQ(0, numRows_); - - std::vector ranges{{0, vector->size()}}; - Scratch scratch; - append(vector, folly::Range(ranges.data(), ranges.size()), scratch); - - flushStreams(streams_, vector->size(), *streamArena_, *codec_, out); - } - private: StreamArena* const streamArena_; const std::unique_ptr codec_; @@ -3464,7 +3423,6 @@ PrestoVectorSerde::createIterativeSerializer( const auto prestoOptions = toPrestoOptions(options); return std::make_unique( type, - prestoOptions.encodings, numRows, streamArena, prestoOptions.useLosslessTimestamp, @@ -3479,20 +3437,6 @@ std::unique_ptr PrestoVectorSerde::createBatchSerializer( pool, prestoOptions.useLosslessTimestamp, prestoOptions.compressionKind); } -void PrestoVectorSerde::deprecatedSerializeEncoded( - const RowVectorPtr& vector, - StreamArena* streamArena, - const Options* options, - OutputStream* out) { - auto prestoOptions = toPrestoOptions(options); - auto serializer = std::make_unique( - vector, - streamArena, - prestoOptions.useLosslessTimestamp, - prestoOptions.compressionKind); - serializer->flushEncoded(vector, out); -} - void PrestoVectorSerde::deserialize( ByteInputStream* source, velox::memory::MemoryPool* pool, diff --git a/velox/serializers/PrestoSerializer.h b/velox/serializers/PrestoSerializer.h index a957e27ce76e..287741afe0c8 100644 --- 
a/velox/serializers/PrestoSerializer.h +++ b/velox/serializers/PrestoSerializer.h @@ -58,9 +58,6 @@ class PrestoVectorSerde : public VectorSerde { bool useLosslessTimestamp{false}; common::CompressionKind compressionKind{ common::CompressionKind::CompressionKind_NONE}; - - /// Specifies the encoding for each of the top-level child vector. - std::vector encodings; }; /// Adds the serialized sizes of the rows of 'vector' in 'ranges[i]' to @@ -90,25 +87,6 @@ class PrestoVectorSerde : public VectorSerde { memory::MemoryPool* pool, const Options* options) override; - /// Serializes a single RowVector with possibly encoded children, preserving - /// their encodings. Encodings are preserved recursively for any RowVector - /// children, but not for children of other nested vectors such as Array, Map, - /// and Dictionary. - /// - /// PrestoPage does not support serialization of Dictionaries with nulls; - /// in case dictionaries contain null they are serialized as flat buffers. - /// - /// In order to override the encodings of top-level columns in the RowVector, - /// you can specifiy the encodings using PrestoOptions.encodings - /// - /// DEPRECATED: Use createBatchSerializer and the BatchVectorSerializer's - /// serialize function instead. - void deprecatedSerializeEncoded( - const RowVectorPtr& vector, - StreamArena* streamArena, - const Options* options, - OutputStream* out); - bool supportsAppendInDeserialize() const override { return true; } diff --git a/velox/serializers/tests/PrestoSerializerTest.cpp b/velox/serializers/tests/PrestoSerializerTest.cpp index 6c75a67d6c3a..143abc7dc2cb 100644 --- a/velox/serializers/tests/PrestoSerializerTest.cpp +++ b/velox/serializers/tests/PrestoSerializerTest.cpp @@ -253,23 +253,6 @@ class PrestoSerializerTest return stats; } - void serializeEncoded( - const RowVectorPtr& rowVector, - std::ostream* output, - const serializer::presto::PrestoVectorSerde::PrestoOptions* - serdeOptions) { - facebook::velox::serializer::presto::PrestoOutputStreamListener listener; - OStreamOutputStream out(output, &listener); - StreamArena arena{pool_.get()}; - auto paramOptions = getParamSerdeOptions(serdeOptions); - - for (const auto& child : rowVector->children()) { - paramOptions.encodings.push_back(child->encoding()); - } - - serde_->deprecatedSerializeEncoded(rowVector, &arena, ¶mOptions, &out); - } - void assertEqualEncoding( const RowVectorPtr& expected, const RowVectorPtr& actual) { @@ -316,17 +299,6 @@ class PrestoSerializerTest assertEqualVectors(expected, result); } - void testEncodedRoundTrip( - const RowVectorPtr& data, - const serializer::presto::PrestoVectorSerde::PrestoOptions* serdeOptions = - nullptr) { - std::ostringstream out; - serializeEncoded(data, &out, serdeOptions); - const auto serialized = out.str(); - - verifySerializedEncodedData(data, serialized, serdeOptions); - } - void serializeBatch( const RowVectorPtr& rowVector, std::ostream* output, @@ -744,34 +716,18 @@ TEST_P(PrestoSerializerTest, longDecimal) { testRoundTrip(vector); } -// Test that hierarchically encoded columns (rows) have their encodings -// preserved. -TEST_P(PrestoSerializerTest, encodings) { - testEncodedRoundTrip(encodingsTestVector()); -} - // Test that hierarchically encoded columns (rows) have their encodings // preserved by the PrestoBatchVectorSerializer. TEST_P(PrestoSerializerTest, encodingsBatchVectorSerializer) { testBatchVectorSerializerRoundTrip(encodingsTestVector()); } -// Test that array elements have their encodings preserved. 
-TEST_P(PrestoSerializerTest, encodingsArrayElements) { - testEncodedRoundTrip(encodingsArrayElementsTestVector()); -} - // Test that array elements have their encodings preserved by the // PrestoBatchVectorSerializer. TEST_P(PrestoSerializerTest, encodingsArrayElementsBatchVectorSerializer) { testBatchVectorSerializerRoundTrip(encodingsArrayElementsTestVector()); } -// Test that map values have their encodings preserved. -TEST_P(PrestoSerializerTest, encodingsMapValues) { - testEncodedRoundTrip(encodingsMapValuesTestVector()); -} - // Test that map values have their encodings preserved by the // PrestoBatchVectorSerializer. TEST_P(PrestoSerializerTest, encodingsMapValuesBatchVectorSerializer) { From 159e7e972393eb0d7c974e10c8d85652967bb06b Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Wed, 14 Feb 2024 10:31:41 -0800 Subject: [PATCH 21/38] Improve Centos8 setup script (#8683) Summary: Improve centos8 setup script to match the MacOS and Ubuntu setup scripts. Move common functions to setup-helper-functions.sh Pull Request resolved: https://github.com/facebookincubator/velox/pull/8683 Reviewed By: mbasmanova Differential Revision: D53600088 Pulled By: kgpai fbshipit-source-id: c508134a253b012e63a7504eb6acdd914d9c32aa --- scripts/setup-centos8.sh | 210 ++++++++++++++++++++---------- scripts/setup-helper-functions.sh | 22 +++- scripts/setup-macos.sh | 20 --- scripts/setup-ubuntu.sh | 22 +--- 4 files changed, 162 insertions(+), 112 deletions(-) diff --git a/scripts/setup-centos8.sh b/scripts/setup-centos8.sh index 60a20d49c7e9..2df599e0d0d8 100755 --- a/scripts/setup-centos8.sh +++ b/scripts/setup-centos8.sh @@ -30,19 +30,16 @@ function dnf_install { dnf install -y -q --setopt=install_weak_deps=False "$@" } +dnf update -y dnf_install epel-release dnf-plugins-core # For ccache, ninja dnf config-manager --set-enabled powertools +dnf update -y dnf_install ninja-build cmake curl ccache gcc-toolset-9 git wget which libevent-devel \ openssl-devel re2-devel libzstd-devel lz4-devel double-conversion-devel \ libdwarf-devel curl-devel libicu-devel -dnf remove -y gflags - -# Required for Thrift dnf_install autoconf automake libtool bison flex python3 libsodium-devel -dnf_install conda - # install sphinx for doc gen pip3 install sphinx sphinx-tabs breathe sphinx_rtd_theme @@ -50,83 +47,156 @@ pip3 install sphinx sphinx-tabs breathe sphinx_rtd_theme source /opt/rh/gcc-toolset-9/enable || exit 1 set -u -function cmake_install { - cmake -B "$1-build" -GNinja -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_CXX_FLAGS="${CFLAGS}" -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" -Wno-dev "$@" - ninja -C "$1-build" install +function install_conda { + dnf_install conda } -# Fetch sources. -wget_and_untar https://github.com/gflags/gflags/archive/v2.2.2.tar.gz gflags -wget_and_untar https://github.com/google/glog/archive/v0.6.0.tar.gz glog -wget_and_untar http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz lzo -wget_and_untar https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.gz boost -wget_and_untar https://github.com/google/snappy/archive/1.1.8.tar.gz snappy -wget_and_untar https://github.com/fmtlib/fmt/archive/10.1.1.tar.gz fmt +function install_gflags { + # Remove an older version if present. 
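+  # (A distro-packaged gflags could otherwise be picked up ahead of the
+  # v2.2.2 build installed from source below.)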
+ dnf remove -y gflags + wget_and_untar https://github.com/gflags/gflags/archive/v2.2.2.tar.gz gflags + ( + cd gflags + cmake_install -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DBUILD_gflags_LIB=ON -DLIB_SUFFIX=64 + ) +} -wget_and_untar https://github.com/protocolbuffers/protobuf/releases/download/v21.4/protobuf-all-21.4.tar.gz protobuf +function install_glog { + wget_and_untar https://github.com/google/glog/archive/v0.6.0.tar.gz glog + ( + cd glog + cmake_install -DBUILD_SHARED_LIBS=ON + ) +} + +function install_lzo { + wget_and_untar http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz lzo + ( + cd lzo + ./configure --prefix=/usr --enable-shared --disable-static --docdir=/usr/share/doc/lzo-2.10 + make "-j$(nproc)" + make install + ) +} + +function install_boost { + wget_and_untar https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.gz boost + ( + cd boost + ./bootstrap.sh --prefix=/usr/local + ./b2 "-j$(nproc)" -d0 install threading=multi + ) +} + +function install_snappy { + wget_and_untar https://github.com/google/snappy/archive/1.1.8.tar.gz snappy + ( + cd snappy + cmake_install -DSNAPPY_BUILD_TESTS=OFF + ) +} + +function install_fmt { + wget_and_untar https://github.com/fmtlib/fmt/archive/10.1.1.tar.gz fmt + ( + cd fmt + cmake_install -DFMT_TEST=OFF + ) +} + +function install_protobuf { + wget_and_untar https://github.com/protocolbuffers/protobuf/releases/download/v21.4/protobuf-all-21.4.tar.gz protobuf + ( + cd protobuf + ./configure --prefix=/usr + make "-j${NPROC}" + make install + ldconfig + ) +} FB_OS_VERSION="v2023.12.04.00" -wget_and_untar https://github.com/facebookincubator/fizz/archive/refs/tags/${FB_OS_VERSION}.tar.gz fizz -wget_and_untar https://github.com/facebook/folly/archive/refs/tags/${FB_OS_VERSION}.tar.gz folly -wget_and_untar https://github.com/facebook/wangle/archive/refs/tags/${FB_OS_VERSION}.tar.gz wangle -wget_and_untar https://github.com/facebook/fbthrift/archive/refs/tags/${FB_OS_VERSION}.tar.gz fbthrift -wget_and_untar https://github.com/facebook/mvfst/archive/refs/tags/${FB_OS_VERSION}.tar.gz mvfst +function install_fizz { + wget_and_untar https://github.com/facebookincubator/fizz/archive/refs/tags/${FB_OS_VERSION}.tar.gz fizz + ( + cd fizz/fizz + cmake_install -DBUILD_TESTS=OFF + ) +} -wait # For cmake and source downloads to complete. +function install_folly { + wget_and_untar https://github.com/facebook/folly/archive/refs/tags/${FB_OS_VERSION}.tar.gz folly + ( + cd folly + cmake_install -DFOLLY_HAVE_INT128_T=ON + ) +} -# Build & install. 
-( - cd lzo - ./configure --prefix=/usr --enable-shared --disable-static --docdir=/usr/share/doc/lzo-2.10 - make "-j$(nproc)" - make install -) +function install_wangle { + wget_and_untar https://github.com/facebook/wangle/archive/refs/tags/${FB_OS_VERSION}.tar.gz wangle + ( + cd wangle/wangle + cmake_install -DBUILD_TESTS=OFF + ) +} -( - cd boost - ./bootstrap.sh --prefix=/usr/local - ./b2 "-j$(nproc)" -d0 install threading=multi -) +function install_fbthrift { + wget_and_untar https://github.com/facebook/fbthrift/archive/refs/tags/${FB_OS_VERSION}.tar.gz fbthrift + ( + cd fbthrift + cmake_install -Denable_tests=OFF + ) +} + +function install_mvfst { + wget_and_untar https://github.com/facebook/mvfst/archive/refs/tags/${FB_OS_VERSION}.tar.gz mvfst + ( + cd mvfst + cmake_install -DBUILD_TESTS=OFF + ) +} + +function install_duckdb { + if $BUILD_DUCKDB ; then + echo 'Building DuckDB' + wget_and_untar https://github.com/duckdb/duckdb/archive/refs/tags/v0.8.1.tar.gz duckdb + ( + cd duckdb + cmake_install -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF -DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE=Release + ) + fi +} + +function install_velox_deps { + run_and_time install_conda + run_and_time install_gflags + run_and_time install_glog + run_and_time install_lzo + run_and_time install_snappy + run_and_time install_boost + run_and_time install_protobuf + run_and_time install_fmt + run_and_time install_folly + run_and_time install_fizz + run_and_time install_wangle + run_and_time install_mvfst + run_and_time install_fbthrift + run_and_time install_duckdb +} + +(return 2> /dev/null) && return # If script was sourced, don't run commands. ( - cd protobuf - ./configure --prefix=/usr - make "-j${NPROC}" - make install - ldconfig + if [[ $# -ne 0 ]]; then + for cmd in "$@"; do + run_and_time "${cmd}" + done + else + install_velox_deps + fi ) -cmake_install gflags -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DBUILD_gflags_LIB=ON -DLIB_SUFFIX=64 -DCMAKE_INSTALL_PREFIX:PATH=/usr -cmake_install glog -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX:PATH=/usr -cmake_install snappy -DSNAPPY_BUILD_TESTS=OFF -cmake_install fmt -DFMT_TEST=OFF -cmake_install folly -DFOLLY_HAVE_INT128_T=ON - -cmake_install fizz/fizz -DBUILD_TESTS=OFF -cmake_install wangle/wangle -DBUILD_TESTS=OFF -cmake_install mvfst -DBUILD_TESTS=OFF -cmake_install fbthrift -Denable_tests=OFF - -if $BUILD_DUCKDB ; then - echo 'Building DuckDB' - mkdir ~/duckdb-install && cd ~/duckdb-install - wget https://github.com/duckdb/duckdb/archive/refs/tags/v0.8.1.tar.gz - tar -xf v0.8.1.tar.gz - cd duckdb-0.8.1 - mkdir build && cd build - CMAKE_FLAGS=( - "-DBUILD_UNITTESTS=OFF" - "-DENABLE_SANITIZER=OFF" - "-DENABLE_UBSAN=OFF" - "-DBUILD_SHELL=OFF" - "-DEXPORT_DLL_SYMBOLS=OFF" - "-DCMAKE_BUILD_TYPE=Release" - ) - cmake ${CMAKE_FLAGS[*]} .. - make install -j 16 - rm -rf ~/duckdb-install -fi +echo "All dependencies for Velox installed!" dnf clean all diff --git a/scripts/setup-helper-functions.sh b/scripts/setup-helper-functions.sh index 9495d42bfc36..4f0a11e152fd 100644 --- a/scripts/setup-helper-functions.sh +++ b/scripts/setup-helper-functions.sh @@ -15,6 +15,27 @@ # github_checkout $REPO $VERSION $GIT_CLONE_PARAMS clones or re-uses an existing clone of the # specified repo, checking out the requested version. + +function run_and_time { + time "$@" || (echo "Failed to run $* ." 
; exit 1 ) + { echo "+ Finished running $*"; } 2> /dev/null +} + +function prompt { + ( + while true; do + local input="${PROMPT_ALWAYS_RESPOND:-}" + echo -n "$(tput bold)$* [Y, n]$(tput sgr0) " + [[ -z "${input}" ]] && read input + if [[ "${input}" == "Y" || "${input}" == "y" || "${input}" == "" ]]; then + return 0 + elif [[ "${input}" == "N" || "${input}" == "n" ]]; then + return 1 + fi + done + ) 2> /dev/null +} + function github_checkout { local REPO=$1 shift @@ -36,7 +57,6 @@ function github_checkout { cd "${DIRNAME}" } - # get_cxx_flags [$CPU_ARCH] # Sets and exports the variable VELOX_CXX_FLAGS with appropriate compiler flags. # If $CPU_ARCH is set then we use that else we determine best possible set of flags diff --git a/scripts/setup-macos.sh b/scripts/setup-macos.sh index 197ea54e8394..7872195b9e77 100755 --- a/scripts/setup-macos.sh +++ b/scripts/setup-macos.sh @@ -38,26 +38,6 @@ MACOS_DEPS="ninja flex bison cmake ccache protobuf@21 icu4c boost gflags glog li FB_OS_VERSION="v2023.12.04.00" -function run_and_time { - time "$@" || (echo "Failed to run $* ." ; exit 1 ) - { echo "+ Finished running $*"; } 2> /dev/null -} - -function prompt { - ( - while true; do - local input="${PROMPT_ALWAYS_RESPOND:-}" - echo -n "$(tput bold)$* [Y, n]$(tput sgr0) " - [[ -z "${input}" ]] && read input - if [[ "${input}" == "Y" || "${input}" == "y" || "${input}" == "" ]]; then - return 0 - elif [[ "${input}" == "N" || "${input}" == "n" ]]; then - return 1 - fi - done - ) 2> /dev/null -} - function update_brew { DEFAULT_BREW_PATH=/usr/local/bin/brew if [ `arch` == "arm64" ] ; diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index 60b37c3b2ad5..d44b06df22da 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -67,26 +67,6 @@ sudo --preserve-env apt update && sudo --preserve-env apt install -y libunwind-d tzdata \ wget -function run_and_time { - time "$@" - { echo "+ Finished running $*"; } 2> /dev/null -} - -function prompt { - ( - while true; do - local input="${PROMPT_ALWAYS_RESPOND:-}" - echo -n "$(tput bold)$* [Y, n]$(tput sgr0) " - [[ -z "${input}" ]] && read input - if [[ "${input}" == "Y" || "${input}" == "y" || "${input}" == "" ]]; then - return 0 - elif [[ "${input}" == "N" || "${input}" == "n" ]]; then - return 1 - fi - done - ) 2> /dev/null -} - function install_fmt { github_checkout fmtlib/fmt "${FMT_VERSION}" cmake_install -DFMT_TEST=OFF @@ -155,4 +135,4 @@ function install_velox_deps { fi ) -echo "All deps for Velox installed! Now try \"make\"" +echo "All dependencies for Velox installed!" From a7ac9a716fb30312ae4022bbca1b20c5291377c0 Mon Sep 17 00:00:00 2001 From: Pedro Pedreira Date: Wed, 14 Feb 2024 16:07:16 -0800 Subject: [PATCH 22/38] Add `VELOX_BUILD_VECTOR_TEST_UTILS` compile flag (#8747) Summary: Adding `VELOX_BUILD_VECTOR_TEST_UTILS` to allow users to compile only vector util test utilities (VectorMaker), but without bringing the larger exec test dependencies. Pull Request resolved: https://github.com/facebookincubator/velox/pull/8747 Reviewed By: Yuhta Differential Revision: D53775689 Pulled By: pedroerp fbshipit-source-id: e43e63e28720d322ec912339c5f0d7dd8c8f0fd8 --- CMakeLists.txt | 1 + velox/vector/CMakeLists.txt | 2 +- velox/vector/tests/utils/CMakeLists.txt | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e2099787a969..82fed089f27c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,6 +104,7 @@ option(VELOX_ENABLE_CCACHE "Use ccache if installed." 
ON) option(VELOX_ENABLE_CODEGEN_SUPPORT "Enable experimental codegen support." OFF) option(VELOX_BUILD_TEST_UTILS "Builds Velox test utilities" OFF) +option(VELOX_BUILD_VECTOR_TEST_UTILS "Builds Velox vector test utilities" OFF) option(VELOX_BUILD_PYTHON_PACKAGE "Builds Velox Python bindings" OFF) option( VELOX_ENABLE_INT64_BUILD_PARTITION_BOUND diff --git a/velox/vector/CMakeLists.txt b/velox/vector/CMakeLists.txt index faa906605cd0..1b38cc1d248d 100644 --- a/velox/vector/CMakeLists.txt +++ b/velox/vector/CMakeLists.txt @@ -37,7 +37,7 @@ add_subdirectory(fuzzer) if(${VELOX_BUILD_TESTING}) add_subdirectory(tests) -elseif(${VELOX_BUILD_TEST_UTILS}) +elseif(${VELOX_BUILD_TEST_UTILS} OR ${VELOX_BUILD_VECTOR_TEST_UTILS}) add_subdirectory(tests/utils) endif() diff --git a/velox/vector/tests/utils/CMakeLists.txt b/velox/vector/tests/utils/CMakeLists.txt index 8b7d335db7ab..5b77b40ba99b 100644 --- a/velox/vector/tests/utils/CMakeLists.txt +++ b/velox/vector/tests/utils/CMakeLists.txt @@ -13,5 +13,4 @@ # limitations under the License. add_library(velox_vector_test_lib VectorMaker.cpp VectorTestBase.cpp) -target_link_libraries(velox_vector_test_lib velox_exec velox_vector gtest - gtest_main) +target_link_libraries(velox_vector_test_lib velox_vector gtest gtest_main) From 2068b95a31db717b81ce5903f23782522996f037 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Wed, 14 Feb 2024 16:08:42 -0800 Subject: [PATCH 23/38] Call unsafeResize in FlatMapReaders (#8129) Summary: X-link: https://github.com/facebookexternal/alpha/pull/15 Pull Request resolved: https://github.com/facebookincubator/velox/pull/8129 We need to call unsafeResize in the DWRF FlatMapReader. Reviewed By: zzhao0 Differential Revision: D52339055 fbshipit-source-id: 5ce8b38cebab5da7623c8a4923dc816cd94e5e12 --- velox/dwio/dwrf/reader/FlatMapColumnReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/velox/dwio/dwrf/reader/FlatMapColumnReader.cpp b/velox/dwio/dwrf/reader/FlatMapColumnReader.cpp index 7ba27932222b..2e52f05081b8 100644 --- a/velox/dwio/dwrf/reader/FlatMapColumnReader.cpp +++ b/velox/dwio/dwrf/reader/FlatMapColumnReader.cpp @@ -686,7 +686,7 @@ void FlatMapStructEncodingColumnReader::next( if (rowVector) { // Track children vectors in a local variable because readNulls may reset // the parent vector. - result->resize(numValues, false); + rowVector->unsafeResize(numValues, false); children = rowVector->children(); DWIO_ENSURE_EQ(children.size(), keyNodes_.size()); } From 36071eb3b7befcf97f9d7d2948f026001cd4014b Mon Sep 17 00:00:00 2001 From: lingbin Date: Thu, 15 Feb 2024 08:05:20 -0800 Subject: [PATCH 24/38] Fix typos in Memory Management documentation (#8756) Summary: `SizeClass::SizeMix` --> `MemoryAllocator::SizeMix`. Pull Request resolved: https://github.com/facebookincubator/velox/pull/8756 Reviewed By: Yuhta Differential Revision: D53810704 Pulled By: mbasmanova fbshipit-source-id: 35525ec3a165784e2b31d607c31f6ec139de4925 --- velox/docs/develop/memory.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/velox/docs/develop/memory.rst b/velox/docs/develop/memory.rst index 04e5a649f1e2..d784a4c91e1b 100644 --- a/velox/docs/develop/memory.rst +++ b/velox/docs/develop/memory.rst @@ -722,7 +722,7 @@ allocation of a fixed size buffer (class page) which is a power of 2 of a machine page size. *MMapAllocator* creates 9 different *SizeClass* objects with class page size ranging from 1 machine page (4KB) to 256 machine pages (1MB). 
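 As an illustration (a sketch only; the real *MemoryAllocator::allocationSize*
 also honors *minSizeClass* and may round the request up), a request for 150
 machine pages could be covered greedily from these power-of-2 size classes::

   // Hypothetical sketch: cover 'numPages' machine pages with class pages.
   std::vector<int32_t> classSizes{256, 128, 64, 32, 16, 8, 4, 2, 1};
   int32_t numPages = 150;
   for (auto classSize : classSizes) {
     int32_t classPages = numPages / classSize; // class pages from this class
     numPages -= classPages * classSize;        // 150 = 128 + 16 + 4 + 2
   }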
To allocate a large number of machine pages, *MmapAllocator* calls
-&MemoryAllocator::allocationSize* to build the allocation plan
+*MemoryAllocator::allocationSize* to build the allocation plan
 (*MemoryAllocator::SizeMix*) which consists of a list of chosen *SizeClass*
 objects and the number of class pages to allocate from each of them.
@@ -780,9 +780,9 @@ The simplified *MmapAllocator::allocateNonContiguous* implementation:
 #. calls *MemoryAllocator::allocationSize* with *numPages* and *minSizeClass*.
    *numPages* specifies the number of machine pages to allocate. *minSizeClass*
    specifies the minimum class page size to allocate from. The function returns
-   the number of class pages to allocate from each chosen *SizeClasses* in
-   *SizeClass::SizeMix*. The sum of machine pages to allocate from *SizeClasses*
-   should be no less than the requested *numPages*
+   the number of class pages to allocate from each chosen *SizeClass* in
+   *MemoryAllocator::SizeMix*. The sum of machine pages to allocate from all
+   *SizeClass* objects should be no less than the requested *numPages*.
 
 #. increase the memory allocator’s memory usage and check if it exceeds the
    system memory limit (*MemoryAllocator::capacity_*). If it exceeds, then fails
@@ -901,4 +901,4 @@ check the system memory usage periodically. Whenever the system memory usage
 exceeds a certain threshold, it tries to free up memory from Velox by shrinking
 the file cache (*AsyncDataCache::shrink*), and returns the freed cache memory
 back to the OS. This way we can automatically shrink the file cache in response
-to the transient spiky memory usage from non-Velox components in a query system.
\ No newline at end of file
+to the transient spiky memory usage from non-Velox components in a query system.

From 615af5158b57cf1593fbe69a1c365fc44aede396 Mon Sep 17 00:00:00 2001
From: rrando901
Date: Thu, 15 Feb 2024 08:06:02 -0800
Subject: [PATCH 25/38] Add Decimal support to set_agg and set_union (#7936)

Summary:
Delivers https://github.com/facebookincubator/velox/issues/7935

Pull Request resolved: https://github.com/facebookincubator/velox/pull/7936

Reviewed By: mbasmanova

Differential Revision: D53782978

Pulled By: Yuhta

fbshipit-source-id: 435af3f6f6115d7349a99ad6338e975574413849
---
 .../prestosql/aggregates/SetAggregates.cpp    |  26 ++-
 .../prestosql/aggregates/tests/SetAggTest.cpp | 186 ++++++++++++++++++
 .../aggregates/tests/SetUnionTest.cpp         | 177 +++++++++++++++++
 3 files changed, 381 insertions(+), 8 deletions(-)

diff --git a/velox/functions/prestosql/aggregates/SetAggregates.cpp b/velox/functions/prestosql/aggregates/SetAggregates.cpp
index 4f3d3eb65ab1..c83846884fb4 100644
--- a/velox/functions/prestosql/aggregates/SetAggregates.cpp
+++ b/velox/functions/prestosql/aggregates/SetAggregates.cpp
@@ -385,9 +385,9 @@ class SetUnionAggregate : public SetBaseAggregate {
 template