From b61ac061e5c45c447f65c438837b145efe6f9a79 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Thu, 25 Apr 2024 11:03:10 -0700 Subject: [PATCH 01/18] Filter out null map key entries by default Summary: When reading from file, Presto Java filter out null maps by default, so we need to do the same in Velox. https://github.com/facebookincubator/velox/pull/9604 Reviewed By: mbasmanova Differential Revision: D56522977 fbshipit-source-id: 52118795c3327811c12d404adf38f48d6c70a83d --- velox/connectors/hive/HiveConnectorUtil.cpp | 21 ++++++++++++-- .../hive/tests/HiveConnectorTest.cpp | 9 ++++-- velox/dwio/common/ScanSpec.h | 29 +++++++++++++++++++ velox/exec/tests/TableScanTest.cpp | 17 +++++++++++ .../aggregates/tests/MaxSizeForStatsTest.cpp | 4 +-- .../tests/SumDataSizeForStatsTest.cpp | 4 +-- 6 files changed, 76 insertions(+), 8 deletions(-) diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index 8189a92b812f..ecdf6d2c48c2 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -319,6 +319,19 @@ void checkColumnNameLowerCase(const core::TypedExprPtr& typeExpr) { } } +namespace { + +void filterOutNullMapKeys(const Type& rootType, common::ScanSpec& rootSpec) { + rootSpec.visit(rootType, [](const Type& type, common::ScanSpec& spec) { + if (type.isMap()) { + spec.childByName(common::ScanSpec::kMapKeysFieldName) + ->addFilter(common::IsNotNull()); + } + }); +} + +} // namespace + std::shared_ptr makeScanSpec( const RowTypePtr& rowType, const folly::F14FastMap>& @@ -348,7 +361,8 @@ std::shared_ptr makeScanSpec( auto& type = rowType->childAt(i); auto it = outputSubfields.find(name); if (it == outputSubfields.end()) { - spec->addFieldRecursively(name, *type, i); + auto* fieldSpec = spec->addFieldRecursively(name, *type, i); + filterOutNullMapKeys(*type, *fieldSpec); filterSubfields.erase(name); continue; } @@ -362,7 +376,9 @@ std::shared_ptr makeScanSpec( } filterSubfields.erase(it); } - addSubfields(*type, subfieldSpecs, 1, pool, *spec->addField(name, i)); + auto* fieldSpec = spec->addField(name, i); + addSubfields(*type, subfieldSpecs, 1, pool, *fieldSpec); + filterOutNullMapKeys(*type, *fieldSpec); subfieldSpecs.clear(); } @@ -376,6 +392,7 @@ std::shared_ptr makeScanSpec( auto& type = dataColumns->findChild(fieldName); auto* fieldSpec = spec->getOrCreateChild(common::Subfield(fieldName)); addSubfields(*type, subfieldSpecs, 1, pool, *fieldSpec); + filterOutNullMapKeys(*type, *fieldSpec); subfieldSpecs.clear(); } } diff --git a/velox/connectors/hive/tests/HiveConnectorTest.cpp b/velox/connectors/hive/tests/HiveConnectorTest.cpp index a21d98210641..3daa1d792dff 100644 --- a/velox/connectors/hive/tests/HiveConnectorTest.cpp +++ b/velox/connectors/hive/tests/HiveConnectorTest.cpp @@ -63,6 +63,11 @@ groupSubfields(const std::vector& subfields) { return grouped; } +bool mapKeyIsNotNull(const ScanSpec& mapSpec) { + return dynamic_cast( + mapSpec.childByName(ScanSpec::kMapKeysFieldName)->filter()); +} + TEST_F(HiveConnectorTest, hiveConfig) { ASSERT_EQ( HiveConfig::insertExistingPartitionsBehaviorString( @@ -210,7 +215,7 @@ TEST_F(HiveConnectorTest, makeScanSpec_requiredSubfields_allSubscripts) { pool_.get()); auto* c0 = scanSpec->childByName("c0"); ASSERT_TRUE(c0->flatMapFeatureSelection().empty()); - ASSERT_FALSE(c0->childByName(ScanSpec::kMapKeysFieldName)->filter()); + ASSERT_TRUE(mapKeyIsNotNull(*c0)); auto* values = c0->childByName(ScanSpec::kMapValuesFieldName); ASSERT_EQ( 
values->maxArrayElementsCount(), @@ -229,7 +234,7 @@ TEST_F(HiveConnectorTest, makeScanSpec_requiredSubfields_allSubscripts) { {}, pool_.get()); auto* c0 = scanSpec->childByName("c0"); - ASSERT_FALSE(c0->childByName(ScanSpec::kMapKeysFieldName)->filter()); + ASSERT_TRUE(mapKeyIsNotNull(*c0)); auto* values = c0->childByName(ScanSpec::kMapValuesFieldName); ASSERT_EQ( values->maxArrayElementsCount(), diff --git a/velox/dwio/common/ScanSpec.h b/velox/dwio/common/ScanSpec.h index e980baaf9020..9d998cce896a 100644 --- a/velox/dwio/common/ScanSpec.h +++ b/velox/dwio/common/ScanSpec.h @@ -322,6 +322,10 @@ class ScanSpec { flatMapFeatureSelection_ = std::move(features); } + /// Invoke the function provided on each node of the ScanSpec tree. + template + void visit(const Type& type, F&& f); + private: void reorder(); @@ -403,6 +407,31 @@ class ScanSpec { std::vector flatMapFeatureSelection_; }; +template +void ScanSpec::visit(const Type& type, F&& f) { + f(type, *this); + switch (type.kind()) { + case TypeKind::ROW: + for (auto& child : children_) { + VELOX_CHECK_NE(child->channel(), kNoChannel); + child->visit(*type.childAt(child->channel()), std::forward(f)); + } + break; + case TypeKind::MAP: + childByName(kMapKeysFieldName) + ->visit(*type.childAt(0), std::forward(f)); + childByName(kMapValuesFieldName) + ->visit(*type.childAt(1), std::forward(f)); + break; + case TypeKind::ARRAY: + childByName(kArrayElementsFieldName) + ->visit(*type.childAt(0), std::forward(f)); + break; + default: + break; + } +} + // Returns false if no value from a range defined by stats can pass the // filter. True, otherwise. bool testFilter( diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index ad7e5b13200c..0fa6e5b272e6 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -971,6 +971,23 @@ TEST_F(TableScanTest, subfieldPruningArrayType) { } } +TEST_F(TableScanTest, skipNullMapKeys) { + auto vector = makeRowVector({makeMapVector( + {0, 2}, + makeNullableFlatVector({std::nullopt, 2}), + makeFlatVector({1, 2}))}); + auto rowType = asRowType(vector->type()); + auto filePath = TempFilePath::create(); + writeToFile(filePath->getPath(), {vector}); + auto plan = PlanBuilder().tableScan(rowType).planNode(); + auto split = makeHiveConnectorSplit(filePath->getPath()); + auto expected = makeRowVector({makeMapVector( + {0, 1}, + makeNullableFlatVector(std::vector>(1, 2)), + makeFlatVector(std::vector(1, 2)))}); + AssertQueryBuilder(plan).split(split).assertResults(expected); +} + // Test reading files written before schema change, e.g. missing newly added // columns. 
TEST_F(TableScanTest, missingColumns) { diff --git a/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp b/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp index 8ba6c0d95837..5a79f23a01ef 100644 --- a/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MaxSizeForStatsTest.cpp @@ -218,7 +218,7 @@ TEST_F(MaxSizeForStatsTest, complexRecursiveGlobalAggregate) { createMapOfArraysVector({ {{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, - {{std::nullopt, {{7, 8, 9}}}}, + {{3, {{7, 8, 9}}}}, }), }), })}; @@ -261,7 +261,7 @@ TEST_F(MaxSizeForStatsTest, dictionaryEncodingTest) { createMapOfArraysVector({ {{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, - {{std::nullopt, {{7, 8, 9}}}}, + {{3, {{7, 8, 9}}}}, }), }); vector_size_t size = 3; diff --git a/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp b/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp index eab51e9e6a66..3c3b05893632 100644 --- a/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/SumDataSizeForStatsTest.cpp @@ -212,7 +212,7 @@ TEST_F(SumDataSizeForStatsTest, complexRecursiveGlobalAggregate) { createMapOfArraysVector({ {{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, - {{std::nullopt, {{7, 8, 9}}}}, + {{3, {{7, 8, 9}}}}, }), }), })}; @@ -256,7 +256,7 @@ TEST_F(SumDataSizeForStatsTest, dictionaryEncodingTest) { createMapOfArraysVector({ {{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, - {{std::nullopt, {{7, 8, 9}}}}, + {{3, {{7, 8, 9}}}}, }), }); vector_size_t size = 3; From 6664d6454bd77d07a82e8b808853cd9620c63423 Mon Sep 17 00:00:00 2001 From: Yoav Helfman Date: Thu, 25 Apr 2024 11:34:52 -0700 Subject: [PATCH 02/18] Adding backward compat format conversion Summary: Some tools, like validation service, still send file format "Alpha", which now fails because of recent rename. Adding previous string for backward compat, until we clean up all callsites. Reviewed By: zzhao0, pedroerp Differential Revision: D56580198 fbshipit-source-id: 5fe56db45c48b109edf1176785e32e1d8ca09d3d --- velox/dwio/common/Options.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/velox/dwio/common/Options.cpp b/velox/dwio/common/Options.cpp index 494d93cd2170..63b45b5b5845 100644 --- a/velox/dwio/common/Options.cpp +++ b/velox/dwio/common/Options.cpp @@ -36,7 +36,7 @@ FileFormat toFileFormat(std::string s) { return FileFormat::JSON; } else if (s == "parquet") { return FileFormat::PARQUET; - } else if (s == "nimble") { + } else if (s == "nimble" || s == "alpha") { return FileFormat::NIMBLE; } else if (s == "orc") { return FileFormat::ORC; From 6570db59630c6381146486275b0d6242e98fc69b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 25 Apr 2024 11:52:51 -0700 Subject: [PATCH 03/18] Fix naming of generated signatures files. (#9624) Summary: A typo prevented the files from being renamed correctly, causing errors in workflows using the stash. 
Fixes : https://github.com/facebookincubator/velox/issues/9622 Pull Request resolved: https://github.com/facebookincubator/velox/pull/9624 Reviewed By: Yuhta Differential Revision: D56582979 Pulled By: kgpai fbshipit-source-id: e98eae113289021c4cfb9bfe8d036532ee0ea665 --- .github/workflows/scheduled.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 7e383e5f2577..d73cbb2bc19a 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -228,9 +228,9 @@ jobs: source .venv/bin/activate python3 -m pip install deepdiff python3 scripts/signature.py gh_bias_check presto spark - python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contendor.json + python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contender.json python3 scripts/signature.py bias_aggregates /tmp/signatures/presto_aggregate_signatures_main.json \ - /tmp/signatures/presto_aggregate_signatures_contendor.json /tmp/signatures/presto_aggregate_bias_functions \ + /tmp/signatures/presto_aggregate_signatures_contender.json /tmp/signatures/presto_aggregate_bias_functions \ /tmp/signatures/presto_aggregate_errors - name: Upload Signature Artifacts From a9a620d4d324f82b8a76b5e0137482206bde4463 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 25 Apr 2024 12:05:16 -0700 Subject: [PATCH 04/18] Add nightly job to track build metrics (#9460) Summary: This PR adds a nightly job that collect build time and binary size metrics for a debug and release build and uploads the data to conbench. For example: https://velox-conbench.voltrondata.run/runs/BM-release-8665191090-1/ The adapters also add aggregate statistics, for time the `wall_time` is of course dependent on the number of cores so there will be clear differences between release and debug as debug runs on 16 cores (because the 8 cores don't have enough disk space for a debug build). Pull Request resolved: https://github.com/facebookincubator/velox/pull/9460 Reviewed By: Yuhta Differential Revision: D56496198 Pulled By: kgpai fbshipit-source-id: e6d0c3f83d10c053e90d1ad18532ff474eee25e0 --- .github/workflows/benchmark.yml | 2 + .github/workflows/build-metrics.yml | 118 +++++++++++ .github/workflows/linux-build.yml | 1 - scripts/benchmark-requirements.txt | 6 +- scripts/build-metrics.py | 295 ++++++++++++++++++++++++++++ 5 files changed, 418 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/build-metrics.yml create mode 100755 scripts/build-metrics.py diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 4ff826df7ad7..dbbf25cc6f55 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -22,6 +22,8 @@ on: - 'third_party/**' - 'pyvelox/**' - '.github/workflows/benchmark.yml' + - 'scripts/benchmark-requirements.txt' + push: branches: [main] diff --git a/.github/workflows/build-metrics.yml b/.github/workflows/build-metrics.yml new file mode 100644 index 000000000000..c493d70c0f0c --- /dev/null +++ b/.github/workflows/build-metrics.yml @@ -0,0 +1,118 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Collect Build Metrics + +on: + pull_request: + paths: + - ".github/workflows/build-metrics.yml" + + workflow_dispatch: + inputs: + ref: + description: "ref to check" + required: true + + schedule: + # Run every day at 04:05 + - cron: "5 4 * * *" + +permissions: + contents: read + +jobs: + metrics: + name: Linux ${{ matrix.type }} with adapters + if: ${{ github.repository == 'facebookincubator/velox' }} + runs-on: ${{ matrix.runner }} + container: ghcr.io/facebookincubator/velox-dev:adapters + strategy: + fail-fast: false + matrix: + - runner: "16-core" + - type: ["debug", "release"] + defaults: + run: + shell: bash + env: + VELOX_DEPENDENCY_SOURCE: SYSTEM + simdjson_SOURCE: BUNDLED + xsimd_SOURCE: BUNDLED + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref || github.sha }} + + - name: Fix git permissions + # Usually actions/checkout does this but as we run in a container + # it doesn't work + run: git config --global --add safe.directory /__w/velox/velox + + - name: Make ${{ matrix.type }} Build + env: + MAKEFLAGS: 'MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=4' + run: | + EXTRA_CMAKE_FLAGS=( + "-DVELOX_ENABLE_BENCHMARKS=ON" + "-DVELOX_ENABLE_ARROW=ON" + "-DVELOX_ENABLE_PARQUET=ON" + "-DVELOX_ENABLE_HDFS=ON" + "-DVELOX_ENABLE_S3=ON" + "-DVELOX_ENABLE_GCS=ON" + "-DVELOX_ENABLE_ABFS=ON" + "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON" + ) + make '${{ matrix.type }}' + + - name: Log binary sizes + run: | + mkdir -p /tmp/metrics + sizes_file=/tmp/metrics/object_sizes + pushd '_build/${{ matrix.type }}' + + find velox -type f -name '*.so' -o -name '*.a' -exec ls -l -BB {} \; | + awk '{print $5, $9; total += $5} END {print total," total_lib_size"}' > $sizes_file + + find velox -type f -name '*.o' -exec ls -l -BB {} \; | + awk '{print $5, $9; total += $5} END {print total," total_obj_size"}' >> $sizes_file + + find velox -type f -name 'velox_*' -exec ls -l -BB {} \; | + awk '{print $5, $9; total += $5} END {print total," total_exec_size"}' >> $sizes_file + + - name: Copy ninja_log + run: cp _build/${{ matrix.type }}/.ninja_log /tmp/metrics/.ninja_log + + - name: "Install dependencies" + run: | + python3 -m pip install setuptools + python3 -m pip install -r scripts/benchmark-requirements.txt + + - name: "Upload Metrics" + env: + CONBENCH_URL: "https://velox-conbench.voltrondata.run/" + CONBENCH_MACHINE_INFO_NAME: "GitHub-runner-${{ matrix.runner }}" + CONBENCH_EMAIL: "${{ secrets.CONBENCH_EMAIL }}" + CONBENCH_PASSWORD: "${{ secrets.CONBENCH_PASSWORD }}" + # These don't actually work https://github.com/conbench/conbench/issues/1484 + # but have to be there to work regardless?? 
+ CONBENCH_PROJECT_REPOSITORY: "${{ github.repository }}" + CONBENCH_PROJECT_COMMIT: "${{ inputs.ref || github.sha }}" + run: | + ./scripts/build-metrics.py upload \ + --build_type "${{ matrix.type }}" \ + --run_id "BM-${{ matrix.type }}-${{ github.run_id }}-${{ github.run_attempt }}" \ + --pr_number "${{ github.event.number }}" \ + --sha "${{ inputs.ref || github.sha }}" \ + "/tmp/metrics" diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 6c547b35f548..e8a68eda92f5 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -104,7 +104,6 @@ jobs: "-DVELOX_ENABLE_S3=ON" "-DVELOX_ENABLE_GCS=ON" "-DVELOX_ENABLE_ABFS=ON" - "-DVELOX_ENABLE_SUBSTRAIT=ON" "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON" "-DVELOX_ENABLE_GPU=ON" ) diff --git a/scripts/benchmark-requirements.txt b/scripts/benchmark-requirements.txt index 7ae4d8028461..3df0a15d0bd4 100644 --- a/scripts/benchmark-requirements.txt +++ b/scripts/benchmark-requirements.txt @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -benchadapt@git+https://github.com/conbench/conbench.git@44e81d1#subdirectory=benchadapt/python -benchalerts@git+https://github.com/conbench/conbench.git@44e81d1#subdirectory=benchalerts -benchclients@git+https://github.com/conbench/conbench.git@44e81d1#subdirectory=benchclients/python +benchadapt==2024.3.20 +benchalerts==2024.1.10.1 +benchclients==2024.3.29.1 diff --git a/scripts/build-metrics.py b/scripts/build-metrics.py new file mode 100755 index 000000000000..d172a03708b3 --- /dev/null +++ b/scripts/build-metrics.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import sys +import uuid +from os.path import join, splitext +from pathlib import Path +from typing import Any, Dict, List + +from benchadapt import BenchmarkResult +from benchadapt.adapters import BenchmarkAdapter + + +class BinarySizeAdapter(BenchmarkAdapter): + """ + Adapter to track build artifact sizes in conbench. + Expects the `size_file` to be formatted like this: + + + Suite meta data will be library, object or executable + based on file ending. 
+ """ + + size_file: Path + + def __init__( + self, + command: List[str], + size_file: str, + build_type: str, + result_fields_override: Dict[str, Any] = {}, + result_fields_append: Dict[str, Any] = {}, + ) -> None: + self.size_file = Path(size_file) + if build_type not in ["debug", "release"]: + raise ValueError(f"Build type '{build_type}' is not valid!") + self.build_type = build_type + super().__init__(command, result_fields_override, result_fields_append) + + def _transform_results(self) -> List[BenchmarkResult]: + results = [] + + batch_id = uuid.uuid4().hex + with open(self.size_file, "r") as file: + sizes = [line.strip() for line in file] + + if not sizes: + raise ValueError("'size_file' is empty!") + + for line in sizes: + size, path = line.split(maxsplit=1) + path = path.strip() + _, ext = splitext(path) + if ext in [".so", ".a"]: + suite = "library" + elif ext == ".o": + suite = "object" + else: + suite = "executable" + + parsed_size = BenchmarkResult( + run_reason="merge", + batch_id=batch_id, + stats={ + "data": [size], + "unit": "B", + "iterations": 1, + }, + tags={ + "name": path, + "suite": suite, + "source": f"{self.build_type}_build_metrics_size", + }, + info={}, + context={"benchmark_language": "C++"}, + ) + results.append(parsed_size) + + return results + + +class NinjaLogAdapter(BenchmarkAdapter): + """ + Adapter to extract compile and link times from a .ninja_log. + Will calculate aggregates for total, compile, link and wall time. + Suite metadata will be set based on binary ending to object, library or executable. + + Only files in paths beginning with velox/ will be tracked to avoid dependencies. + """ + + ninja_log: Path + + def __init__( + self, + command: List[str], + ninja_log: str, + build_type: str, + result_fields_override: Dict[str, Any] = {}, + result_fields_append: Dict[str, Any] = {}, + ) -> None: + self.ninja_log = Path(ninja_log) + if build_type not in ["debug", "release"]: + raise ValueError(f"Build type '{build_type}' is not valid!") + self.build_type = build_type + super().__init__(command, result_fields_override, result_fields_append) + + def _transform_results(self) -> List[BenchmarkResult]: + results = [] + + batch_id = uuid.uuid4().hex + with open(self.ninja_log, "r") as file: + log_lines = [line.strip() for line in file] + + if not log_lines[0].startswith("# ninja log v"): + raise ValueError("Malformed Ninja log found!") + else: + del log_lines[0] + + ms2sec = lambda x: x / 1000 + get_epoch = lambda l: int(l.split()[2]) + totals = { + "link_time": 0, + "compile_time": 0, + "total_time": 0, + "wall_time": get_epoch(log_lines[-1]) - get_epoch(log_lines[0]), + } + + for line in log_lines: + start, end, epoch, object_path, _ = line.split() + start = int(start) + end = int(end) + duration = ms2sec(end - start) + + # Don't track dependency times (refine check potentially?) 
+ if not object_path.startswith("velox"): + continue + + _, ext = splitext(object_path) + if ext in [".so", ".a"] or not ext: + totals["link_time"] += duration + suite = "linking" + elif ext == ".o": + totals["compile_time"] += duration + suite = "compiling" + else: + print(f"Unkown file type found: {object_path}") + print("Skipping...") + continue + + time_result = BenchmarkResult( + run_reason="merge", + batch_id=batch_id, + stats={ + "data": [duration], + "unit": "s", + "iterations": 1, + }, + tags={ + "name": object_path, + "suite": suite, + "source": f"{self.build_type}_build_metrics_time", + }, + info={}, + context={"benchmark_language": "C++"}, + ) + results.append(time_result) + + totals["total_time"] = totals["link_time"] + totals["compile_time"] + for total_name, total in totals.items(): + total_result = BenchmarkResult( + run_reason="merge", + batch_id=batch_id, + stats={ + "data": [total], + "unit": "s", + "iterations": 1, + }, + tags={ + "name": total_name, + "suite": "total", + "source": f"{self.build_type}_build_metrics_time", + }, + info={}, + context={"benchmark_language": "C++"}, + ) + results.append(total_result) + + return results + + +# find velox -type f -name '*.o' -exec ls -l -BB {} \; | awk '{print $5, $9}' | sed 's|CMakeFiles/.*dir/||g' > /tmp/object-size + + +def upload(args): + print("Uploading Build Metrics") + pr_number = int(args.pr_number) if args.pr_number else None + run_reason = "pull request" if pr_number else "commit" + run_name = f"{run_reason}: {args.sha}" + sizes = BinarySizeAdapter( + command=["true"], + size_file=join(args.base_path, args.size_file), + build_type=args.build_type, + result_fields_override={ + "run_id": args.run_id, + "run_name": run_name, + "run_reason": run_reason, + "github": { + "repository": "https://github.com/facebookincubator/velox", + "pr_number": pr_number, + "commit": args.sha, + }, + }, + ) + sizes() + + times = NinjaLogAdapter( + command=["true"], + ninja_log=join(args.base_path, args.ninja_log), + build_type=args.build_type, + result_fields_override={ + "run_id": args.run_id, + "run_name": run_name, + "run_reason": run_reason, + "github": { + "repository": "https://github.com/facebookincubator/velox", + "pr_number": pr_number, + "commit": args.sha, + }, + }, + ) + times() + + +def parse_args(): + parser = argparse.ArgumentParser(description="Velox Build Metric Utility.") + parser.set_defaults(func=lambda _: parser.print_help()) + + subparsers = parser.add_subparsers(help="Please specify one of the subcommands.") + + upload_parser = subparsers.add_parser( + "upload", help="Parse and upload build metrics" + ) + upload_parser.set_defaults(func=upload) + upload_parser.add_argument( + "--ninja_log", default=".ninja_log", help="Name of the ninja log file." + ) + upload_parser.add_argument( + "--size_file", + default="object_sizes", + help="Name of the file containing size information.", + ) + upload_parser.add_argument( + "--build_type", + required=True, + help="Type of build results come from, e.g. 
debug or release", + ) + upload_parser.add_argument( + "--run_id", + required=True, + help="A Conbench run ID unique to this build.", + ) + upload_parser.add_argument( + "--sha", + required=True, + help="HEAD sha for the result upload to conbench.", + ) + upload_parser.add_argument( + "--pr_number", + default=0, + help="PR number for the result upload to conbench.", + ) + upload_parser.add_argument( + "base_path", + help="Path in which the .ninja_log and sizes_file are found.", + ) + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + args.func(args) From 2f423d8681029ba0dd3780bd59d9b9abb5b6531a Mon Sep 17 00:00:00 2001 From: Wei He Date: Thu, 25 Apr 2024 14:29:46 -0700 Subject: [PATCH 05/18] Run aggregation and window fuzzer tests at PR time Summary: Exported to https://github.com/facebookincubator/velox/pull/9607. Reviewed By: kgpai Differential Revision: D56528440 fbshipit-source-id: cf7575c9a9f2a7cdef507acc5873d371082ac348 --- .github/workflows/scheduled.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index d73cbb2bc19a..cfc074205669 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -662,7 +662,6 @@ jobs: runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:presto-java timeout-minutes: 120 - if: ${{ github.event_name != 'pull_request' }} env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" LINUX_DISTRO: "centos" @@ -824,7 +823,6 @@ jobs: runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:presto-java timeout-minutes: 120 - if: ${{ github.event_name != 'pull_request' }} env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" LINUX_DISTRO: "centos" From a586a6997b193374ad6bd1d5d055f47cf4dcc5dc Mon Sep 17 00:00:00 2001 From: Bikramjeet Vig Date: Thu, 25 Apr 2024 15:01:47 -0700 Subject: [PATCH 06/18] Fix execution of lambda expressions Summary: Lambda expressions can be executed multiple times on the same input batch, such as with the `reduce` function which applies a lambda function to each element of an input array. It is important to note that each invocation receives a new set of inputs, and any state relevant to one set of inputs should be reset before the next invocation to avoid unintended consequences. An example of such failure that we observed is when shared expressions inside `reduce` inadvertently reused results between invocations because the shared expressions held onto shared results that were indexed based on input vector's address; due to sheer chance, some inputs ended up having the same memory address. Therefore, this change fixes this bug by ensuring this input specific state, currently only limited to shared expressions is reset before every invocation of the lambda. 
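For illustration only, a hypothetical expression of this shape exercises the bug (the expression, data, and helper calls below are illustrative, reusing the existing test utilities; this is not the exact repro from the report). Because abs(x) appears twice in the lambda body, it compiles into a single shared (multiply-referenced) expression, and reduce() invokes that body once per element batch:

  // Hypothetical sketch: abs(x) is a shared subexpression inside the lambda body.
  auto data = makeRowVector(
      {makeArrayVectorFromJson<int64_t>({"[1, -2, 3]", "[-4, 5]"})});
  auto result = evaluate(
      "reduce(c0, cast(0 as bigint), (s, x) -> s + abs(x) + abs(x), s -> s)",
      data);

Without the reset added here, the shared abs(x) could serve a result cached for a previous element batch whenever a new batch happened to land at the same vector address.
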
Reviewed By: mbasmanova Differential Revision: D56502765 fbshipit-source-id: f26bec5233d37d228b9f87b1844c18bb67225993 --- velox/expression/LambdaExpr.cpp | 54 +++++++++++++++++++++++++++++++-- velox/expression/LambdaExpr.h | 17 +++++------ 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/velox/expression/LambdaExpr.cpp b/velox/expression/LambdaExpr.cpp index 739e46cbaa22..67160a7d7fcc 100644 --- a/velox/expression/LambdaExpr.cpp +++ b/velox/expression/LambdaExpr.cpp @@ -32,10 +32,12 @@ class ExprCallable : public Callable { ExprCallable( RowTypePtr signature, RowVectorPtr capture, - std::shared_ptr body) + std::shared_ptr body, + std::vector> sharedExprsToReset) : signature_(std::move(signature)), capture_(std::move(capture)), - body_(std::move(body)) {} + body_(std::move(body)), + sharedExprsToReset_(std::move(sharedExprsToReset)) {} bool hasCapture() const override { return capture_->childrenSize() > signature_->size(); @@ -53,6 +55,7 @@ class ExprCallable : public Callable { EvalCtx lambdaCtx = createLambdaCtx(context, row, validRowsInReusedResult); ScopedVarSetter throwOnError( lambdaCtx.mutableThrowOnError(), context->throwOnError()); + resetSharedExprs(); body_->eval(rows, lambdaCtx, *result); transformErrorVector(lambdaCtx, context, rows, elementToTopLevelRows); } @@ -68,11 +71,18 @@ class ExprCallable : public Callable { auto row = createRowVector(context, wrapCapture, args, rows.end()); EvalCtx lambdaCtx = createLambdaCtx(context, row, validRowsInReusedResult); ScopedVarSetter throwOnError(lambdaCtx.mutableThrowOnError(), false); + resetSharedExprs(); body_->eval(rows, lambdaCtx, *result); lambdaCtx.swapErrors(elementErrors); } private: + void resetSharedExprs() { + for (auto& expr : sharedExprsToReset_) { + expr->reset(); + } + } + EvalCtx createLambdaCtx( EvalCtx* context, std::shared_ptr& row, @@ -129,10 +139,47 @@ class ExprCallable : public Callable { RowTypePtr signature_; RowVectorPtr capture_; std::shared_ptr body_; + // List of Shared Exprs that are decendants of 'body_' for which reset() needs + // to be called before calling `body_->eval()`. 
+ std::vector> sharedExprsToReset_; }; +void extractSharedExpressions( + const ExprPtr& expr, + std::unordered_set& shared) { + for (const auto& input : expr->inputs()) { + if (input->isMultiplyReferenced()) { + shared.insert(input); + continue; + } + extractSharedExpressions(input, shared); + } +} + } // namespace +LambdaExpr::LambdaExpr( + TypePtr type, + RowTypePtr&& signature, + std::vector>&& capture, + std::shared_ptr&& body, + bool trackCpuUsage) + : SpecialForm( + std::move(type), + std::vector>(), + "lambda", + false /* supportsFlatNoNullsFastPath */, + trackCpuUsage), + signature_(std::move(signature)), + body_(std::move(body)), + capture_(std::move(capture)) { + std::unordered_set shared; + extractSharedExpressions(body_, shared); + for (auto& expr : shared) { + sharedExprsToReset_.push_back(expr); + } +} + void LambdaExpr::computeDistinctFields() { SpecialForm::computeDistinctFields(); std::vector capturedFields; @@ -205,7 +252,8 @@ void LambdaExpr::evalSpecialForm( rows.end(), values, 0); - auto callable = std::make_shared(signature_, capture, body_); + auto callable = std::make_shared( + signature_, capture, body_, sharedExprsToReset_); std::shared_ptr functions; if (!result) { functions = std::make_shared(context.pool(), type_); diff --git a/velox/expression/LambdaExpr.h b/velox/expression/LambdaExpr.h index 81327b81a880..bba0dc0e85f5 100644 --- a/velox/expression/LambdaExpr.h +++ b/velox/expression/LambdaExpr.h @@ -33,16 +33,7 @@ class LambdaExpr : public SpecialForm { RowTypePtr&& signature, std::vector>&& capture, std::shared_ptr&& body, - bool trackCpuUsage) - : SpecialForm( - std::move(type), - std::vector>(), - "lambda", - false /* supportsFlatNoNullsFastPath */, - trackCpuUsage), - signature_(std::move(signature)), - body_(std::move(body)), - capture_(std::move(capture)) {} + bool trackCpuUsage); bool isConstant() const override { return false; @@ -80,6 +71,12 @@ class LambdaExpr : public SpecialForm { /// array/map. ExprPtr body_; + // List of Shared Exprs that are decendants of 'body_' for which reset() needs + // to be called before calling `body_->eval()`.This is because every + // invocation of `body_->eval()` should treat its inputs like a fresh batch + // similar to how we operate in `ExprSet::eval()`. + std::vector sharedExprsToReset_; + /// List of field references to columns in the input row vector. std::vector> capture_; From 6e2184f51340fe5c6445c2e368116d4924e3565a Mon Sep 17 00:00:00 2001 From: Bikramjeet Vig Date: Thu, 25 Apr 2024 15:04:56 -0700 Subject: [PATCH 07/18] Back out "Back out "[velox][PR] Refactor greatest and least Presto functions using simple function API"" Summary: Re-introducing this as the issue that initiated the backout is resolved. 
Original PR: https://github.com/facebookincubator/velox/pull/9308 Reviewed By: mbasmanova, s4ayub Differential Revision: D56548695 fbshipit-source-id: d0a9032f5cc958c8f4a3124c1ad81f290e31800b --- velox/functions/prestosql/CMakeLists.txt | 1 - velox/functions/prestosql/GreatestLeast.cpp | 207 ------------------ velox/functions/prestosql/GreatestLeast.h | 101 +++++++++ .../GeneralFunctionsRegistration.cpp | 32 ++- .../prestosql/tests/GreatestLeastTest.cpp | 69 ++++-- 5 files changed, 175 insertions(+), 235 deletions(-) delete mode 100644 velox/functions/prestosql/GreatestLeast.cpp create mode 100644 velox/functions/prestosql/GreatestLeast.h diff --git a/velox/functions/prestosql/CMakeLists.txt b/velox/functions/prestosql/CMakeLists.txt index 3a8008be601b..54d959cbbb37 100644 --- a/velox/functions/prestosql/CMakeLists.txt +++ b/velox/functions/prestosql/CMakeLists.txt @@ -34,7 +34,6 @@ add_library( FindFirst.cpp FromUnixTime.cpp FromUtf8.cpp - GreatestLeast.cpp InPredicate.cpp JsonFunctions.cpp Map.cpp diff --git a/velox/functions/prestosql/GreatestLeast.cpp b/velox/functions/prestosql/GreatestLeast.cpp deleted file mode 100644 index afc085d4a7ea..000000000000 --- a/velox/functions/prestosql/GreatestLeast.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "velox/common/base/Exceptions.h" -#include "velox/expression/Expr.h" -#include "velox/expression/VectorFunction.h" -#include "velox/type/Type.h" - -namespace facebook::velox::functions { - -namespace { - -template -class ExtremeValueFunction; - -using LeastFunction = ExtremeValueFunction; -using GreatestFunction = ExtremeValueFunction; - -/** - * This class implements two functions: - * - * greatest(value1, value2, ..., valueN) → [same as input] - * Returns the largest of the provided values. - * - * least(value1, value2, ..., valueN) → [same as input] - * Returns the smallest of the provided values. - **/ -template -class ExtremeValueFunction : public exec::VectorFunction { - private: - template - bool shouldOverride(const T& currentValue, const T& candidateValue) const { - return isLeast ? candidateValue < currentValue - : candidateValue > currentValue; - } - - // For double, presto should throw error if input is Nan - template - void checkNan(const T& value) const { - if constexpr (std::is_same_v::NativeType>) { - if (std::isnan(value)) { - VELOX_USER_FAIL( - "Invalid argument to {}: NaN", isLeast ? 
"least()" : "greatest()"); - } - } - } - - template - void applyTyped( - const SelectivityVector& rows, - const std::vector& args, - const TypePtr& outputType, - exec::EvalCtx& context, - VectorPtr& result) const { - context.ensureWritable(rows, outputType, result); - result->clearNulls(rows); - - auto* flatResult = result->as>(); - BufferPtr resultValues = flatResult->mutableValues(rows.end()); - T* __restrict rawResult = resultValues->asMutable(); - - exec::DecodedArgs decodedArgs(rows, args, context); - - std::set usedInputs; - context.applyToSelectedNoThrow(rows, [&](int row) { - size_t valueIndex = 0; - - T currentValue = decodedArgs.at(0)->valueAt(row); - checkNan(currentValue); - - for (auto i = 1; i < args.size(); ++i) { - auto candidateValue = decodedArgs.at(i)->template valueAt(row); - checkNan(candidateValue); - - if constexpr (isLeast) { - if (candidateValue < currentValue) { - currentValue = candidateValue; - valueIndex = i; - } - } else { - if (candidateValue > currentValue) { - currentValue = candidateValue; - valueIndex = i; - } - } - } - usedInputs.insert(valueIndex); - - if constexpr (std::is_same_v) { - flatResult->set(row, currentValue); - } else { - rawResult[row] = currentValue; - } - }); - - if constexpr (std::is_same_v) { - for (auto index : usedInputs) { - flatResult->acquireSharedStringBuffers(args[index].get()); - } - } - } - - public: - void apply( - const SelectivityVector& rows, - std::vector& args, - const TypePtr& outputType, - exec::EvalCtx& context, - VectorPtr& result) const override { - switch (outputType.get()->kind()) { - case TypeKind::BOOLEAN: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::TINYINT: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::SMALLINT: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::INTEGER: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::BIGINT: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::HUGEINT: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::REAL: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::DOUBLE: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::VARCHAR: - applyTyped(rows, args, outputType, context, result); - return; - case TypeKind::TIMESTAMP: - applyTyped(rows, args, outputType, context, result); - return; - default: - VELOX_FAIL( - "Unsupported input type for {}: {}", - isLeast ? 
"least()" : "greatest()", - outputType->toString()); - } - } - - static std::vector> signatures() { - const std::vector types = { - "boolean", - "tinyint", - "smallint", - "integer", - "bigint", - "double", - "real", - "varchar", - "timestamp", - "date", - }; - std::vector> signatures; - for (const auto& type : types) { - signatures.emplace_back(exec::FunctionSignatureBuilder() - .returnType(type) - .argumentType(type) - .variableArity() - .build()); - } - signatures.emplace_back(exec::FunctionSignatureBuilder() - .integerVariable("precision") - .integerVariable("scale") - .returnType("DECIMAL(precision, scale)") - .argumentType("DECIMAL(precision, scale)") - .variableArity() - .build()); - return signatures; - } -}; -} // namespace - -VELOX_DECLARE_VECTOR_FUNCTION( - udf_least, - LeastFunction::signatures(), - std::make_unique()); - -VELOX_DECLARE_VECTOR_FUNCTION( - udf_greatest, - GreatestFunction::signatures(), - std::make_unique()); - -} // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/GreatestLeast.h b/velox/functions/prestosql/GreatestLeast.h new file mode 100644 index 000000000000..a648aa5611d7 --- /dev/null +++ b/velox/functions/prestosql/GreatestLeast.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include "velox/functions/Macros.h" + +namespace facebook::velox::functions { +namespace details { +/** + * This class implements two functions: + * + * greatest(value1, value2, ..., valueN) → [same as input] + * Returns the largest of the provided values. + * + * least(value1, value2, ..., valueN) → [same as input] + * Returns the smallest of the provided values. 
+ * + * For DOUBLE and REAL type, NaN is considered as the biggest according to + * https://github.com/prestodb/presto/issues/22391 + **/ +template +struct ExtremeValueFunction { + VELOX_DEFINE_FUNCTION_TYPES(TExec); + + FOLLY_ALWAYS_INLINE void call( + out_type& result, + const arg_type& firstElement, + const arg_type>& remainingElement) { + auto currentValue = firstElement; + + for (auto element : remainingElement) { + auto candidateValue = element.value(); + + if constexpr (isLeast) { + if (smallerThan(candidateValue, currentValue)) { + currentValue = candidateValue; + } + } else { + if (greaterThan(candidateValue, currentValue)) { + currentValue = candidateValue; + } + } + } + + result = currentValue; + } + + private: + template + bool greaterThan(const K& lhs, const K& rhs) const { + if constexpr (std::is_same_v || std::is_same_v) { + if (std::isnan(lhs)) { + return true; + } + + if (std::isnan(rhs)) { + return false; + } + } + + return lhs > rhs; + } + + template + bool smallerThan(const K& lhs, const K& rhs) const { + if constexpr (std::is_same_v || std::is_same_v) { + if (std::isnan(lhs)) { + return false; + } + + if (std::isnan(rhs)) { + return true; + } + } + + return lhs < rhs; + } +}; +} // namespace details + +template +using LeastFunction = details::ExtremeValueFunction; + +template +using GreatestFunction = details::ExtremeValueFunction; + +} // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/registration/GeneralFunctionsRegistration.cpp b/velox/functions/prestosql/registration/GeneralFunctionsRegistration.cpp index c5a10411eb86..37acd5d68130 100644 --- a/velox/functions/prestosql/registration/GeneralFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/GeneralFunctionsRegistration.cpp @@ -17,9 +17,35 @@ #include "velox/functions/Registerer.h" #include "velox/functions/lib/IsNull.h" #include "velox/functions/prestosql/Cardinality.h" +#include "velox/functions/prestosql/GreatestLeast.h" #include "velox/functions/prestosql/InPredicate.h" namespace facebook::velox::functions { + +template +inline void registerGreatestLeastFunction(const std::string& prefix) { + registerFunction, T, T, Variadic>( + {prefix + "greatest"}); + + registerFunction, T, T, Variadic>( + {prefix + "least"}); +} + +inline void registerAllGreatestLeastFunctions(const std::string& prefix) { + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction>(prefix); + registerGreatestLeastFunction>(prefix); + registerGreatestLeastFunction(prefix); + registerGreatestLeastFunction(prefix); +} + extern void registerSubscriptFunction( const std::string& name, bool enableCaching); @@ -47,12 +73,10 @@ void registerGeneralFunctions(const std::string& prefix) { VELOX_REGISTER_VECTOR_FUNCTION(udf_transform, prefix + "transform"); VELOX_REGISTER_VECTOR_FUNCTION(udf_reduce, prefix + "reduce"); VELOX_REGISTER_VECTOR_FUNCTION(udf_array_filter, prefix + "filter"); - - VELOX_REGISTER_VECTOR_FUNCTION(udf_least, prefix + "least"); - VELOX_REGISTER_VECTOR_FUNCTION(udf_greatest, prefix + "greatest"); - VELOX_REGISTER_VECTOR_FUNCTION(udf_typeof, prefix + "typeof"); + registerAllGreatestLeastFunctions(prefix); + registerFunction>( {prefix + "cardinality"}); registerFunction>( diff --git 
a/velox/functions/prestosql/tests/GreatestLeastTest.cpp b/velox/functions/prestosql/tests/GreatestLeastTest.cpp index e19e13d61410..9bfddc552144 100644 --- a/velox/functions/prestosql/tests/GreatestLeastTest.cpp +++ b/velox/functions/prestosql/tests/GreatestLeastTest.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include +#include #include #include "velox/common/base/tests/GTestUtils.h" #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" @@ -83,29 +85,50 @@ TEST_F(GreatestLeastTest, leastReal) { {0, -100, -1.1}); } -TEST_F(GreatestLeastTest, nanInput) { - // Presto rejects NaN inputs of type DOUBLE, but allows NaN inputs of type - // REAL. - std::vector input{0, 1.1, std::nan("1")}; - VELOX_ASSERT_THROW( - runTest("least(c0)", {{0.0 / 0.0}}, {0}), - "Invalid argument to least(): NaN"); - runTest("try(least(c0, 1.0))", {input}, {0, 1.0, std::nullopt}); - - VELOX_ASSERT_THROW( - runTest("greatest(c0)", {1, {0.0 / 0.0}}, {1, 0}), - "Invalid argument to greatest(): NaN"); - runTest("try(greatest(c0, 1.0))", {input}, {1.0, 1.1, std::nullopt}); - - auto result = evaluateOnce( - "is_nan(least(c0))", std::nanf("1"), 1.2); - ASSERT_TRUE(result.has_value()); - ASSERT_TRUE(result.value()); - - result = evaluateOnce( - "is_nan(greatest(c0))", std::nanf("1"), 1.2); - ASSERT_TRUE(result.has_value()); - ASSERT_TRUE(result.value()); +TEST_F(GreatestLeastTest, greatestNanInput) { + auto constexpr kInf32 = std::numeric_limits::infinity(); + auto constexpr kInf64 = std::numeric_limits::infinity(); + + auto greatestFloat = [&](float a, float b, float c) { + return evaluateOnce( + "greatest(c0, c1, c2)", {a}, {b}, {c}) + .value(); + }; + + auto greatestDouble = [&](double a, double b, double c) { + return evaluateOnce( + "greatest(c0, c1, c2)", {a}, {b}, {c}) + .value(); + }; + + EXPECT_TRUE(std::isnan(greatestFloat(1.0, std::nanf("1"), 2.0))); + EXPECT_TRUE(std::isnan(greatestFloat(std::nanf("1"), 1.0, kInf32))); + + EXPECT_TRUE(std::isnan(greatestDouble(1.0, std::nan("1"), 2.0))); + EXPECT_TRUE(std::isnan(greatestDouble(std::nan("1"), 1.0, kInf64))); +} + +TEST_F(GreatestLeastTest, leastNanInput) { + auto constexpr kInf32 = std::numeric_limits::infinity(); + auto constexpr kInf64 = std::numeric_limits::infinity(); + + auto leastFloat = [&](float a, float b, float c) { + return evaluateOnce( + "least(c0, c1, c2)", {a}, {b}, {c}) + .value(); + }; + + auto leastDouble = [&](double a, double b, double c) { + return evaluateOnce( + "least(c0, c1, c2)", {a}, {b}, {c}) + .value(); + }; + + EXPECT_EQ(leastFloat(1.0, std::nanf("1"), 0.5), 0.5); + EXPECT_EQ(leastFloat(std::nanf("1"), 1.0, -kInf32), -kInf32); + + EXPECT_EQ(leastDouble(1.0, std::nan("1"), 0.5), 0.5); + EXPECT_EQ(leastDouble(std::nan("1"), 1.0, -kInf64), -kInf64); } TEST_F(GreatestLeastTest, greatestDouble) { From 1daeb9dc704a0d7eaee3de603d6c165fef9bb6d7 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Thu, 25 Apr 2024 17:33:41 -0700 Subject: [PATCH 08/18] Optimize copying null bits in FlatVector::copyValuesAndNulls Summary: Copying of null bits in FlatVector::copyValuesAndNulls showed up at 40% of the profile in a query that's computing a dozen or so coalesce(a, b) expressions. Optimize by copying null bits in whole words. After the optimization, the affected query uses 2-3x less CPU time and ~half wall time. 
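In caller terms, the hot per-row loop in FlatVector<T>::copyValuesAndNulls becomes a single word-wise call (both sides are visible in the diff below):

  // Before: test and set one null bit per selected row.
  rows.applyToSelected([&](vector_size_t row) {
    bits::setNull(rawNulls, row, bits::isBitNull(sourceNulls, row));
  });

  // After: copy whole 64-bit words of null bits for the selected rows.
  rows.copyNulls(rawNulls, sourceNulls);
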
Reviewed By: Yuhta Differential Revision: D56571917 fbshipit-source-id: c64f3a6efda1ea14a51393d085abcd736fb1b7a5 --- velox/vector/FlatVector-inl.h | 67 ++++++++----- velox/vector/SelectivityVector.cpp | 34 +++++++ velox/vector/SelectivityVector.h | 8 ++ velox/vector/tests/SelectivityVectorTest.cpp | 98 ++++++++++++++++++-- 4 files changed, 178 insertions(+), 29 deletions(-) diff --git a/velox/vector/FlatVector-inl.h b/velox/vector/FlatVector-inl.h index c9198d79f0e9..d56037114e7d 100644 --- a/velox/vector/FlatVector-inl.h +++ b/velox/vector/FlatVector-inl.h @@ -135,7 +135,7 @@ void FlatVector::copyValuesAndNulls( const vector_size_t* toSourceRow) { if (source->typeKind() == TypeKind::UNKNOWN) { auto* rawNulls = BaseVector::mutableRawNulls(); - rows.applyToSelected([&](auto row) { bits::setNull(rawNulls, row, true); }); + rows.setNulls(rawNulls); return; } @@ -145,7 +145,7 @@ void FlatVector::copyValuesAndNulls( if (!toSourceRow) { VELOX_CHECK_GE(source->size(), rows.end()); } - const uint64_t* sourceNulls = source->rawNulls(); + uint64_t* rawNulls = const_cast(BaseVector::rawNulls_); if (source->mayHaveNulls()) { rawNulls = BaseVector::mutableRawNulls(); @@ -161,8 +161,7 @@ void FlatVector::copyValuesAndNulls( auto* flatSource = source->asUnchecked>(); if (flatSource->values() == nullptr) { // All source values are null. - rows.applyToSelected( - [&](auto row) { bits::setNull(rawNulls, row, true); }); + rows.setNulls(rawNulls); return; } @@ -202,9 +201,10 @@ void FlatVector::copyValuesAndNulls( } if (rawNulls) { + const uint64_t* sourceNulls = source->rawNulls(); + if (!sourceNulls) { - rows.applyToSelected( - [&](vector_size_t row) { bits::setNull(rawNulls, row, false); }); + rows.clearNulls(rawNulls); } else { if (toSourceRow) { rows.applyToSelected([&](auto row) { @@ -214,9 +214,7 @@ void FlatVector::copyValuesAndNulls( rawNulls, row, bits::isBitNull(sourceNulls, sourceRow)); }); } else { - rows.applyToSelected([&](vector_size_t row) { - bits::setNull(rawNulls, row, bits::isBitNull(sourceNulls, row)); - }); + rows.copyNulls(rawNulls, sourceNulls); } } } @@ -243,24 +241,47 @@ void FlatVector::copyValuesAndNulls( rows.clearNulls(rawNulls); } else { DecodedVector decoded(*source); - rows.applyToSelected([&](auto row) { - auto sourceRow = toSourceRow ? 
toSourceRow[row] : row; - VELOX_DCHECK_GT(source->size(), sourceRow); - if (!decoded.isNullAt(sourceRow)) { - if constexpr (std::is_same_v) { - auto* rawValues = reinterpret_cast(rawValues_); - bits::setBit(rawValues, row, decoded.valueAt(sourceRow)); - } else { - rawValues_[row] = decoded.valueAt(sourceRow); + if (toSourceRow == nullptr) { + rows.applyToSelected([&](auto row) { + if (!decoded.isNullAt(row)) { + if constexpr (std::is_same_v) { + auto* rawValues = reinterpret_cast(rawValues_); + bits::setBit(rawValues, row, decoded.valueAt(row)); + } else { + rawValues_[row] = decoded.valueAt(row); + } } + }); - if (rawNulls) { - bits::clearNull(rawNulls, row); + if (rawNulls != nullptr) { + auto* sourceNulls = decoded.nulls(); + if (sourceNulls == nullptr) { + rows.clearNulls(rawNulls); + } else { + rows.copyNulls(rawNulls, sourceNulls); } - } else { - bits::setNull(rawNulls, row); } - }); + + } else { + rows.applyToSelected([&](auto row) { + const auto sourceRow = toSourceRow[row]; + VELOX_DCHECK_GT(source->size(), sourceRow); + if (!decoded.isNullAt(sourceRow)) { + if constexpr (std::is_same_v) { + auto* rawValues = reinterpret_cast(rawValues_); + bits::setBit(rawValues, row, decoded.valueAt(sourceRow)); + } else { + rawValues_[row] = decoded.valueAt(sourceRow); + } + + if (rawNulls) { + bits::clearNull(rawNulls, row); + } + } else { + bits::setNull(rawNulls, row); + } + }); + } } } diff --git a/velox/vector/SelectivityVector.cpp b/velox/vector/SelectivityVector.cpp index d46cc4197f0a..7967f815c60a 100644 --- a/velox/vector/SelectivityVector.cpp +++ b/velox/vector/SelectivityVector.cpp @@ -54,6 +54,40 @@ std::string SelectivityVector::toString( return out.str(); } +void SelectivityVector::copyNulls(uint64_t* dest, const uint64_t* src) const { + if (isAllSelected()) { + bits::copyBits(src, 0, dest, 0, size_); + return; + } + + const auto* rowBits = bits_.data(); + + bits::forEachWord( + begin_, + end_, + [dest, src, rowBits](int32_t idx, uint64_t mask) { + // Set 'dest' to 0 for selected rows. + dest[idx] = (dest[idx] & ~mask) | (mask & dest[idx] & ~rowBits[idx]); + + // Set 'copySrc' to 0 for non-selected rows. + uint64_t copySrc = + (src[idx] & ~mask) | (mask & src[idx] & rowBits[idx]); + + // Combine 'dest' and 'copySrc' with an OR. + dest[idx] = (dest[idx] & ~mask) | (mask & (dest[idx] | copySrc)); + }, + [dest, src, rowBits](int32_t idx) { + // Set 'dest' to 0 for selected rows. + dest[idx] = dest[idx] & ~rowBits[idx]; + + // Set 'copySrc' to 0 for non-selected rows. + uint64_t copySrc = src[idx] & rowBits[idx]; + + // Combine 'dest' and 'copySrc' with an OR. + dest[idx] = dest[idx] | copySrc; + }); +} + void translateToInnerRows( const SelectivityVector& outerRows, const vector_size_t* indices, diff --git a/velox/vector/SelectivityVector.h b/velox/vector/SelectivityVector.h index f0a890040db0..7c4fa3ec29f5 100644 --- a/velox/vector/SelectivityVector.h +++ b/velox/vector/SelectivityVector.h @@ -248,6 +248,14 @@ class SelectivityVector { nulls->asMutable(), bits_.data(), begin_, end_); } + void setNulls(uint64_t* rawNulls) const { + VELOX_CHECK_NOT_NULL(rawNulls); + bits::andWithNegatedBits(rawNulls, bits_.data(), begin_, end_); + } + + /// Copy null bits from 'src' to 'dest' for active rows. + void copyNulls(uint64_t* dest, const uint64_t* src) const; + /// Merges the valid vector of another SelectivityVector by or'ing /// them together. This is used to support memoization where a state /// may acquire new values over time. 
Grows 'this' if size of 'other' exceeds diff --git a/velox/vector/tests/SelectivityVectorTest.cpp b/velox/vector/tests/SelectivityVectorTest.cpp index 31b0a75de037..5c1430e499c0 100644 --- a/velox/vector/tests/SelectivityVectorTest.cpp +++ b/velox/vector/tests/SelectivityVectorTest.cpp @@ -18,9 +18,9 @@ #include -namespace facebook { -namespace velox { -namespace test { +#include "velox/common/base/Nulls.h" + +namespace facebook::velox { namespace { void assertState( @@ -531,6 +531,92 @@ TEST(SelectivityVectorTest, toString) { "147 out of 1024 rows selected between 0 and 1023: 0, 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91, 98, 105, 112, 119, 126, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203, 210, 217, 224, 231, 238, 245, 252, 259, 266, 273, 280, 287, 294, 301, 308, 315, 322, 329, 336, 343, 350, 357, 364, 371, 378, 385, 392, 399, 406, 413, 420, 427, 434, 441, 448, 455, 462, 469, 476, 483, 490, 497, 504, 511, 518, 525, 532, 539, 546, 553, 560, 567, 574, 581, 588, 595, 602, 609, 616, 623, 630, 637, 644, 651, 658, 665, 672, 679, 686, 693, 700, 707, 714, 721, 728, 735, 742, 749, 756, 763, 770, 777, 784, 791, 798, 805, 812, 819, 826, 833, 840, 847, 854, 861, 868, 875, 882, 889, 896, 903, 910, 917, 924, 931, 938, 945, 952, 959, 966, 973, 980, 987, 994, 1001, 1008, 1015, 1022"); } -} // namespace test -} // namespace velox -} // namespace facebook +TEST(SelectivityVectorTest, copyNulls) { + // nnnnn.....nnnnn..... + std::vector dest(bits::nwords(1024)); + for (auto i = 0; i < 1024; ++i) { + bits::setNull(dest.data(), i, i % 10 < 5); + } + + // ..nn..nn..nn..nn..nn + std::vector src(bits::nwords(512)); + for (auto i = 0; i < 512; ++i) { + bits::setNull(src.data(), i, i % 4 >= 2); + } + + // Copy even rows. Skip some at the start and end. + { + SelectivityVector rows(512); + for (auto i = 0; i < rows.size(); ++i) { + rows.setValid(i, i % 2 == 0); + } + rows.setValidRange(0, 7, false); + rows.setValidRange(501, 512, false); + rows.updateBounds(); + + auto test = dest; + rows.copyNulls(test.data(), src.data()); + + for (auto i = 0; i < rows.size(); ++i) { + const auto isNull = bits::isBitNull(test.data(), i); + if (rows.isValid(i)) { + ASSERT_EQ(bits::isBitNull(src.data(), i), isNull); + } else { + ASSERT_EQ(bits::isBitNull(dest.data(), i), isNull); + } + } + + for (auto i = rows.size(); i < 1024; ++i) { + const auto isNull = bits::isBitNull(test.data(), i); + ASSERT_EQ(bits::isBitNull(dest.data(), i), isNull); + } + } + + // Copy odd rows. + { + SelectivityVector rows(512); + for (auto i = 0; i < rows.size(); ++i) { + rows.setValid(i, i % 2 != 0); + } + rows.setValidRange(0, 7, false); + rows.setValidRange(501, 512, false); + rows.updateBounds(); + + auto test = dest; + rows.copyNulls(test.data(), src.data()); + + for (auto i = 0; i < rows.size(); ++i) { + const auto isNull = bits::isBitNull(test.data(), i); + if (rows.isValid(i)) { + ASSERT_EQ(bits::isBitNull(src.data(), i), isNull); + } else { + ASSERT_EQ(bits::isBitNull(dest.data(), i), isNull); + } + } + + for (auto i = rows.size(); i < 1024; ++i) { + const auto isNull = bits::isBitNull(test.data(), i); + ASSERT_EQ(bits::isBitNull(dest.data(), i), isNull); + } + } + + // Copy all rows. 
+ { + SelectivityVector rows(478); + + auto test = dest; + rows.copyNulls(test.data(), src.data()); + + for (auto i = 0; i < rows.size(); ++i) { + const auto isNull = bits::isBitNull(test.data(), i); + EXPECT_EQ(bits::isBitNull(src.data(), i), isNull) << i; + } + + for (auto i = rows.size(); i < 1024; ++i) { + const auto isNull = bits::isBitNull(test.data(), i); + ASSERT_EQ(bits::isBitNull(dest.data(), i), isNull); + } + } +} + +} // namespace facebook::velox From 32289f98670667325158f6b07112c67b894dfbbe Mon Sep 17 00:00:00 2001 From: Tengfei Huang Date: Thu, 25 Apr 2024 20:09:48 -0700 Subject: [PATCH 09/18] Add flatten Spark function (#9593) Summary: In Presto, `flatten` ignores NULL array element in the input. In Spark, `flatten` returns NULL if any array element of the input is NULL. Spark 3.5 ref: [Flatten function](https://github.com/apache/spark/blob/v3.5.1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L2796) Pull Request resolved: https://github.com/facebookincubator/velox/pull/9593 Reviewed By: pedroerp Differential Revision: D56580116 Pulled By: kagamiori fbshipit-source-id: 535f8112e3d37921f46ee41a8a05401f76c092cf --- velox/docs/functions/spark/array.rst | 9 +++ .../functions/sparksql/ArrayFlattenFunction.h | 57 +++++++++++++++++ velox/functions/sparksql/Register.cpp | 6 ++ .../sparksql/tests/ArrayFlattenTest.cpp | 62 +++++++++++++++++++ velox/functions/sparksql/tests/CMakeLists.txt | 1 + 5 files changed, 135 insertions(+) create mode 100644 velox/functions/sparksql/ArrayFlattenFunction.h create mode 100644 velox/functions/sparksql/tests/ArrayFlattenTest.cpp diff --git a/velox/docs/functions/spark/array.rst b/velox/docs/functions/spark/array.rst index 705dde524c4e..aa5932e16732 100644 --- a/velox/docs/functions/spark/array.rst +++ b/velox/docs/functions/spark/array.rst @@ -106,6 +106,15 @@ Array Functions SELECT filter(array(0, 2, 3), (x, i) -> x > i); -- [2, 3] SELECT filter(array(0, null, 2, 3, null), x -> x IS NOT NULL); -- [0, 2, 3] +.. function:: flatten(array(array(E))) -> array(E) + + Transforms an array of arrays into a single array. + Returns NULL if the input is NULL or any of the nested arrays is NULL. :: + + SELECT flatten(array(array(1, 2), array(3, 4))); -- [1, 2, 3, 4] + SELECT flatten(array(array(1, 2), array(3, NULL))); -- [1, 2, 3, NULL] + SELECT flatten(array(array(1, 2), NULL, array(3, 4))); -- NULL + .. spark:function:: in(value, array(E)) -> boolean Returns true if value matches at least one of the elements of the array. diff --git a/velox/functions/sparksql/ArrayFlattenFunction.h b/velox/functions/sparksql/ArrayFlattenFunction.h new file mode 100644 index 000000000000..ac369714de25 --- /dev/null +++ b/velox/functions/sparksql/ArrayFlattenFunction.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "velox/functions/Udf.h" + +namespace facebook::velox::functions::sparksql { + +/// flatten(array(array(E))) → array(E) +/// Flattens nested array by concatenating the contained arrays. +template +struct ArrayFlattenFunction { + VELOX_DEFINE_FUNCTION_TYPES(T) + + // INT_MAX - 15, keep the same limit with spark. + static constexpr int32_t kMaxNumberOfElements = 2147483632; + + FOLLY_ALWAYS_INLINE bool call( + out_type>>& out, + const arg_type>>>& arrays) { + int64_t elementCount = 0; + for (const auto& array : arrays) { + if (array.has_value()) { + elementCount += array.value().size(); + } else { + // Return NULL if any of the nested arrays is NULL. + return false; + } + } + + VELOX_USER_CHECK_LE( + elementCount, + kMaxNumberOfElements, + "array flatten result exceeds the max array size limit {}", + kMaxNumberOfElements); + + out.reserve(elementCount); + for (const auto& array : arrays) { + out.add_items(array.value()); + }; + return true; + } +}; +} // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp index 8b8f93a8d373..81f42fbced02 100644 --- a/velox/functions/sparksql/Register.cpp +++ b/velox/functions/sparksql/Register.cpp @@ -25,6 +25,7 @@ #include "velox/functions/prestosql/ArrayFunctions.h" #include "velox/functions/prestosql/DateTimeFunctions.h" #include "velox/functions/prestosql/StringFunctions.h" +#include "velox/functions/sparksql/ArrayFlattenFunction.h" #include "velox/functions/sparksql/ArrayMinMaxFunction.h" #include "velox/functions/sparksql/ArraySizeFunction.h" #include "velox/functions/sparksql/ArraySort.h" @@ -407,6 +408,11 @@ void registerFunctions(const std::string& prefix) { {prefix + "monotonically_increasing_id"}); registerFunction>({prefix + "uuid"}); + + registerFunction< + ArrayFlattenFunction, + Array>, + Array>>>({prefix + "flatten"}); } } // namespace sparksql diff --git a/velox/functions/sparksql/tests/ArrayFlattenTest.cpp b/velox/functions/sparksql/tests/ArrayFlattenTest.cpp new file mode 100644 index 000000000000..b531c0b903c7 --- /dev/null +++ b/velox/functions/sparksql/tests/ArrayFlattenTest.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h" + +using namespace facebook::velox::test; + +namespace facebook::velox::functions::sparksql::test { +namespace { + +class ArrayFlattenTest : public SparkFunctionBaseTest { + protected: + void testExpression( + const std::string& expression, + const std::vector& input, + const VectorPtr& expected) { + const auto result = evaluate(expression, makeRowVector(input)); + assertEqualVectors(expected, result); + } +}; + +// Flatten integer arrays. 
+TEST_F(ArrayFlattenTest, intArrays) { + const auto arrayOfArrays = makeNestedArrayVectorFromJson({ + "[[1, 1], [2, 2], [3, 3]]", + "[[4, 4]]", + "[[5, 5], [6, 6]]", + }); + + const auto expected = makeArrayVectorFromJson( + {"[1, 1, 2, 2, 3, 3]", "[4, 4]", "[5, 5, 6, 6]"}); + + testExpression("flatten(c0)", {arrayOfArrays}, expected); +} + +// Flatten arrays with null. +TEST_F(ArrayFlattenTest, nullArray) { + const auto arrayOfArrays = makeNestedArrayVectorFromJson({ + "[[1, 1], null, [3, 3]]", + "null", + "[[5, null], [null, 6], [null, null], []]", + }); + + const auto expected = makeArrayVectorFromJson( + {"null", "null", "[5, null, null, 6, null, null]"}); + + testExpression("flatten(c0)", {arrayOfArrays}, expected); +} +} // namespace +} // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/CMakeLists.txt b/velox/functions/sparksql/tests/CMakeLists.txt index 0089957f10d1..a1b00031f6ab 100644 --- a/velox/functions/sparksql/tests/CMakeLists.txt +++ b/velox/functions/sparksql/tests/CMakeLists.txt @@ -15,6 +15,7 @@ add_executable( velox_functions_spark_test ArithmeticTest.cpp + ArrayFlattenTest.cpp ArrayMaxTest.cpp ArrayMinTest.cpp ArraySizeTest.cpp From 97160cdc385e5212c3b89a200690e0e85d0ff19f Mon Sep 17 00:00:00 2001 From: xiaoxmeng Date: Fri, 26 Apr 2024 10:41:58 -0700 Subject: [PATCH 10/18] =?UTF-8?q?Avoid=20unnecessary=20memory=20capacity?= =?UTF-8?q?=20growth=20in=20case=20of=20concurrent=20arbitr=E2=80=A6=20(#9?= =?UTF-8?q?557)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This PR avoids unnecessary memory capacity growth in case of concurrent arbitration requests from the same query. The first arbitration request might have reserved enough capacity from the arbitrator (we allocate more than request to reduce the number of arbitrations). We avoid this by checking if there is sufficient free capacity in the request pool itself before allocating more capacity from the arbitrator. Also to avoid unnecessary retries from the memory pool, we support to commit the reservation bytes before return arbitration success back to the memory pool. Correspondingly, the memory pool doesn't have to increase the reservation and check for retry on capacity grow success from the arbitrator. 
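A minimal sketch of the fast path described above, assuming the MemoryPool interface as changed in this patch (grow(growBytes, reservationBytes) returning bool, and freeBytes()); the helper name maybeUseReservedCapacity is hypothetical and used only for illustration:

    // If the requesting root pool already has enough free capacity (e.g.
    // reserved by an earlier arbitration request from the same query),
    // commit the reservation directly instead of growing capacity again.
    bool maybeUseReservedCapacity(MemoryPool* requestor, uint64_t targetBytes) {
      if (requestor->freeBytes() >= targetBytes) {
        // grow(0, targetBytes) commits 'targetBytes' of reservation without
        // increasing capacity; it returns false if the reservation no longer
        // fits, in which case the caller falls back to normal arbitration.
        return requestor->grow(/*growBytes=*/0, /*reservationBytes=*/targetBytes);
      }
      return false;
    }

When this check succeeds, the arbitrator returns early and skips the reclaim path, which is what keeps concurrent requests from the same query from growing capacity repeatedly.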
Pull Request resolved: https://github.com/facebookincubator/velox/pull/9557 Reviewed By: tanjialiang, oerling Differential Revision: D56444509 Pulled By: xiaoxmeng fbshipit-source-id: adbff6ba18389c30c601e627a325c6d7df1f907c --- velox/common/file/tests/FaultyFile.cpp | 5 +- velox/common/file/tests/FaultyFile.h | 4 +- velox/common/file/tests/FileTest.cpp | 2 +- velox/common/memory/Memory.cpp | 7 +- velox/common/memory/Memory.h | 10 +- velox/common/memory/MemoryArbitrator.cpp | 4 +- velox/common/memory/MemoryArbitrator.h | 2 +- velox/common/memory/MemoryPool.cpp | 53 +++--- velox/common/memory/MemoryPool.h | 18 ++- velox/common/memory/SharedArbitrator.cpp | 105 ++++++------ velox/common/memory/SharedArbitrator.h | 32 ++-- .../memory/tests/MemoryArbitratorTest.cpp | 41 ++--- velox/common/memory/tests/MemoryPoolTest.cpp | 113 ++++++++----- .../memory/tests/MockSharedArbitratorTest.cpp | 61 +++++-- velox/dwio/dwrf/test/WriterFlushTest.cpp | 3 +- velox/exec/tests/HashJoinTest.cpp | 109 +------------ velox/exec/tests/OrderByTest.cpp | 153 ++++-------------- velox/exec/tests/TableScanTest.cpp | 3 +- velox/exec/tests/TableWriteTest.cpp | 13 +- 19 files changed, 328 insertions(+), 410 deletions(-) diff --git a/velox/common/file/tests/FaultyFile.cpp b/velox/common/file/tests/FaultyFile.cpp index a8342506dd51..80bc292052f2 100644 --- a/velox/common/file/tests/FaultyFile.cpp +++ b/velox/common/file/tests/FaultyFile.cpp @@ -80,10 +80,9 @@ void FaultyWriteFile::append(std::string_view data) { if (injectionHook_ != nullptr) { FaultFileWriteOperation op(path_, data); injectionHook_(&op); - if (op.delegate) { - delegatedFile_->append(op.data); + if (!op.delegate) { + return; } - return; } delegatedFile_->append(data); } diff --git a/velox/common/file/tests/FaultyFile.h b/velox/common/file/tests/FaultyFile.h index c5dfaa721ead..4bba0faf89aa 100644 --- a/velox/common/file/tests/FaultyFile.h +++ b/velox/common/file/tests/FaultyFile.h @@ -87,13 +87,13 @@ struct FaultFileReadvOperation : FaultFileOperation { /// Fault injection parameters for file write API. struct FaultFileWriteOperation : FaultFileOperation { - std::string_view data; + std::string_view* data; FaultFileWriteOperation( const std::string& _path, const std::string_view& _data) : FaultFileOperation(FaultFileOperation::Type::kWrite, _path), - data(_data) {} + data(const_cast(&_data)) {} }; /// The fault injection hook on the file operation path. 
diff --git a/velox/common/file/tests/FileTest.cpp b/velox/common/file/tests/FileTest.cpp index 11a914336a2e..b1eb01399275 100644 --- a/velox/common/file/tests/FileTest.cpp +++ b/velox/common/file/tests/FileTest.cpp @@ -642,7 +642,7 @@ TEST_F(FaultyFsTest, fileWriteFaultHookInjection) { return; } auto* writeOp = static_cast(op); - writeOp->data = "Error data"; + *writeOp->data = "Error data"; }); { auto writeFile = fs_->openFileForWrite(path1, {}); diff --git a/velox/common/memory/Memory.cpp b/velox/common/memory/Memory.cpp index 241501e6546a..d96474a5febc 100644 --- a/velox/common/memory/Memory.cpp +++ b/velox/common/memory/Memory.cpp @@ -105,7 +105,12 @@ MemoryManager::MemoryManager(const MemoryManagerOptions& options) VELOX_USER_CHECK_GE(capacity(), 0); VELOX_CHECK_GE(allocator_->capacity(), arbitrator_->capacity()); MemoryAllocator::alignmentCheck(0, alignment_); - defaultRoot_->grow(defaultRoot_->maxCapacity()); + const bool ret = defaultRoot_->grow(defaultRoot_->maxCapacity(), 0); + VELOX_CHECK( + ret, + "Failed to set max capacity {} for {}", + succinctBytes(defaultRoot_->maxCapacity()), + defaultRoot_->name()); const size_t numSharedPools = std::max(1, FLAGS_velox_memory_num_shared_leaf_pools); sharedLeafPools_.reserve(numSharedPools); diff --git a/velox/common/memory/Memory.h b/velox/common/memory/Memory.h index ee163da915bf..35bda7bf2463 100644 --- a/velox/common/memory/Memory.h +++ b/velox/common/memory/Memory.h @@ -161,7 +161,11 @@ struct MemoryManagerOptions { /// pool. uint64_t memoryPoolInitCapacity{256 << 20}; - /// The minimal memory capacity reserved for a query memory pool to run. + /// The minimal query memory pool capacity that is ensured during arbitration. + /// During arbitration, memory arbitrator ensures the participants' memory + /// pool capacity to be no less than this value on a best-effort basis, for + /// more smooth executions of the queries, to avoid frequent arbitration + /// requests. uint64_t memoryPoolReservedCapacity{0}; /// The minimal memory capacity to transfer out of or into a memory pool @@ -287,10 +291,6 @@ class MemoryManager { return sharedLeafPools_; } - bool testingGrowPool(MemoryPool* pool, uint64_t incrementBytes) { - return growPool(pool, incrementBytes); - } - private: void dropPool(MemoryPool* pool); diff --git a/velox/common/memory/MemoryArbitrator.cpp b/velox/common/memory/MemoryArbitrator.cpp index e379a78c1f10..a0466f544f00 100644 --- a/velox/common/memory/MemoryArbitrator.cpp +++ b/velox/common/memory/MemoryArbitrator.cpp @@ -94,8 +94,8 @@ class NoopArbitrator : public MemoryArbitrator { // Noop arbitrator has no memory capacity limit so no operation needed for // memory pool capacity reserve. uint64_t growCapacity(MemoryPool* pool, uint64_t /*unused*/) override { - pool->grow(pool->maxCapacity()); - return pool->maxCapacity(); + pool->grow(pool->maxCapacity(), 0); + return pool->capacity(); } // Noop arbitrator has no memory capacity limit so no operation needed for diff --git a/velox/common/memory/MemoryArbitrator.h b/velox/common/memory/MemoryArbitrator.h index 6107ca825314..ca66ff2e7833 100644 --- a/velox/common/memory/MemoryArbitrator.h +++ b/velox/common/memory/MemoryArbitrator.h @@ -128,7 +128,7 @@ class MemoryArbitrator { /// Invoked by the memory manager to allocate up to 'targetBytes' of free /// memory capacity without triggering memory arbitration. 
The function will /// grow the memory pool's capacity based on the free available memory - /// capacity in the arbitrator, and returns the actual growed capacity in + /// capacity in the arbitrator, and returns the actual grown capacity in /// bytes. virtual uint64_t growCapacity(MemoryPool* pool, uint64_t bytes) = 0; diff --git a/velox/common/memory/MemoryPool.cpp b/velox/common/memory/MemoryPool.cpp index 5b99577d8b05..4939931b1530 100644 --- a/velox/common/memory/MemoryPool.cpp +++ b/velox/common/memory/MemoryPool.cpp @@ -809,15 +809,9 @@ bool MemoryPoolImpl::incrementReservationThreadSafe( TestValue::adjust( "facebook::velox::memory::MemoryPoolImpl::incrementReservationThreadSafe::AfterGrowCallback", this); - // NOTE: the memory reservation might still fail even if the memory grow - // callback succeeds. The reason is that we don't hold the root tracker's - // mutex lock while running the grow callback. Therefore, there is a - // possibility in theory that a concurrent memory reservation request - // might steal away the increased memory capacity after the grow callback - // finishes and before we increase the reservation. If it happens, we can - // simply fall back to retry the memory reservation from the leaf memory - // pool which should happen rarely. - return maybeIncrementReservation(size); + // NOTE: if memory arbitration succeeds, it should have already committed + // the reservation 'size' in the root memory pool. + return true; } VELOX_MEM_POOL_CAP_EXCEEDED(fmt::format( "Exceeded memory pool cap of {} with max {} when requesting {}, memory " @@ -846,12 +840,16 @@ bool MemoryPoolImpl::maybeIncrementReservation(uint64_t size) { return false; } } - reservationBytes_ += size; + incrementReservationLocked(size); + return true; +} + +void MemoryPoolImpl::incrementReservationLocked(uint64_t bytes) { + reservationBytes_ += bytes; if (!isLeaf()) { - cumulativeBytes_ += size; + cumulativeBytes_ += bytes; maybeUpdatePeakBytesLocked(reservationBytes_); } - return true; } void MemoryPoolImpl::release() { @@ -1029,20 +1027,29 @@ uint64_t MemoryPoolImpl::shrink(uint64_t targetBytes) { return freeBytes; } -uint64_t MemoryPoolImpl::grow(uint64_t bytes) noexcept { +bool MemoryPoolImpl::grow(uint64_t growBytes, uint64_t reservationBytes) { if (parent_ != nullptr) { - return parent_->grow(bytes); + return parent_->grow(growBytes, reservationBytes); } // TODO: add to prevent from growing beyond the max capacity and the // corresponding support in memory arbitrator. std::lock_guard l(mutex_); // We don't expect to grow a memory pool without capacity limit. 
VELOX_CHECK_NE(capacity_, kMaxMemory, "Can't grow with unlimited capacity"); - VELOX_CHECK_LE( - capacity_ + bytes, maxCapacity_, "Can't grow beyond the max capacity"); - capacity_ += bytes; - VELOX_CHECK_GE(capacity_, bytes); - return capacity_; + if (capacity_ + growBytes > maxCapacity_) { + return false; + } + if (reservationBytes_ + reservationBytes > capacity_ + growBytes) { + return false; + } + + capacity_ += growBytes; + VELOX_CHECK_GE(capacity_, growBytes); + if (reservationBytes > 0) { + incrementReservationLocked(reservationBytes); + VELOX_CHECK_LE(reservationBytes, reservationBytes_); + } + return true; } void MemoryPoolImpl::abort(const std::exception_ptr& error) { @@ -1081,6 +1088,14 @@ void MemoryPoolImpl::testingSetCapacity(int64_t bytes) { capacity_ = bytes; } +void MemoryPoolImpl::testingSetReservation(int64_t bytes) { + if (parent_ != nullptr) { + return toImpl(parent_)->testingSetReservation(bytes); + } + std::lock_guard l(mutex_); + reservationBytes_ = bytes; +} + bool MemoryPoolImpl::needRecordDbg(bool /* isAlloc */) { if (!debugPoolNameRegex_.empty()) { return RE2::FullMatch(name_, debugPoolNameRegex_); diff --git a/velox/common/memory/MemoryPool.h b/velox/common/memory/MemoryPool.h index bbbe06352747..de8a864a02f4 100644 --- a/velox/common/memory/MemoryPool.h +++ b/velox/common/memory/MemoryPool.h @@ -362,9 +362,15 @@ class MemoryPool : public std::enable_shared_from_this { /// 'targetBytes' is zero, the function frees all the free memory capacity. virtual uint64_t shrink(uint64_t targetBytes = 0) = 0; - /// Invoked to increase the memory pool's capacity by 'bytes'. The function - /// returns the memory pool's capacity after the growth. - virtual uint64_t grow(uint64_t bytes) noexcept = 0; + /// Invoked to increase the memory pool's capacity by 'growBytes' and commit + /// the reservation by 'reservationBytes'. The function makes the two updates + /// atomic. The function returns true if the updates succeed, otherwise false + /// and neither change will apply. + /// + /// NOTE: this should only be called by memory arbitrator when a root memory + /// pool tries to grow its capacity for a new reservation request which + /// exceeds its current capacity limit. + virtual bool grow(uint64_t growBytes, uint64_t reservationBytes = 0) = 0; /// Sets the memory reclaimer for this memory pool. /// @@ -646,7 +652,7 @@ class MemoryPoolImpl : public MemoryPool { uint64_t shrink(uint64_t targetBytes = 0) override; - uint64_t grow(uint64_t bytes) noexcept override; + bool grow(uint64_t growBytes, uint64_t reservationBytes = 0) override; void abort(const std::exception_ptr& error) override; @@ -684,6 +690,8 @@ class MemoryPoolImpl : public MemoryPool { void testingSetCapacity(int64_t bytes); + void testingSetReservation(int64_t bytes); + MemoryManager* testingManager() const { return manager_; } @@ -845,6 +853,8 @@ class MemoryPoolImpl : public MemoryPool { // returns true, otherwise the function returns false. bool maybeIncrementReservation(uint64_t size); + void incrementReservationLocked(uint64_t bytes); + // Release memory reservation for an allocation free or memory release with // specified 'size'. If 'releaseOnly' is true, then we only release the unused // reservation if 'minReservationBytes_' is set. 
'releaseThreadSafe' processes diff --git a/velox/common/memory/SharedArbitrator.cpp b/velox/common/memory/SharedArbitrator.cpp index b03badb51a62..01bd3802f6de 100644 --- a/velox/common/memory/SharedArbitrator.cpp +++ b/velox/common/memory/SharedArbitrator.cpp @@ -79,7 +79,7 @@ void sortCandidatesByReclaimableFreeCapacity( &candidates); } -void sortCandidatesByReclaimableUsedMemory( +void sortCandidatesByReclaimableUsedCapacity( std::vector& candidates) { std::sort( candidates.begin(), @@ -90,7 +90,7 @@ void sortCandidatesByReclaimableUsedMemory( }); TestValue::adjust( - "facebook::velox::memory::SharedArbitrator::sortCandidatesByReclaimableUsedMemory", + "facebook::velox::memory::SharedArbitrator::sortCandidatesByReclaimableUsedCapacity", &candidates); } @@ -162,23 +162,9 @@ std::string SharedArbitrator::Candidate::toString() const { } SharedArbitrator::~SharedArbitrator() { - if (freeReservedCapacity_ != reservedCapacity_) { - const std::string errMsg = fmt::format( - "There is unexpected free reserved capacity not given back to arbitrator " - "on destruction: freeReservedCapacity_{} != reservedCapacity_{}\\n{}", - freeReservedCapacity_, - reservedCapacity_, - toString()); - if (checkUsageLeak_) { - VELOX_FAIL(errMsg); - } else { - VELOX_MEM_LOG(ERROR) << errMsg; - } - } if (freeNonReservedCapacity_ + freeReservedCapacity_ != capacity_) { const std::string errMsg = fmt::format( - "There is unexpected free capacity not given back to arbitrator " - "on destruction: freeNonReservedCapacity_[{}] + freeReservedCapacity_[{}] != capacity_[{}])\\n{}", + "Unexpected free capacity leak in arbitrator: freeNonReservedCapacity_[{}] + freeReservedCapacity_[{}] != capacity_[{}])\\n{}", freeNonReservedCapacity_, freeReservedCapacity_, capacity_, @@ -214,18 +200,18 @@ void SharedArbitrator::updateFreeCapacityMetrics() const { kMetricArbitratorFreeReservedCapacityBytes, freeReservedCapacity_); } -int64_t SharedArbitrator::reclaimableCapacity(const MemoryPool& pool) const { +int64_t SharedArbitrator::maxReclaimableCapacity(const MemoryPool& pool) const { return std::max(0, pool.capacity() - memoryPoolReservedCapacity_); } int64_t SharedArbitrator::reclaimableFreeCapacity( const MemoryPool& pool) const { - return std::min(pool.freeBytes(), reclaimableCapacity(pool)); + return std::min(pool.freeBytes(), maxReclaimableCapacity(pool)); } int64_t SharedArbitrator::reclaimableUsedCapacity( const MemoryPool& pool) const { - const auto maxReclaimableBytes = reclaimableCapacity(pool); + const auto maxReclaimableBytes = maxReclaimableCapacity(pool); const auto reclaimableBytes = pool.reclaimableBytes(); return std::min(maxReclaimableBytes, reclaimableBytes.value_or(0)); } @@ -240,9 +226,8 @@ int64_t SharedArbitrator::minGrowCapacity(const MemoryPool& pool) const { uint64_t SharedArbitrator::growCapacity( MemoryPool* pool, uint64_t targetBytes) { - const auto freeCapacityUpdateCb = + const auto freeCapacityMetricUpdateCb = folly::makeGuard([this]() { updateFreeCapacityMetrics(); }); - uint64_t reservedBytes{0}; { std::lock_guard l(mutex_); @@ -252,33 +237,36 @@ uint64_t SharedArbitrator::growCapacity( const int64_t minBytesToReserve = minGrowCapacity(*pool); reservedBytes = decrementFreeCapacityLocked(maxBytesToReserve, minBytesToReserve); - pool->grow(reservedBytes); + try { + checkedGrow(pool, reservedBytes, 0); + } catch (const VeloxRuntimeError& error) { + reservedBytes = 0; + } } return reservedBytes; } uint64_t SharedArbitrator::decrementFreeCapacity( - uint64_t maxBytes, - uint64_t minBytes) { + uint64_t 
maxBytesToReserve, + uint64_t minBytesToReserve) { uint64_t reservedBytes{0}; { std::lock_guard l(mutex_); - reservedBytes = decrementFreeCapacityLocked(maxBytes, minBytes); + reservedBytes = + decrementFreeCapacityLocked(maxBytesToReserve, minBytesToReserve); } return reservedBytes; } uint64_t SharedArbitrator::decrementFreeCapacityLocked( - uint64_t maxBytes, - uint64_t minBytes) { + uint64_t maxBytesToReserve, + uint64_t minBytesToReserve) { uint64_t allocatedBytes = - std::min(freeNonReservedCapacity_, maxBytes); - VELOX_CHECK_LE(allocatedBytes, freeNonReservedCapacity_); + std::min(freeNonReservedCapacity_, maxBytesToReserve); freeNonReservedCapacity_ -= allocatedBytes; - if (allocatedBytes < minBytes) { - const uint64_t reservedBytes = - std::min(minBytes - allocatedBytes, freeReservedCapacity_); - VELOX_CHECK_LE(reservedBytes, freeReservedCapacity_); + if (allocatedBytes < minBytesToReserve) { + const uint64_t reservedBytes = std::min( + minBytesToReserve - allocatedBytes, freeReservedCapacity_); freeReservedCapacity_ -= reservedBytes; allocatedBytes += reservedBytes; } @@ -349,6 +337,10 @@ void SharedArbitrator::testingFreeCapacity(uint64_t capacity) { incrementFreeCapacityLocked(capacity); } +uint64_t SharedArbitrator::testingNumRequests() const { + return numRequests_; +} + bool SharedArbitrator::growCapacity( MemoryPool* pool, const std::vector>& candidatePools, @@ -364,6 +356,16 @@ bool SharedArbitrator::growCapacity( VELOX_MEM_POOL_ABORTED("The requestor pool has been aborted"); } + // Checks if the request pool already has enough free capacity for the new + // request. This could happen if there is multiple concurrent arbitration + // requests from the same query. When the first served request succeeds, it + // might have reserved enough memory capacity for the followup requests. + if (requestor->freeBytes() >= targetBytes) { + if (requestor->grow(0, targetBytes)) { + return true; + } + } + if (!ensureCapacity(requestor, targetBytes)) { RECORD_METRIC_VALUE(kMetricArbitratorFailuresCount); ++numFailures_; @@ -466,6 +468,19 @@ bool SharedArbitrator::handleOOM( return true; } +void SharedArbitrator::checkedGrow( + MemoryPool* pool, + uint64_t growBytes, + uint64_t reservationBytes) { + const auto ret = pool->grow(growBytes, reservationBytes); + VELOX_CHECK( + ret, + "Failed to grow pool {} with {} and commit {} used reservation", + pool->name(), + succinctBytes(growBytes), + succinctBytes(reservationBytes)); +} + bool SharedArbitrator::arbitrateMemory( MemoryPool* requestor, std::vector& candidates, @@ -476,24 +491,24 @@ bool SharedArbitrator::arbitrateMemory( std::max(memoryPoolTransferCapacity_, targetBytes)); const uint64_t minGrowTarget = minGrowCapacity(*requestor); uint64_t freedBytes = decrementFreeCapacity(growTarget, minGrowTarget); - if (freedBytes >= targetBytes) { - requestor->grow(freedBytes); - return true; - } - VELOX_CHECK_LT(freedBytes, growTarget); - auto freeGuard = folly::makeGuard([&]() { // Returns the unused freed memory capacity back to the arbitrator. 
if (freedBytes > 0) { incrementFreeCapacity(freedBytes); } }); + if (freedBytes >= targetBytes) { + checkedGrow(requestor, freedBytes, targetBytes); + freedBytes = 0; + return true; + } + VELOX_CHECK_LT(freedBytes, growTarget); freedBytes += reclaimFreeMemoryFromCandidates(candidates, growTarget - freedBytes); if (freedBytes >= targetBytes) { const uint64_t bytesToGrow = std::min(growTarget, freedBytes); - requestor->grow(bytesToGrow); + checkedGrow(requestor, bytesToGrow, targetBytes); freedBytes -= bytesToGrow; return true; } @@ -519,7 +534,7 @@ bool SharedArbitrator::arbitrateMemory( } const uint64_t bytesToGrow = std::min(freedBytes, growTarget); - requestor->grow(bytesToGrow); + checkedGrow(requestor, bytesToGrow, targetBytes); freedBytes -= bytesToGrow; return true; } @@ -556,7 +571,7 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesBySpill( std::vector& candidates, uint64_t targetBytes) { // Sort candidate memory pools based on their reclaimable used capacity. - sortCandidatesByReclaimableUsedMemory(candidates); + sortCandidatesByReclaimableUsedCapacity(candidates); uint64_t freedBytes{0}; for (const auto& candidate : candidates) { @@ -608,7 +623,7 @@ uint64_t SharedArbitrator::reclaim( bool isLocalArbitration) noexcept { int64_t bytesToReclaim = std::min( std::max(targetBytes, memoryPoolTransferCapacity_), - reclaimableCapacity(*pool)); + maxReclaimableCapacity(*pool)); if (bytesToReclaim == 0) { return 0; } @@ -803,7 +818,7 @@ void SharedArbitrator::startArbitration(const std::string& contextMsg) { } TestValue::adjust( - "facebook::velox::memory::SharedArbitrator::startArbitration", nullptr); + "facebook::velox::memory::SharedArbitrator::startArbitration", this); if (waitPromise.valid()) { uint64_t waitTimeUs{0}; diff --git a/velox/common/memory/SharedArbitrator.h b/velox/common/memory/SharedArbitrator.h index 3332c770322b..50dc8c015d18 100644 --- a/velox/common/memory/SharedArbitrator.h +++ b/velox/common/memory/SharedArbitrator.h @@ -75,6 +75,8 @@ class SharedArbitrator : public memory::MemoryArbitrator { /// Returns 'freeCapacity' back to the arbitrator for testing. void testingFreeCapacity(uint64_t freeCapacity); + uint64_t testingNumRequests() const; + /// Operator level runtime stats that are reported during a shared arbitration /// attempt. static inline const std::string kMemoryArbitrationWallNanos{ @@ -160,12 +162,18 @@ class SharedArbitrator : public memory::MemoryArbitrator { std::vector& candidates, uint64_t targetBytes); - // Invoded to reclaim used memroy capacity from 'candidates' by aborting the + // Invoked to reclaim used memory capacity from 'candidates' by aborting the // top memory users' queries. uint64_t reclaimUsedMemoryFromCandidatesByAbort( std::vector& candidates, uint64_t targetBytes); + // Invoked to grow 'pool' capacity by 'growBytes' and commit used reservation + // by 'reservationBytes'. The function throws if the grow fails from memory + // pool. + void + checkedGrow(MemoryPool* pool, uint64_t growBytes, uint64_t reservationBytes); + // Invoked to reclaim used memory from 'targetPool' with specified // 'targetBytes'. The function returns the actually freed capacity. // 'isLocalArbitration' is true when the reclaim attempt is within a local @@ -188,14 +196,18 @@ class SharedArbitrator : public memory::MemoryArbitrator { uint64_t targetBytes, std::vector& candidates); - // Decrements free capacity from the arbitrator with up to 'maxBytes'. The - // arbitrator might have less free available capacity. 
The function returns - // the actual decremented free capacity bytes. If 'minBytes' is not zero and - // there is less than 'minBytes' available in non-reserved capacity, then - // the arbitrator tries to decrement up to 'minBytes' from the reserved - // capacity. - uint64_t decrementFreeCapacity(uint64_t maxBytes, uint64_t minBytes); - uint64_t decrementFreeCapacityLocked(uint64_t maxBytes, uint64_t minBytes); + // Decrements free capacity from the arbitrator with up to + // 'maxBytesToReserve'. The arbitrator might have less free available + // capacity. The function returns the actual decremented free capacity bytes. + // If 'minBytesToReserve' is not zero and there is less than 'minBytes' + // available in non-reserved capacity, then the arbitrator tries to decrement + // up to 'minBytes' from the reserved capacity. + uint64_t decrementFreeCapacity( + uint64_t maxBytesToReserve, + uint64_t minBytesToReserve); + uint64_t decrementFreeCapacityLocked( + uint64_t maxBytesToReserve, + uint64_t minBytesToReserve); // Increment free capacity by 'bytes'. void incrementFreeCapacity(uint64_t bytes); @@ -213,7 +225,7 @@ class SharedArbitrator : public memory::MemoryArbitrator { // Returns the max reclaimable capacity from 'pool' which includes both used // and free capacities. - int64_t reclaimableCapacity(const MemoryPool& pool) const; + int64_t maxReclaimableCapacity(const MemoryPool& pool) const; // Returns the free memory capacity that can be reclaimed from 'pool' by // shrink. diff --git a/velox/common/memory/tests/MemoryArbitratorTest.cpp b/velox/common/memory/tests/MemoryArbitratorTest.cpp index 347c7eaa9c8f..8153b32a905a 100644 --- a/velox/common/memory/tests/MemoryArbitratorTest.cpp +++ b/velox/common/memory/tests/MemoryArbitratorTest.cpp @@ -293,36 +293,21 @@ TEST_F(MemoryArbitrationTest, reservedCapacityFreeByPoolShrink) { options.memoryPoolInitCapacity = 2 << 20; options.memoryPoolReservedCapacity = 1 << 20; - for (bool shrinkByCapacityGrow : {false, true}) { - SCOPED_TRACE(fmt::format("shrinkByCapacityGrow {}", shrinkByCapacityGrow)); - - MemoryManager manager(options); - auto* arbitrator = manager.arbitrator(); - const int numPools = 6; - std::vector> pools; - for (int i = 0; i < numPools; ++i) { - pools.push_back(manager.addRootPool("", kMaxMemory)); - ASSERT_GE(pools.back()->capacity(), 1 << 20); - } - ASSERT_EQ(arbitrator->stats().freeCapacityBytes, 0); + MemoryManager manager(options); + auto* arbitrator = manager.arbitrator(); + const int numPools = 6; + std::vector> pools; + for (int i = 0; i < numPools; ++i) { pools.push_back(manager.addRootPool("", kMaxMemory)); - - ASSERT_GE(pools.back()->capacity(), 0); - if (shrinkByCapacityGrow) { - ASSERT_TRUE( - arbitrator->growCapacity(pools[numPools - 1].get(), pools, 1 << 20)); - ASSERT_EQ(arbitrator->stats().freeReservedCapacityBytes, 0); - ASSERT_EQ(arbitrator->stats().freeCapacityBytes, 0); - ASSERT_EQ( - arbitrator->growCapacity(pools[numPools - 1].get(), 1 << 20), 0); - ASSERT_EQ(arbitrator->growCapacity(pools.back().get(), 2 << 20), 0); - } else { - ASSERT_EQ(arbitrator->shrinkCapacity(pools, 1 << 20), 2 << 20); - ASSERT_EQ( - arbitrator->growCapacity(pools[numPools - 1].get(), 1 << 20), 0); - ASSERT_EQ(arbitrator->growCapacity(pools.back().get(), 2 << 20), 1 << 20); - } + ASSERT_GE(pools.back()->capacity(), 1 << 20); } + ASSERT_EQ(arbitrator->stats().freeCapacityBytes, 0); + pools.push_back(manager.addRootPool("", kMaxMemory)); + + ASSERT_GE(pools.back()->capacity(), 0); + ASSERT_EQ(arbitrator->shrinkCapacity(pools, 1 << 20), 
2 << 20); + ASSERT_EQ(arbitrator->growCapacity(pools[numPools - 1].get(), 1 << 20), 0); + ASSERT_EQ(arbitrator->growCapacity(pools.back().get(), 2 << 20), 1 << 20); } TEST_F(MemoryArbitrationTest, arbitratorStats) { diff --git a/velox/common/memory/tests/MemoryPoolTest.cpp b/velox/common/memory/tests/MemoryPoolTest.cpp index a5e4ec8415d8..19d359f674bc 100644 --- a/velox/common/memory/tests/MemoryPoolTest.cpp +++ b/velox/common/memory/tests/MemoryPoolTest.cpp @@ -401,27 +401,89 @@ TEST_P(MemoryPoolTest, DISABLED_memoryLeakCheck) { child->free(oneChunk, kChunkSize); } -TEST_P(MemoryPoolTest, DISABLED_growBeyondMaxCapacity) { - gflags::FlagSaver flagSaver; - testing::FLAGS_gtest_death_test_style = "fast"; +TEST_P(MemoryPoolTest, growFailures) { auto manager = getMemoryManager(); + // Grow beyond limit. { auto poolWithoutLimit = manager->addRootPool("poolWithoutLimit"); ASSERT_EQ(poolWithoutLimit->capacity(), kMaxMemory); - ASSERT_DEATH( - poolWithoutLimit->grow(1), "Can't grow with unlimited capacity"); + ASSERT_EQ(poolWithoutLimit->currentBytes(), 0); + VELOX_ASSERT_THROW( + poolWithoutLimit->grow(1, 0), "Can't grow with unlimited capacity"); + ASSERT_EQ(poolWithoutLimit->currentBytes(), 0); + VELOX_ASSERT_THROW( + poolWithoutLimit->grow(1, 1'000), "Can't grow with unlimited capacity"); + ASSERT_EQ(poolWithoutLimit->currentBytes(), 0); } { const int64_t capacity = 4 * GB; auto poolWithLimit = manager->addRootPool("poolWithLimit", capacity); ASSERT_EQ(poolWithLimit->capacity(), capacity); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); ASSERT_EQ(poolWithLimit->shrink(poolWithLimit->currentBytes()), capacity); - ASSERT_EQ(poolWithLimit->grow(capacity / 2), capacity / 2); - ASSERT_DEATH( - poolWithLimit->grow(capacity), "Can't grow beyond the max capacity"); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + ASSERT_TRUE(poolWithLimit->grow(capacity / 2, 0)); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + ASSERT_FALSE(poolWithLimit->grow(capacity, 0)); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + ASSERT_EQ(poolWithLimit->capacity(), capacity / 2); + ASSERT_FALSE(poolWithLimit->grow(capacity, 1'000)); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + } + + // Insufficient capacity for new reservation. 
+ { + const int64_t capacity = 4 * GB; + auto poolWithLimit = manager->addRootPool("poolWithLimit", capacity); + ASSERT_EQ(poolWithLimit->capacity(), capacity); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + ASSERT_EQ(poolWithLimit->shrink(poolWithLimit->capacity()), capacity); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + ASSERT_EQ(poolWithLimit->capacity(), 0); + + ASSERT_FALSE(poolWithLimit->grow(capacity / 2, capacity)); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + ASSERT_EQ(poolWithLimit->capacity(), 0); + + ASSERT_FALSE(poolWithLimit->grow(0, capacity)); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); + ASSERT_EQ(poolWithLimit->capacity(), 0); + ASSERT_EQ(poolWithLimit->currentBytes(), 0); } } +TEST_P(MemoryPoolTest, grow) { + auto manager = getMemoryManager(); + const int64_t capacity = 4 * GB; + auto root = manager->addRootPool("grow", capacity); + root->shrink(capacity / 2); + ASSERT_EQ(root->capacity(), capacity / 2); + + auto leaf = root->addLeafChild("leafPool"); + void* buf = leaf->allocate(1 * MB); + ASSERT_EQ(root->capacity(), capacity / 2); + ASSERT_EQ(root->currentBytes(), 1 * MB); + + ASSERT_TRUE(root->grow(0, 2 * MB)); + ASSERT_EQ(root->currentBytes(), 3 * MB); + ASSERT_EQ(root->capacity(), capacity / 2); + + ASSERT_TRUE(root->grow(0, 4 * MB)); + ASSERT_EQ(root->currentBytes(), 7 * MB); + ASSERT_EQ(root->capacity(), capacity / 2); + + ASSERT_TRUE(root->grow(1 * MB, 2 * MB)); + ASSERT_EQ(root->currentBytes(), 9 * MB); + ASSERT_EQ(root->capacity(), capacity / 2 + 1 * MB); + + ASSERT_TRUE(root->grow(6 * MB, 4 * MB)); + ASSERT_EQ(root->currentBytes(), 13 * MB); + ASSERT_EQ(root->capacity(), capacity / 2 + 7 * MB); + + static_cast(root.get())->testingSetReservation(1 * MB); + leaf->free(buf, 1 * MB); +} + TEST_P(MemoryPoolTest, ReallocTestSameSize) { auto manager = getMemoryManager(); auto root = manager->addRootPool(); @@ -431,7 +493,6 @@ TEST_P(MemoryPoolTest, ReallocTestSameSize) { const int64_t kChunkSize{32L * MB}; // Realloc the same size. - void* oneChunk = pool->allocate(kChunkSize); ASSERT_EQ(kChunkSize, pool->currentBytes()); ASSERT_EQ(kChunkSize, pool->stats().peakBytes); @@ -2708,11 +2769,14 @@ TEST_P(MemoryPoolTest, shrinkAndGrowAPIs) { for (int i = 0; i < step; ++i) { const int expectedCapacity = (i + 1) * allocationSize; if (i % 3 == 0) { - ASSERT_EQ(leafPool->grow(allocationSize), expectedCapacity); + ASSERT_TRUE(leafPool->grow(allocationSize, 0)); + ASSERT_EQ(leafPool->capacity(), expectedCapacity); } else if (i % 3 == 1) { - ASSERT_EQ(aggregationPool->grow(allocationSize), expectedCapacity); + ASSERT_TRUE(aggregationPool->grow(allocationSize, 0)); + ASSERT_EQ(leafPool->capacity(), expectedCapacity); } else { - ASSERT_EQ(rootPool->grow(allocationSize), expectedCapacity); + ASSERT_TRUE(rootPool->grow(allocationSize, 0)); + ASSERT_EQ(leafPool->capacity(), expectedCapacity); } ASSERT_EQ(leafPool->capacity(), expectedCapacity); ASSERT_EQ(aggregationPool->capacity(), expectedCapacity); @@ -3287,31 +3351,6 @@ TEST_P(MemoryPoolTest, maybeReserveFailWithAbort) { child->maybeReserve(2 * kMaxSize), "Manual MemoryPool Abortion"); } -// Model implementation of a GrowCallback. -bool grow(int64_t size, int64_t hardLimit, MemoryPool& pool) { - static std::mutex mutex; - // The calls from different threads on the same tracker must be serialized. - std::lock_guard l(mutex); - // The total includes the allocation that exceeded the limit. This - // function's job is to raise the limit to >= current + size. 
- auto current = pool.reservedBytes(); - auto limit = pool.capacity(); - if (current + size <= limit) { - // No need to increase. It could be another thread already - // increased the cap far enough while this thread was waiting to - // enter the lock_guard. - return true; - } - if (current + size > hardLimit) { - // The caller will revert the allocation that called this and signal an - // error. - return false; - } - // We set the new limit to be the requested size. - static_cast(&pool)->testingSetCapacity(current + size); - return true; -} - DEBUG_ONLY_TEST_P(MemoryPoolTest, raceBetweenFreeAndFailedAllocation) { if (!isLeafThreadSafe_) { return; diff --git a/velox/common/memory/tests/MockSharedArbitratorTest.cpp b/velox/common/memory/tests/MockSharedArbitratorTest.cpp index 1c0e70c11536..261a996d14f5 100644 --- a/velox/common/memory/tests/MockSharedArbitratorTest.cpp +++ b/velox/common/memory/tests/MockSharedArbitratorTest.cpp @@ -519,7 +519,7 @@ TEST_F(MockSharedArbitrationTest, constructor) { if (i < nonReservedCapacity / kMemoryPoolInitCapacity) { ASSERT_EQ(task->capacity(), kMemoryPoolInitCapacity); } else { - ASSERT_EQ(task->capacity(), kMemoryPoolReservedCapacity); + ASSERT_EQ(task->capacity(), kMemoryPoolReservedCapacity) << i; } remainingFreeCapacity -= task->capacity(); tasks.push_back(std::move(task)); @@ -586,8 +586,8 @@ TEST_F(MockSharedArbitrationTest, arbitrationFailsTask) { // handleOOM(). auto growTask = addTask(328 * MB); auto* growOp = growTask->addMemoryOp(false); - auto* bufGrow = growOp->allocate(64 * MB); - ASSERT_NO_THROW(manager_->testingGrowPool(growOp->pool(), 128 * MB)); + auto* bufGrow1 = growOp->allocate(64 * MB); + auto* bufGrow2 = growOp->allocate(128 * MB); ASSERT_NE(nonReclaimTask->error(), nullptr); try { std::rethrow_exception(nonReclaimTask->error()); @@ -1639,7 +1639,7 @@ DEBUG_ONLY_TEST_F(MockSharedArbitrationTest, orderedArbitration) { } }))); SCOPED_TESTVALUE_SET( - "facebook::velox::memory::SharedArbitrator::sortCandidatesByReclaimableUsedMemory", + "facebook::velox::memory::SharedArbitrator::sortCandidatesByReclaimableUsedCapacity", std::function*)>( ([&](const std::vector* candidates) { for (int i = 1; i < candidates->size(); ++i) { @@ -1977,8 +1977,8 @@ DEBUG_ONLY_TEST_F(MockSharedArbitrationTest, failedToReclaimFromRequestor) { std::atomic_bool arbitrationStarted{false}; SCOPED_TESTVALUE_SET( "facebook::velox::memory::SharedArbitrator::startArbitration", - std::function( - ([&](const MemoryPool* /*unused*/) { + std::function( + ([&](const SharedArbitrator* /*unused*/) { if (!arbitrationStarted) { return; } @@ -2167,8 +2167,8 @@ DEBUG_ONLY_TEST_F(MockSharedArbitrationTest, failedToReclaimFromOtherTask) { std::atomic arbitrationStarted{false}; SCOPED_TESTVALUE_SET( "facebook::velox::memory::SharedArbitrator::startArbitration", - std::function( - ([&](const MemoryPool* /*unsed*/) { + std::function( + ([&](const SharedArbitrator* /*unsed*/) { if (!arbitrationStarted) { return; } @@ -2308,6 +2308,40 @@ TEST_F(MockSharedArbitrationTest, memoryPoolAbortThrow) { ASSERT_EQ(arbitrator_->stats().numAborted, 1); } +// This test makes sure the memory capacity grows as expected. 
+DEBUG_ONLY_TEST_F(MockSharedArbitrationTest, concurrentArbitrationRequests) { + setupMemory(kMemoryCapacity, 0, 0, 0, 128 << 20); + std::shared_ptr task = addTask(); + MockMemoryOperator* op1 = addMemoryOp(task); + MockMemoryOperator* op2 = addMemoryOp(task); + + std::atomic_bool arbitrationWaitFlag{true}; + folly::EventCount arbitrationWait; + std::atomic_bool injectOnce{true}; + SCOPED_TESTVALUE_SET( + "facebook::velox::memory::SharedArbitrator::startArbitration", + std::function( + ([&](const SharedArbitrator* arbitrator) { + if (!injectOnce.exchange(false)) { + return; + } + arbitrationWaitFlag = false; + arbitrationWait.notifyAll(); + while (arbitrator->testingNumRequests() != 2) { + std::this_thread::sleep_for(std::chrono::seconds(5)); // NOLINT + } + }))); + + std::thread firstArbitrationThread([&]() { op1->allocate(64 << 20); }); + + std::thread secondArbitrationThread([&]() { op2->allocate(64 << 20); }); + + firstArbitrationThread.join(); + secondArbitrationThread.join(); + + ASSERT_EQ(task->capacity(), 128 << 20); +} + DEBUG_ONLY_TEST_F( MockSharedArbitrationTest, freeUnusedCapacityWhenReclaimMemoryPool) { @@ -2326,7 +2360,7 @@ DEBUG_ONLY_TEST_F( folly::EventCount reclaimBlock; auto reclaimBlockKey = reclaimBlock.prepareWait(); SCOPED_TESTVALUE_SET( - "facebook::velox::memory::SharedArbitrator::sortCandidatesByReclaimableUsedMemory", + "facebook::velox::memory::SharedArbitrator::sortCandidatesByReclaimableUsedCapacity", std::function(([&](const MemoryPool* /*unsed*/) { reclaimWait.notify(); reclaimBlock.wait(reclaimBlockKey); @@ -2368,10 +2402,11 @@ DEBUG_ONLY_TEST_F( SCOPED_TESTVALUE_SET( "facebook::velox::memory::SharedArbitrator::startArbitration", - std::function(([&](const MemoryPool* /*unsed*/) { - arbitrationRun.notify(); - arbitrationBlock.wait(arbitrationBlockKey); - }))); + std::function( + ([&](const SharedArbitrator* /*unsed*/) { + arbitrationRun.notify(); + arbitrationBlock.wait(arbitrationBlockKey); + }))); std::thread allocThread([&]() { // Allocate more than its capacity to trigger arbitration which is blocked diff --git a/velox/dwio/dwrf/test/WriterFlushTest.cpp b/velox/dwio/dwrf/test/WriterFlushTest.cpp index ae7d6ea3173e..140c8dd975e3 100644 --- a/velox/dwio/dwrf/test/WriterFlushTest.cpp +++ b/velox/dwio/dwrf/test/WriterFlushTest.cpp @@ -167,7 +167,6 @@ class MockMemoryPool : public velox::memory::MemoryPool { } MOCK_CONST_METHOD0(peakBytes, int64_t()); - // MOCK_CONST_METHOD0(getMaxBytes, int64_t()); MOCK_METHOD1(updateSubtreeMemoryUsage, int64_t(int64_t)); @@ -207,7 +206,7 @@ class MockMemoryPool : public velox::memory::MemoryPool { VELOX_UNSUPPORTED("{} unsupported", __FUNCTION__); } - uint64_t grow(uint64_t /*unused*/) noexcept override { + bool grow(uint64_t /*unused*/, uint64_t /*unused*/) noexcept override { VELOX_UNSUPPORTED("{} unsupported", __FUNCTION__); } diff --git a/velox/exec/tests/HashJoinTest.cpp b/velox/exec/tests/HashJoinTest.cpp index f4be9ddb1142..601a0d90a5e5 100644 --- a/velox/exec/tests/HashJoinTest.cpp +++ b/velox/exec/tests/HashJoinTest.cpp @@ -6512,8 +6512,6 @@ TEST_F(HashJoinTest, reclaimFromJoinBuilderWithMultiDrivers) { DEBUG_ONLY_TEST_F( HashJoinTest, failedToReclaimFromHashJoinBuildersInNonReclaimableSection) { - std::unique_ptr memoryManager = createMemoryManager(); - const auto& arbitrator = memoryManager->arbitrator(); auto rowType = ROW({ {"c0", INTEGER()}, {"c1", INTEGER()}, @@ -6522,7 +6520,7 @@ DEBUG_ONLY_TEST_F( const auto vectors = createVectors(rowType, 64 << 20, fuzzerOpts_); const int numDrivers = 1; std::shared_ptr 
queryCtx = - newQueryCtx(memoryManager.get(), executor_.get(), kMemoryCapacity); + newQueryCtx(memory::memoryManager(), executor_.get(), 512 << 20); const auto expectedResult = runHashJoinTask(vectors, queryCtx, numDrivers, pool(), false).data; @@ -6564,14 +6562,12 @@ DEBUG_ONLY_TEST_F( ASSERT_EQ(planStats.spilledBytes, 0); }); - auto fakePool = queryCtx->pool()->addLeafChild( - "fakePool", true, FakeMemoryReclaimer::create()); // Wait for the hash build operators to enter into non-reclaimable section. nonReclaimableSectionWait.await( [&]() { return !nonReclaimableSectionWaitFlag.load(); }); // We expect capacity grow fails as we can't reclaim from hash join operators. - ASSERT_FALSE(memoryManager->testingGrowPool(fakePool.get(), kMemoryCapacity)); + memory::testingRunArbitration(); // Notify the hash build operator that memory arbitration has been done. memoryArbitrationWaitFlag = false; @@ -6583,7 +6579,9 @@ DEBUG_ONLY_TEST_F( // one. We need to make sure any used memory got cleaned up before exiting // the scope waitForAllTasksToBeDeleted(); - ASSERT_EQ(arbitrator->stats().numNonReclaimableAttempts, 2); + ASSERT_EQ( + memory::memoryManager()->arbitrator()->stats().numNonReclaimableAttempts, + 2); } DEBUG_ONLY_TEST_F(HashJoinTest, reclaimFromHashJoinBuildInWaitForTableBuild) { @@ -6769,103 +6767,6 @@ DEBUG_ONLY_TEST_F(HashJoinTest, arbitrationTriggeredByEnsureJoinTableFit) { .run(); } -DEBUG_ONLY_TEST_F(HashJoinTest, reclaimDuringJoinTableBuild) { - std::unique_ptr memoryManager = createMemoryManager(); - const auto& arbitrator = memoryManager->arbitrator(); - auto rowType = ROW({ - {"c0", INTEGER()}, - {"c1", INTEGER()}, - {"c2", VARCHAR()}, - }); - // Build a large vector to trigger memory arbitration. - fuzzerOpts_.vectorSize = 10'000; - std::vector vectors = createVectors(2, rowType, fuzzerOpts_); - createDuckDbTable(vectors); - - std::shared_ptr joinQueryCtx = - newQueryCtx(memoryManager.get(), executor_.get(), kMemoryCapacity); - - std::atomic blockTableBuildOpOnce{true}; - std::atomic tableBuildBlocked{false}; - folly::EventCount tableBuildBlockWait; - std::atomic unblockTableBuild{false}; - folly::EventCount unblockTableBuildWait; - SCOPED_TESTVALUE_SET( - "facebook::velox::exec::HashTable::parallelJoinBuild", - std::function(([&](memory::MemoryPool* pool) { - if (!blockTableBuildOpOnce.exchange(false)) { - return; - } - tableBuildBlocked = true; - tableBuildBlockWait.notifyAll(); - unblockTableBuildWait.await([&]() { return unblockTableBuild.load(); }); - void* buffer = pool->allocate(kMemoryCapacity / 4); - pool->free(buffer, kMemoryCapacity / 4); - }))); - - std::thread joinThread([&]() { - auto planNodeIdGenerator = std::make_shared(); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); - auto task = - AssertQueryBuilder(duckDbQueryRunner_) - .spillDirectory(spillDirectory->getPath()) - .config(core::QueryConfig::kSpillEnabled, true) - .config(core::QueryConfig::kJoinSpillEnabled, true) - .config(core::QueryConfig::kSpillNumPartitionBits, 2) - // Set multiple hash build drivers to trigger parallel build. 
- .maxDrivers(4) - .queryCtx(joinQueryCtx) - .plan(PlanBuilder(planNodeIdGenerator) - .values(vectors, true) - .project({"c0 AS t0", "c1 AS t1", "c2 AS t2"}) - .hashJoin( - {"t0", "t1"}, - {"u1", "u0"}, - PlanBuilder(planNodeIdGenerator) - .values(vectors, true) - .project({"c0 AS u0", "c1 AS u1", "c2 AS u2"}) - .planNode(), - "", - {"t1"}, - core::JoinType::kInner) - .planNode()) - .assertResults( - "SELECT t.c1 FROM tmp as t, tmp AS u WHERE t.c0 == u.c1 AND t.c1 == u.c0"); - }); - - tableBuildBlockWait.await([&]() { return tableBuildBlocked.load(); }); - - folly::EventCount taskPauseWait; - std::atomic taskPaused{false}; - SCOPED_TESTVALUE_SET( - "facebook::velox::exec::Task::requestPauseLocked", - std::function(([&](Task* /*unused*/) { - taskPaused = true; - taskPauseWait.notifyAll(); - }))); - - std::thread memThread([&]() { - std::shared_ptr fakeCtx = - newQueryCtx(memoryManager.get(), executor_.get(), kMemoryCapacity); - auto fakePool = fakeCtx->pool()->addLeafChild("fakePool"); - ASSERT_FALSE(memoryManager->testingGrowPool( - fakePool.get(), memoryManager->arbitrator()->capacity())); - }); - - taskPauseWait.await([&]() { return taskPaused.load(); }); - - unblockTableBuild = true; - unblockTableBuildWait.notifyAll(); - - joinThread.join(); - memThread.join(); - - // This test uses on-demand created memory manager instead of the global - // one. We need to make sure any used memory got cleaned up before exiting - // the scope - waitForAllTasksToBeDeleted(); -} - DEBUG_ONLY_TEST_F(HashJoinTest, joinBuildSpillError) { const int kMemoryCapacity = 32 << 20; // Set a small memory capacity to trigger spill. diff --git a/velox/exec/tests/OrderByTest.cpp b/velox/exec/tests/OrderByTest.cpp index d2ad623e3be7..4f7fff3cf56c 100644 --- a/velox/exec/tests/OrderByTest.cpp +++ b/velox/exec/tests/OrderByTest.cpp @@ -1294,78 +1294,39 @@ TEST_F(OrderByTest, maxSpillBytes) { DEBUG_ONLY_TEST_F(OrderByTest, reclaimFromOrderBy) { std::vector vectors = createVectors(8, rowType_, fuzzerOpts_); createDuckDbTable(vectors); - std::unique_ptr memoryManager = createMemoryManager(); - std::vector sameQueries = {false, true}; - for (bool sameQuery : sameQueries) { - SCOPED_TRACE(fmt::format("sameQuery {}", sameQuery)); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); - std::shared_ptr fakeQueryCtx = - newQueryCtx(memoryManager.get(), executor_.get(), kMemoryCapacity * 2); - std::shared_ptr orderByQueryCtx; - if (sameQuery) { - orderByQueryCtx = fakeQueryCtx; - } else { - orderByQueryCtx = newQueryCtx( - memoryManager.get(), executor_.get(), kMemoryCapacity * 2); - } - - folly::EventCount arbitrationWait; - std::atomic_bool arbitrationWaitFlag{true}; - folly::EventCount taskPauseWait; - std::atomic_bool taskPauseWaitFlag{true}; - - std::atomic_int numInputs{0}; - SCOPED_TESTVALUE_SET( - "facebook::velox::exec::Driver::runInternal::addInput", - std::function(([&](Operator* op) { - if (op->operatorType() != "OrderBy") { - return; - } - if (++numInputs != 5) { - return; - } - arbitrationWaitFlag = false; - arbitrationWait.notifyAll(); - - // Wait for task pause to be triggered. 
- taskPauseWait.await([&] { return !taskPauseWaitFlag.load(); }); - }))); - - SCOPED_TESTVALUE_SET( - "facebook::velox::exec::Task::requestPauseLocked", - std::function(([&](Task* /*unused*/) { - taskPauseWaitFlag = false; - taskPauseWait.notifyAll(); - }))); - - std::thread orderByThread([&]() { - auto task = - AssertQueryBuilder(duckDbQueryRunner_) - .spillDirectory(spillDirectory->getPath()) - .config(core::QueryConfig::kSpillEnabled, true) - .config(core::QueryConfig::kOrderBySpillEnabled, true) - .queryCtx(orderByQueryCtx) - .plan(PlanBuilder() - .values(vectors) - .orderBy({"c0 ASC NULLS LAST"}, false) - .planNode()) - .assertResults("SELECT * FROM tmp ORDER BY c0 ASC NULLS LAST"); - auto stats = task->taskStats().pipelineStats; - ASSERT_GT(stats[0].operatorStats[1].spilledBytes, 0); - }); - - arbitrationWait.await([&] { return !arbitrationWaitFlag.load(); }); - - auto fakePool = fakeQueryCtx->pool()->addLeafChild( - "fakePool", true, FakeMemoryReclaimer::create()); - - memoryManager->testingGrowPool( - fakePool.get(), memoryManager->arbitrator()->capacity()); - - orderByThread.join(); + std::atomic_int numInputs{0}; + SCOPED_TESTVALUE_SET( + "facebook::velox::exec::Driver::runInternal::addInput", + std::function(([&](Operator* op) { + if (op->operatorType() != "OrderBy") { + return; + } + if (++numInputs != 5) { + return; + } + auto* driver = op->testingOperatorCtx()->driver(); + SuspendedSection suspendedSection(driver); + memory::testingRunArbitration(); + }))); - waitForAllTasksToBeDeleted(); - } + const auto spillDirectory = exec::test::TempDirectoryPath::create(); + core::PlanNodeId orderById; + auto task = + AssertQueryBuilder(duckDbQueryRunner_) + .spillDirectory(spillDirectory->getPath()) + .config(core::QueryConfig::kSpillEnabled, true) + .config(core::QueryConfig::kOrderBySpillEnabled, true) + .plan(PlanBuilder() + .values(vectors) + .orderBy({"c0 ASC NULLS LAST"}, false) + .capturePlanNodeId(orderById) + .planNode()) + .assertResults("SELECT * FROM tmp ORDER BY c0 ASC NULLS LAST"); + auto taskStats = exec::toPlanStats(task->taskStats()); + auto& planStats = taskStats.at(orderById); + ASSERT_GT(planStats.spilledBytes, 0); + task.reset(); + waitForAllTasksToBeDeleted(); } DEBUG_ONLY_TEST_F(OrderByTest, reclaimFromEmptyOrderBy) { @@ -1403,54 +1364,4 @@ DEBUG_ONLY_TEST_F(OrderByTest, reclaimFromEmptyOrderBy) { ASSERT_EQ(stats[0].operatorStats[1].spilledBytes, 0); ASSERT_EQ(stats[0].operatorStats[1].spilledPartitions, 0); } - -TEST_F(OrderByTest, reclaimFromCompletedOrderBy) { - std::vector vectors = createVectors(8, rowType_, fuzzerOpts_); - createDuckDbTable(vectors); - std::unique_ptr memoryManager = createMemoryManager(); - std::vector sameQueries = {false, true}; - for (bool sameQuery : sameQueries) { - SCOPED_TRACE(fmt::format("sameQuery {}", sameQuery)); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); - std::shared_ptr fakeQueryCtx = - newQueryCtx(memoryManager.get(), executor_.get(), kMemoryCapacity * 2); - std::shared_ptr orderByQueryCtx; - if (sameQuery) { - orderByQueryCtx = fakeQueryCtx; - } else { - orderByQueryCtx = newQueryCtx( - memoryManager.get(), executor_.get(), kMemoryCapacity * 2); - } - - folly::EventCount arbitrationWait; - std::atomic_bool arbitrationWaitFlag{true}; - - std::thread orderByThread([&]() { - auto task = - AssertQueryBuilder(duckDbQueryRunner_) - .queryCtx(orderByQueryCtx) - .plan(PlanBuilder() - .values(vectors) - .orderBy({"c0 ASC NULLS LAST"}, false) - .planNode()) - .assertResults("SELECT * FROM tmp ORDER BY c0 
ASC NULLS LAST"); - waitForTaskCompletion(task.get()); - arbitrationWaitFlag = false; - arbitrationWait.notifyAll(); - }); - - arbitrationWait.await([&] { return !arbitrationWaitFlag.load(); }); - - auto fakePool = fakeQueryCtx->pool()->addLeafChild( - "fakePool", true, FakeMemoryReclaimer::create()); - - memoryManager->testingGrowPool( - fakePool.get(), memoryManager->arbitrator()->capacity()); - - orderByThread.join(); - - waitForAllTasksToBeDeleted(); - } -} - } // namespace facebook::velox::exec::test diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index 0fa6e5b272e6..400e55c76758 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -4241,7 +4241,8 @@ DEBUG_ONLY_TEST_F( if (!injectOnce.exchange(false)) { return; } - memory::memoryManager()->testingGrowPool(pool, 1 << 20); + VELOX_ASSERT_THROW( + pool->allocate(memory::memoryManager()->capacity()), ""); })); auto op = diff --git a/velox/exec/tests/TableWriteTest.cpp b/velox/exec/tests/TableWriteTest.cpp index 9ab7f52865b4..5a4a45b151ea 100644 --- a/velox/exec/tests/TableWriteTest.cpp +++ b/velox/exec/tests/TableWriteTest.cpp @@ -4205,11 +4205,8 @@ DEBUG_ONLY_TEST_F( createVectors(rowType_, memoryCapacity / 8, fuzzerOpts_); const auto expectedResult = runWriteTask(vectors, nullptr, 1, pool(), kHiveConnectorId, false).data; - - auto memoryManager = createMemoryManager(memoryCapacity); - auto arbitrator = memoryManager->arbitrator(); auto queryCtx = - newQueryCtx(memoryManager.get(), executor_.get(), kMemoryCapacity); + newQueryCtx(memory::memoryManager(), executor_.get(), memoryCapacity); std::atomic_bool writerCloseWaitFlag{true}; folly::EventCount writerCloseWait; @@ -4237,13 +4234,7 @@ DEBUG_ONLY_TEST_F( writerCloseWait.await([&]() { return !writerCloseWaitFlag.load(); }); - // Creates a fake pool to trigger memory arbitration. 
- auto fakePool = queryCtx->pool()->addLeafChild( - "fakePool", true, FakeMemoryReclaimer::create()); - ASSERT_TRUE(memoryManager->testingGrowPool( - fakePool.get(), - arbitrator->stats().freeCapacityBytes + - queryCtx->pool()->capacity() / 2)); + memory::testingRunArbitration(); queryThread.join(); waitForAllTasksToBeDeleted(); From 959f939a185a244bf3331340da0cbf8afc4e5167 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Fri, 26 Apr 2024 13:03:52 -0700 Subject: [PATCH 11/18] Fix crash when map subfield is pruned but we still want to add non-null filter on its keys Summary: https://github.com/facebookincubator/velox/pull/9636 Reviewed By: xiaoxmeng Differential Revision: D56642855 fbshipit-source-id: dc076c8b95b2a62dd21c61bb0561646b205fcc58 --- velox/connectors/hive/HiveConnectorUtil.cpp | 7 ++++--- .../hive/tests/HiveConnectorTest.cpp | 19 +++++++++++++++++++ velox/dwio/common/ScanSpec.h | 4 ++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index ecdf6d2c48c2..2fadb1fd8acd 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -323,9 +323,10 @@ namespace { void filterOutNullMapKeys(const Type& rootType, common::ScanSpec& rootSpec) { rootSpec.visit(rootType, [](const Type& type, common::ScanSpec& spec) { - if (type.isMap()) { - spec.childByName(common::ScanSpec::kMapKeysFieldName) - ->addFilter(common::IsNotNull()); + if (type.isMap() && !spec.isConstant()) { + auto* keys = spec.childByName(common::ScanSpec::kMapKeysFieldName); + VELOX_CHECK_NOT_NULL(keys); + keys->addFilter(common::IsNotNull()); } }); } diff --git a/velox/connectors/hive/tests/HiveConnectorTest.cpp b/velox/connectors/hive/tests/HiveConnectorTest.cpp index 3daa1d792dff..ebc0afdcb6b4 100644 --- a/velox/connectors/hive/tests/HiveConnectorTest.cpp +++ b/velox/connectors/hive/tests/HiveConnectorTest.cpp @@ -417,6 +417,25 @@ TEST_F(HiveConnectorTest, makeScanSpec_filterPartitionKey) { ASSERT_FALSE(scanSpec->childByName("ds")->projectOut()); } +TEST_F(HiveConnectorTest, makeScanSpec_prunedMapNonNullMapKey) { + auto rowType = + ROW({"c0"}, + {ROW( + {{"c0c0", MAP(BIGINT(), MAP(BIGINT(), BIGINT()))}, + {"c0c1", BIGINT()}})}); + auto scanSpec = makeScanSpec( + rowType, + groupSubfields(makeSubfields({"c0.c0c1"})), + {}, + nullptr, + {}, + {}, + pool_.get()); + auto* c0 = scanSpec->childByName("c0"); + ASSERT_EQ(c0->children().size(), 2); + ASSERT_TRUE(c0->childByName("c0c0")->isConstant()); +} + TEST_F(HiveConnectorTest, extractFiltersFromRemainingFilter) { core::QueryCtx queryCtx; exec::SimpleExpressionEvaluator evaluator(&queryCtx, pool_.get()); diff --git a/velox/dwio/common/ScanSpec.h b/velox/dwio/common/ScanSpec.h index 9d998cce896a..75ad6e3935e3 100644 --- a/velox/dwio/common/ScanSpec.h +++ b/velox/dwio/common/ScanSpec.h @@ -410,6 +410,10 @@ class ScanSpec { template void ScanSpec::visit(const Type& type, F&& f) { f(type, *this); + if (isConstant()) { + // Child specs are not populated in this case. 
+ return; + } switch (type.kind()) { case TypeKind::ROW: for (auto& child : children_) { From 029d89ab1854b6ecccdcd62c237bf5c4eb8aad64 Mon Sep 17 00:00:00 2001 From: xiaoxmeng Date: Fri, 26 Apr 2024 14:22:59 -0700 Subject: [PATCH 12/18] Prevent driver thread running again if task reclaim failed (#9633) Summary: The join fuzzer test detects such race condition in task memory reclaim: T1 : hash probe op triggers memory arbitration T2: memory arbitration triggers memory reclaim from hash probe query T3: memory arbitration pauses the task execution T4: memory arbitration fails in steps after spills the pending output from the hash probe operator T5: memory arbitration resumes the task execution T7: another hash probe operator (driver thread) from the same query resumes execution and run into check failure (or some random failure depends how spill fails) when read spilled output which expects the hash table for probe is not empty T8: memory arbitration aborts the query that sets the task error, wait for task to stop and abort task operators. The current mechanism can guarantee that operators which under (or wait) memory arbitration won't execute again if the query has been aborted during the memory arbitration but it has a time window that paused task driver get a chance to run. We shall prevent this race condition by setting task error before resume it. This is very hard to reproduce in unit test so we rely on fuzzer signal. Add a dedicated unit test to cover the task memory reclaim failure is handle properly on the execution path. Pull Request resolved: https://github.com/facebookincubator/velox/pull/9633 Reviewed By: kevinwilfong Differential Revision: D56620867 Pulled By: xiaoxmeng fbshipit-source-id: 1353ef59bd6de325ffbf91794d39764b1154693c --- velox/exec/Spiller.cpp | 3 +-- velox/exec/Task.cpp | 15 ++++++++++++--- velox/exec/tests/TaskTest.cpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/velox/exec/Spiller.cpp b/velox/exec/Spiller.cpp index cd63c5545a2a..31b4ae2980a6 100644 --- a/velox/exec/Spiller.cpp +++ b/velox/exec/Spiller.cpp @@ -227,8 +227,7 @@ Spiller::Spiller( memory::spillMemoryPool(), spillStats, fileCreateConfig) { - TestValue::adjust( - "facebook::velox::exec::Spiller", const_cast(&bits_)); + TestValue::adjust("facebook::velox::exec::Spiller", this); VELOX_CHECK(!spillProbedFlag_ || type_ == Type::kHashJoinBuild); VELOX_CHECK_EQ(container_ == nullptr, type_ == Type::kHashJoinProbe); diff --git a/velox/exec/Task.cpp b/velox/exec/Task.cpp index 8948b99a4fe1..c0ac2f53283c 100644 --- a/velox/exec/Task.cpp +++ b/velox/exec/Task.cpp @@ -2833,9 +2833,18 @@ uint64_t Task::MemoryReclaimer::reclaimTask( if (shrunkBytes >= targetBytes) { return shrunkBytes; } - return shrunkBytes + - memory::MemoryReclaimer::reclaim( - task->pool(), targetBytes - shrunkBytes, maxWaitMs, stats); + uint64_t reclaimedBytes{0}; + try { + reclaimedBytes = memory::MemoryReclaimer::reclaim( + task->pool(), targetBytes - shrunkBytes, maxWaitMs, stats); + } catch (...) { + // Set task error before resumes the task execution as the task operator + // might not be in consistent state anymore. This prevents any off thread + // from running again. 
+ task->setError(std::current_exception()); + std::rethrow_exception(std::current_exception()); + } + return shrunkBytes + reclaimedBytes; } void Task::MemoryReclaimer::abort( diff --git a/velox/exec/tests/TaskTest.cpp b/velox/exec/tests/TaskTest.cpp index 719333208c24..a37c527ce1bc 100644 --- a/velox/exec/tests/TaskTest.cpp +++ b/velox/exec/tests/TaskTest.cpp @@ -25,6 +25,7 @@ #include "velox/exec/OutputBufferManager.h" #include "velox/exec/PlanNodeStats.h" #include "velox/exec/Values.h" +#include "velox/exec/tests/utils/AssertQueryBuilder.h" #include "velox/exec/tests/utils/Cursor.h" #include "velox/exec/tests/utils/HiveConnectorTestBase.h" #include "velox/exec/tests/utils/PlanBuilder.h" @@ -2048,4 +2049,33 @@ DEBUG_ONLY_TEST_F(TaskTest, driverEnqueAfterFailedAndPausedTask) { task.reset(); waitForAllTasksToBeDeleted(); } + +DEBUG_ONLY_TEST_F(TaskTest, taskReclaimFailure) { + const auto rowType = + ROW({"c0", "c1", "c2"}, {INTEGER(), DOUBLE(), INTEGER()}); + const auto inputVectors = makeVectors(rowType, 128, 256); + createDuckDbTable(inputVectors); + + const std::string spillTableError{"spillTableError"}; + SCOPED_TESTVALUE_SET( + "facebook::velox::exec::Spiller", + std::function( + [&](Spiller* /*unused*/) { VELOX_FAIL(spillTableError); })); + + TestScopedSpillInjection injection(100); + const auto spillDirectory = exec::test::TempDirectoryPath::create(); + VELOX_ASSERT_THROW( + AssertQueryBuilder(duckDbQueryRunner_) + .spillDirectory(spillDirectory->getPath()) + .config(core::QueryConfig::kSpillEnabled, true) + .config(core::QueryConfig::kAggregationSpillEnabled, true) + .maxDrivers(1) + .plan(PlanBuilder() + .values(inputVectors) + .singleAggregation({"c0", "c1"}, {"array_agg(c2)"}) + .planNode()) + .assertResults( + "SELECT c0, c1, array_agg(c2) FROM tmp GROUP BY c0, c1"), + spillTableError); +} } // namespace facebook::velox::exec::test From f9c8fd4cb5b3d05305d4777128639e1c11195355 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Fri, 26 Apr 2024 14:37:46 -0700 Subject: [PATCH 13/18] Add year_of_week Spark function (#9481) Summary: This is mostly copied from presto [YearOfWeek function](https://github.com/facebookincubator/velox/blob/main/velox/functions/prestosql/DateTimeFunctions.h#L594). The differences are: 1) Spark only supports Date type input. 2) The result type is int64_t for Presto, but int32_t for Spark. See doc: https://docs.databricks.com/en/sql/language-manual/functions/extract.html Pull Request resolved: https://github.com/facebookincubator/velox/pull/9481 Reviewed By: xiaoxmeng Differential Revision: D56640766 Pulled By: kagamiori fbshipit-source-id: a1a26afaebde1b2b2c4c4fd66b5caac6b7ab8d9c --- velox/docs/functions/spark/datetime.rst | 7 +++++ velox/functions/sparksql/DateTimeFunctions.h | 29 +++++++++++++++++++ velox/functions/sparksql/Register.cpp | 2 ++ .../sparksql/tests/DateTimeFunctionsTest.cpp | 15 ++++++++++ 4 files changed, 53 insertions(+) diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst index d7ed56e5fc51..5472b5a2c8db 100644 --- a/velox/docs/functions/spark/datetime.rst +++ b/velox/docs/functions/spark/datetime.rst @@ -282,3 +282,10 @@ These functions support TIMESTAMP and DATE input types. .. spark:function:: year(x) -> integer Returns the year from ``x``. + +.. spark:function:: year_of_week(x) -> integer + + Returns the ISO week-numbering year that ``x`` falls in. For example, 2005-01-02 is + part of the 53rd week of year 2004, so the result is 2004. Only supports DATE type. 
+ + SELECT year_of_week('2005-01-02'); -- 2004 diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 0ba287089e4a..458d61ade2f5 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -98,6 +98,35 @@ struct WeekFunction : public InitSessionTimezone { } }; +template +struct YearOfWeekFunction : public InitSessionTimezone { + VELOX_DEFINE_FUNCTION_TYPES(T); + + FOLLY_ALWAYS_INLINE void call(int32_t& result, const arg_type& date) { + const auto dateTime = getDateTime(date); + + int isoWeekDay = dateTime.tm_wday == 0 ? 7 : dateTime.tm_wday; + // The last few days in December may belong to the next year if they are + // in the same week as the next January 1 and this January 1 is a Thursday + // or before. + if (UNLIKELY( + dateTime.tm_mon == 11 && dateTime.tm_mday >= 29 && + dateTime.tm_mday - isoWeekDay >= 31 - 3)) { + result = 1900 + dateTime.tm_year + 1; + return; + } + // The first few days in January may belong to the last year if they are + // in the same week as January 1 and January 1 is a Friday or after. + if (UNLIKELY( + dateTime.tm_mon == 0 && dateTime.tm_mday <= 3 && + isoWeekDay - (dateTime.tm_mday - 1) >= 5)) { + result = 1900 + dateTime.tm_year - 1; + return; + } + result = 1900 + dateTime.tm_year; + } +}; + template struct UnixDateFunction { VELOX_DEFINE_FUNCTION_TYPES(T); diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp index 81f42fbced02..b553b38ca0e3 100644 --- a/velox/functions/sparksql/Register.cpp +++ b/velox/functions/sparksql/Register.cpp @@ -318,6 +318,8 @@ void registerFunctions(const std::string& prefix) { registerFunction({prefix + "year"}); registerFunction({prefix + "week_of_year"}); registerFunction({prefix + "week_of_year"}); + registerFunction( + {prefix + "year_of_week"}); registerFunction( {prefix + "to_utc_timestamp"}); diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index 383f8bd48d64..9226f496cb4f 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -968,5 +968,20 @@ TEST_F(DateTimeFunctionsTest, makeYMInterval) { fromYear(-178956971), "Integer overflow in make_ym_interval(-178956971)"); } +TEST_F(DateTimeFunctionsTest, yearOfWeek) { + const auto yearOfWeek = [&](std::optional date) { + return evaluateOnce("year_of_week(c0)", {date}, {DATE()}); + }; + EXPECT_EQ(1970, yearOfWeek(0)); + EXPECT_EQ(1970, yearOfWeek(-1)); + EXPECT_EQ(1969, yearOfWeek(-4)); + EXPECT_EQ(1970, yearOfWeek(-3)); + EXPECT_EQ(1970, yearOfWeek(365)); + EXPECT_EQ(1970, yearOfWeek(367)); + EXPECT_EQ(1971, yearOfWeek(368)); + EXPECT_EQ(2005, yearOfWeek(parseDate("2006-01-01"))); + EXPECT_EQ(2006, yearOfWeek(parseDate("2006-01-02"))); +} + } // namespace } // namespace facebook::velox::functions::sparksql::test From c2526ba83b5b7abbf684716e8230cefd5dcf90d8 Mon Sep 17 00:00:00 2001 From: rui-mo Date: Fri, 26 Apr 2024 15:47:02 -0700 Subject: [PATCH 14/18] Throw user error for decimal overflow in decimal divide Presto function (#9632) Summary: Decimal divide fails in fuzzer when the result scale exceeds 38. Changes to throw user error instead. 
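For context on when this limit is hit: before dividing, the dividend is scaled up by max(aScale, bScale) - aScale + bScale digits, and the patch turns the case where that factor exceeds the 38-digit long-decimal limit into a user error rather than an internal failure. A minimal standalone sketch of the check follows (plain C++ mirroring the formula in the diff below; the helper name and error type are illustrative, not Velox APIs):

  #include <algorithm>
  #include <iostream>
  #include <stdexcept>

  // Rescale factor applied to the dividend before a decimal divide; it may
  // not exceed the maximum long-decimal precision of 38 digits.
  int computeARescale(int aScale, int bScale) {
    const int kMaxPrecision = 38; // LongDecimalType::kMaxPrecision
    const int rScale = std::max(aScale, bScale);
    const int aRescale = rScale - aScale + bScale;
    if (aRescale > kMaxPrecision) {
      // Reported as a user error (VELOX_USER_CHECK_LE in the actual change).
      throw std::invalid_argument("Decimal overflow");
    }
    return aRescale;
  }

  int main() {
    std::cout << computeARescale(0, 2) << "\n"; // 4: well within the limit
    try {
      computeARescale(0, 38); // 76 > 38: rejected instead of crashing
    } catch (const std::invalid_argument& e) {
      std::cout << e.what() << "\n";
    }
    return 0;
  }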
Pull Request resolved: https://github.com/facebookincubator/velox/pull/9632 Reviewed By: Yuhta Differential Revision: D56640827 Pulled By: kagamiori fbshipit-source-id: 955c468dc1eb04b493f7e64737dc1ff59ab3e000 --- .../functions/prestosql/DecimalFunctions.cpp | 2 +- .../prestosql/tests/DecimalArithmeticTest.cpp | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/velox/functions/prestosql/DecimalFunctions.cpp b/velox/functions/prestosql/DecimalFunctions.cpp index 0cee53e6ba79..fa89a50cbf94 100644 --- a/velox/functions/prestosql/DecimalFunctions.cpp +++ b/velox/functions/prestosql/DecimalFunctions.cpp @@ -149,7 +149,7 @@ struct DecimalDivideFunction { auto bScale = getDecimalPrecisionScale(*bType).second; auto rScale = std::max(aScale, bScale); aRescale_ = rScale - aScale + bScale; - VELOX_CHECK_LE( + VELOX_USER_CHECK_LE( aRescale_, LongDecimalType::kMaxPrecision, "Decimal overflow"); } diff --git a/velox/functions/prestosql/tests/DecimalArithmeticTest.cpp b/velox/functions/prestosql/tests/DecimalArithmeticTest.cpp index 475d9c4fa320..b363c71bb96b 100644 --- a/velox/functions/prestosql/tests/DecimalArithmeticTest.cpp +++ b/velox/functions/prestosql/tests/DecimalArithmeticTest.cpp @@ -116,7 +116,7 @@ TEST_F(DecimalArithmeticTest, add) { {1, 2, 5, std::nullopt, std::nullopt}, DECIMAL(10, 3))}); // Addition overflow. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "c0 + cast(1.00 as decimal(2,0))", @@ -126,7 +126,7 @@ TEST_F(DecimalArithmeticTest, add) { "Decimal overflow. Value '100000000000000000000000000000000000000' is not in the range of Decimal Type"); // Rescaling LHS overflows. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "c0 + 0.01", @@ -135,7 +135,7 @@ TEST_F(DecimalArithmeticTest, add) { DECIMAL(38, 0))}), "Decimal overflow: 99999999999999999999999999999999999999 + 1"); // Rescaling RHS overflows. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "0.01 + c0", @@ -202,7 +202,7 @@ TEST_F(DecimalArithmeticTest, subtract) { {1, 2, 5, std::nullopt, std::nullopt}, DECIMAL(10, 3))}); // Subtraction overflow. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "c0 - cast(1.00 as decimal(2,0))", @@ -211,7 +211,7 @@ TEST_F(DecimalArithmeticTest, subtract) { DECIMAL(38, 0))}), "Decimal overflow. Value '-100000000000000000000000000000000000000' is not in the range of Decimal Type"); // Rescaling LHS overflows. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "c0 - 0.01", @@ -220,7 +220,7 @@ TEST_F(DecimalArithmeticTest, subtract) { DECIMAL(38, 0))}), "Decimal overflow: -99999999999999999999999999999999999999 - 1"); // Rescaling RHS overflows. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "0.01 - c0", @@ -271,7 +271,7 @@ TEST_F(DecimalArithmeticTest, multiply) { expectedConstantFlat, "c0 * 1.00", {shortFlat}); // Long decimal limits - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "c0 * cast(10.00 as decimal(2,0))", @@ -282,7 +282,7 @@ TEST_F(DecimalArithmeticTest, multiply) { "Decimal overflow. Value '119630519620642428561342635425231011830' is not in the range of Decimal Type"); // Rescaling the final result overflows. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "c0 * cast(1.00 as decimal(2,1))", @@ -365,12 +365,12 @@ TEST_F(DecimalArithmeticTest, decimalDivTest) { {makeFlatVector({-34, 5, 65, 90, 2, -49}, DECIMAL(2, 1))}); // Divide by zero. 
- VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr({}, "c0 / 0.0", {shortFlat}), "Division by zero"); // Long decimal limits. - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( testDecimalExpr( {}, "c0 / 0.01", @@ -380,7 +380,7 @@ TEST_F(DecimalArithmeticTest, decimalDivTest) { "Decimal overflow: 99999999999999999999999999999999999999 * 10000"); // Rescale factor > max precision (38). - VELOX_ASSERT_THROW( + VELOX_ASSERT_USER_THROW( evaluate( "divide(c0, c1)", makeRowVector( From 6bcf11edd9282e2e1ffaf9b72793b49b646497d9 Mon Sep 17 00:00:00 2001 From: Daniel Munoz Date: Fri, 26 Apr 2024 18:04:23 -0700 Subject: [PATCH 15/18] Fix bug in DwrfReader onRead Summary: https://github.com/facebookincubator/velox/pull/9640 We weren't translating to loadUnitIdx. Reviewed By: kgpai, Sullivan-Patrick Differential Revision: D56658575 fbshipit-source-id: 5c293be30a7e43afd67252d291b58d6bd1cb4ae6 --- velox/dwio/dwrf/reader/DwrfReader.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/velox/dwio/dwrf/reader/DwrfReader.cpp b/velox/dwio/dwrf/reader/DwrfReader.cpp index 99e981e40a75..258c09bb7f1b 100644 --- a/velox/dwio/dwrf/reader/DwrfReader.cpp +++ b/velox/dwio/dwrf/reader/DwrfReader.cpp @@ -648,7 +648,8 @@ uint64_t DwrfRowReader::next( // reading of the data. auto strideSize = getReader().getFooter().rowIndexStride(); strideIndex_ = strideSize > 0 ? currentRowInStripe_ / strideSize : 0; - unitLoader_->onRead(currentStripe_, currentRowInStripe_, rowsToRead); + const auto loadUnitIdx = currentStripe_ - firstStripe_; + unitLoader_->onRead(loadUnitIdx, currentRowInStripe_, rowsToRead); readNext(rowsToRead, mutation, result); currentRowInStripe_ += rowsToRead; return rowsToRead; From 47970417ac92135e862c0fde350d4d60fa2f1423 Mon Sep 17 00:00:00 2001 From: Yihong Wang Date: Fri, 26 Apr 2024 19:44:28 -0700 Subject: [PATCH 16/18] Add word_stem Presto function (#9363) Summary: Add snowball libstemmer v2.2.0 as one of the dependencies. And use it to implement the word_stem() as a scalar UDF. When using the libstemmer API, each language creates an `sb_stemmer` instance which consumes 114 bytes, including the default 10 bytes for the output stem. It uses the `realloc` to increase the memory block for the output stem if needed. 
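A minimal standalone sketch of the snowball C API usage described above (the same calls the new word_stem implementation makes; building and linking against libstemmer is assumed here and is not part of this patch):

  #include <iostream>
  #include <string>
  #include <libstemmer.h>

  int main() {
    // One stemmer instance per language; nullptr means the language is
    // unsupported.
    sb_stemmer* stemmer = sb_stemmer_new("en", "UTF_8");
    if (stemmer == nullptr) {
      std::cerr << "unsupported language\n";
      return 1;
    }
    const std::string word = "generally"; // already lower-cased, as the UDF does
    const sb_symbol* stem = sb_stemmer_stem(
        stemmer,
        reinterpret_cast<const sb_symbol*>(word.c_str()),
        static_cast<int>(word.size()));
    if (stem != nullptr) { // nullptr indicates out of memory
      std::cout << reinterpret_cast<const char*>(stem) << "\n"; // prints "general"
    }
    sb_stemmer_delete(stemmer);
    return 0;
  }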
Fixes https://github.com/facebookincubator/velox/issues/8487 Pull Request resolved: https://github.com/facebookincubator/velox/pull/9363 Reviewed By: amitkdutta Differential Revision: D56059511 Pulled By: pedroerp fbshipit-source-id: b3a66956c3809e3f3dadfc8cc7b397b7116996d5 --- CMake/resolve_dependency_modules/README.md | 1 + .../libstemmer/Makefile.patch | 24 ++++ .../resolve_dependency_modules/stemmer.cmake | 57 ++++++++ CMakeLists.txt | 3 + velox/docs/functions/presto/string.rst | 37 +++++ velox/functions/prestosql/CMakeLists.txt | 3 +- velox/functions/prestosql/WordStem.h | 132 ++++++++++++++++++ .../StringFunctionsRegistration.cpp | 6 + .../functions/prestosql/tests/CMakeLists.txt | 1 + .../prestosql/tests/WordStemTest.cpp | 80 +++++++++++ 10 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 CMake/resolve_dependency_modules/libstemmer/Makefile.patch create mode 100644 CMake/resolve_dependency_modules/stemmer.cmake create mode 100644 velox/functions/prestosql/WordStem.h create mode 100644 velox/functions/prestosql/tests/WordStemTest.cpp diff --git a/CMake/resolve_dependency_modules/README.md b/CMake/resolve_dependency_modules/README.md index 651926be6f19..a88273280978 100644 --- a/CMake/resolve_dependency_modules/README.md +++ b/CMake/resolve_dependency_modules/README.md @@ -37,6 +37,7 @@ by Velox. See details on bundling below. | wangle | v2024.04.01.00 | No | | mvfst | v2024.04.01.00 | No | | fbthrift | v2024.04.01.00 | No | +| libstemmer | 2.2.0 | Yes | | DuckDB (testing) | 0.8.1 | Yes | | cpr (testing) | 1.10.15 | Yes | diff --git a/CMake/resolve_dependency_modules/libstemmer/Makefile.patch b/CMake/resolve_dependency_modules/libstemmer/Makefile.patch new file mode 100644 index 000000000000..e7d691052111 --- /dev/null +++ b/CMake/resolve_dependency_modules/libstemmer/Makefile.patch @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- a/Makefile ++++ b/Makefile +@@ -3,7 +3,7 @@ + EXEEXT=.exe + endif + CFLAGS=-O2 +-CPPFLAGS=-Iinclude ++CPPFLAGS=-Iinclude -fPIC + all: libstemmer.a stemwords$(EXEEXT) + libstemmer.a: $(snowball_sources:.c=.o) + $(AR) -cru $@ $^ diff --git a/CMake/resolve_dependency_modules/stemmer.cmake b/CMake/resolve_dependency_modules/stemmer.cmake new file mode 100644 index 000000000000..8fb16e160d68 --- /dev/null +++ b/CMake/resolve_dependency_modules/stemmer.cmake @@ -0,0 +1,57 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+include_guard(GLOBAL) + +set(VELOX_STEMMER_VERSION 2.2.0) +set(VELOX_STEMMER_BUILD_SHA256_CHECKSUM + b941d9fe9cf36b4e2f8d3873cd4d8b8775bd94867a1df8d8c001bb8b688377c3) +set(VELOX_STEMMER_SOURCE_URL + "https://snowballstem.org/dist/libstemmer_c-${VELOX_STEMMER_VERSION}.tar.gz" +) + +resolve_dependency_url(STEMMER) + +message(STATUS "Building stemmer from source") +find_program(MAKE_PROGRAM make REQUIRED) + +set(STEMMER_PREFIX "${CMAKE_BINARY_DIR}/_deps/libstemmer") +set(STEMMER_INCLUDE_PATH ${STEMMER_PREFIX}/src/libstemmer/include) + +# We can not use FetchContent as libstemmer does not use cmake +ExternalProject_Add( + libstemmer + PREFIX ${STEMMER_PREFIX} + SOURCE_DIR ${STEMMER_PREFIX}/src/libstemmer + URL ${VELOX_STEMMER_SOURCE_URL} + URL_HASH ${VELOX_STEMMER_BUILD_SHA256_CHECKSUM} + BUILD_IN_SOURCE TRUE + CONFIGURE_COMMAND "" + BUILD_COMMAND ${MAKE_PROGRAM} + INSTALL_COMMAND "" + PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/libstemmer/Makefile.patch + BUILD_BYPRODUCTS + ${STEMMER_PREFIX}/src/libstemmer/${CMAKE_STATIC_LIBRARY_PREFIX}stemmer${CMAKE_STATIC_LIBRARY_SUFFIX} +) + +add_library(stemmer STATIC IMPORTED) +add_library(stemmer::stemmer ALIAS stemmer) +file(MAKE_DIRECTORY ${STEMMER_INCLUDE_PATH}) +set_target_properties( + stemmer + PROPERTIES + IMPORTED_LOCATION + ${STEMMER_PREFIX}/src/libstemmer/${CMAKE_STATIC_LIBRARY_PREFIX}stemmer${CMAKE_STATIC_LIBRARY_SUFFIX} + INTERFACE_INCLUDE_DIRECTORIES ${STEMMER_INCLUDE_PATH}) + +add_dependencies(stemmer libstemmer) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45eb5ec955ed..4885f49bcb3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -548,6 +548,9 @@ endif() set_source(xsimd) resolve_dependency(xsimd 10.0.0) +set(stemmer_SOURCE BUNDLED) +resolve_dependency(stemmer) + if(VELOX_BUILD_TESTING) set(BUILD_TESTING ON) include(CTest) # include after project() but before add_subdirectory() diff --git a/velox/docs/functions/presto/string.rst b/velox/docs/functions/presto/string.rst index 2db49c6f6c81..44fb82cef4fa 100644 --- a/velox/docs/functions/presto/string.rst +++ b/velox/docs/functions/presto/string.rst @@ -255,6 +255,43 @@ String Functions Converts ``string`` to uppercase. +.. function:: word_stem(word) -> varchar + + Returns the stem of ``word`` in the English language. If the ``word`` is not an English word, + the ``word`` in lowercase is returned. + +.. function:: word_stem(word, lang) -> varchar + + Returns the stem of ``word`` in the ``lang`` language. This function supports the following languages: + + =========== ================ + lang Language + =========== ================ + ``ca`` ``Catalan`` + ``da`` ``Danish`` + ``de`` ``German`` + ``en`` ``English`` + ``es`` ``Spanish`` + ``eu`` ``Basque`` + ``fi`` ``Finnish`` + ``fr`` ``French`` + ``hu`` ``Hungarian`` + ``hy`` ``Armenian`` + ``ir`` ``Irish`` + ``it`` ``Italian`` + ``lt`` ``Lithuanian`` + ``nl`` ``Dutch`` + ``no`` ``Norwegian`` + ``pt`` ``Portuguese`` + ``ro`` ``Romanian`` + ``ru`` ``Russian`` + ``sv`` ``Swedish`` + ``tr`` ``Turkish`` + =========== ================ + + If the specified ``lang`` is not supported, this function throws a user error. 
+ + Unicode Functions ----------------- diff --git a/velox/functions/prestosql/CMakeLists.txt b/velox/functions/prestosql/CMakeLists.txt index 54d959cbbb37..c532645f6f29 100644 --- a/velox/functions/prestosql/CMakeLists.txt +++ b/velox/functions/prestosql/CMakeLists.txt @@ -73,7 +73,8 @@ target_link_libraries( velox_type_tz velox_presto_types velox_functions_util - Folly::folly) + Folly::folly + stemmer::stemmer) set_property(TARGET velox_functions_prestosql_impl PROPERTY JOB_POOL_COMPILE high_memory_pool) diff --git a/velox/functions/prestosql/WordStem.h b/velox/functions/prestosql/WordStem.h new file mode 100644 index 000000000000..9d088b202467 --- /dev/null +++ b/velox/functions/prestosql/WordStem.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include // @manual + +#include "velox/functions/Udf.h" +#include "velox/functions/lib/string/StringImpl.h" + +namespace facebook::velox::functions { + +namespace detail { +// Wrap the sbstemmer library and use its sb_stemmer_stem +// to get word stem. +class Stemmer { + public: + Stemmer(sb_stemmer* stemmer) : sbStemmer_(stemmer) { + VELOX_CHECK_NOT_NULL(stemmer); + } + + ~Stemmer() { + sb_stemmer_delete(sbStemmer_); + } + + // Get the word stem or NULL if out of memory. + const char* stem(const std::string& input) { + return (const char*)(sb_stemmer_stem( + sbStemmer_, + reinterpret_cast(input.c_str()), + input.length())); + } + + private: + sb_stemmer* sbStemmer_; +}; +} // namespace detail + +/// word_stem function +/// word_stem(word) -> varchar +/// return the stem of the word in the English language +/// word_stem(word, lang) -> varchar +/// return the stem of the word in the specificed language +/// +/// Use the snowball stemmer library to calculate the stem. +/// https://snowballstem.org +/// The website provides Java implementation which is used in Presto as well +/// as C implementation. Therefore, both Presto and Prestimissio +/// would have the same word stem results. +template +struct WordStemFunction { + VELOX_DEFINE_FUNCTION_TYPES(TExec); + + // ASCII input always produces ASCII result. 
+ static constexpr bool is_default_ascii_behavior = true; + + FOLLY_ALWAYS_INLINE void call( + out_type& result, + const arg_type& input) { + return doCall(result, input); + } + + FOLLY_ALWAYS_INLINE void callAscii( + out_type& result, + const arg_type& input) { + return doCall(result, input); + } + + FOLLY_ALWAYS_INLINE void call( + out_type& result, + const arg_type& input, + const arg_type& lang) { + return doCall(result, input, lang); + } + + FOLLY_ALWAYS_INLINE void callAscii( + out_type& result, + const arg_type& input, + const arg_type& lang) { + return doCall(result, input, lang); + } + + template + FOLLY_ALWAYS_INLINE void doCall( + out_type& result, + const arg_type& input, + const std::string& lang = "en") { + auto* stemmer = getStemmer(lang); + VELOX_USER_CHECK_NOT_NULL( + stemmer, "Unsupported stemmer language: {}", lang); + + std::string lowerOutput; + stringImpl::lower(lowerOutput, input); + auto* stem = stemmer->stem(lowerOutput); + VELOX_CHECK_NOT_NULL( + stem, "Stemmer library returned a NULL (out-of-memory)") + result = stem; + } + + private: + folly::F14FastMap> stemmers_; + + // Get a detail::Stemmer from the the map using the lang as the key or create + // a new one if it doesn't exist. Return nullptr if the specified lang is not + // supported. + detail::Stemmer* getStemmer(const std::string& lang) { + if (auto found = stemmers_.find(lang); found != stemmers_.end()) { + return found->second.get(); + } + // Only support ASCII and UTF-8. + if (auto sbStemmer = sb_stemmer_new(lang.c_str(), "UTF_8")) { + auto* stemmer = new detail::Stemmer(sbStemmer); + stemmers_[lang] = std::unique_ptr(stemmer); + return stemmer; + } + return nullptr; + } +}; +} // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp b/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp index f1e36049e92d..4ceaf4edab94 100644 --- a/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp @@ -19,6 +19,7 @@ #include "velox/functions/prestosql/SplitPart.h" #include "velox/functions/prestosql/SplitToMap.h" #include "velox/functions/prestosql/StringFunctions.h" +#include "velox/functions/prestosql/WordStem.h" namespace facebook::velox::functions { @@ -127,5 +128,10 @@ void registerStringFunctions(const std::string& prefix) { {prefix + "strrpos"}); registerFunction( {prefix + "strrpos"}); + + // word_stem function + registerFunction({prefix + "word_stem"}); + registerFunction( + {prefix + "word_stem"}); } } // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt index d10283a85c00..7f22424d641d 100644 --- a/velox/functions/prestosql/tests/CMakeLists.txt +++ b/velox/functions/prestosql/tests/CMakeLists.txt @@ -97,6 +97,7 @@ add_executable( URLFunctionsTest.cpp Utf8Test.cpp WidthBucketArrayTest.cpp + WordStemTest.cpp ZipTest.cpp ZipWithTest.cpp) diff --git a/velox/functions/prestosql/tests/WordStemTest.cpp b/velox/functions/prestosql/tests/WordStemTest.cpp new file mode 100644 index 000000000000..e94783e1dd1a --- /dev/null +++ b/velox/functions/prestosql/tests/WordStemTest.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" + +using namespace facebook::velox::functions::test; + +namespace facebook::velox::functions { +namespace { +class WordStemTest : public FunctionBaseTest { + protected: + std::string wordStem(const std::string& word, const std::string& lang) { + return evaluateOnce( + "word_stem(c0, c1)", std::optional(word), std::optional(lang)) + .value(); + } + + std::string wordStem(const std::string& word) { + return evaluateOnce("word_stem(c0)", std::optional(word)) + .value(); + } +}; + +/// Borrow test cases from Presto Java: +/// https://github.com/prestodb/presto/blob/master/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestWordStemFunction.java +TEST_F(WordStemTest, asciiWord) { + EXPECT_EQ(wordStem(""), ""); + EXPECT_EQ(wordStem("x"), "x"); + EXPECT_EQ(wordStem("abc"), "abc"); + EXPECT_EQ(wordStem("generally"), "general"); + EXPECT_EQ(wordStem("useful"), "use"); + EXPECT_EQ(wordStem("runs"), "run"); + EXPECT_EQ(wordStem("run"), "run"); + EXPECT_EQ(wordStem("authorized", "en"), "author"); + EXPECT_EQ(wordStem("accessories", "en"), "accessori"); + EXPECT_EQ(wordStem("intensifying", "en"), "intensifi"); + EXPECT_EQ(wordStem("resentment", "en"), "resent"); + EXPECT_EQ(wordStem("faithfulness", "en"), "faith"); + EXPECT_EQ(wordStem("continuerait", "fr"), "continu"); + EXPECT_EQ(wordStem("torpedearon", "es"), "torped"); + EXPECT_EQ(wordStem("quilomtricos", "pt"), "quilomtr"); + EXPECT_EQ(wordStem("pronunziare", "it"), "pronunz"); + EXPECT_EQ(wordStem("auferstnde", "de"), "auferstnd"); +} + +TEST_F(WordStemTest, invalidLang) { + VELOX_ASSERT_THROW( + wordStem("hello", "xx"), "Unsupported stemmer language: xx"); +} + +TEST_F(WordStemTest, unicodeWord) { + EXPECT_EQ( + wordStem( + "\u004b\u0069\u0074\u0061\u0062\u0131\u006d\u0131\u007a\u0064\u0131", + "tr"), + "kitap"); + EXPECT_EQ( + wordStem("\u0432\u0435\u0441\u0435\u043d\u043d\u0438\u0439", "ru"), + "\u0432\u0435\u0441\u0435\u043d"); +} + +} // namespace +} // namespace facebook::velox::functions From adc5219b5f720cf2d95616f480fa3ea0934da128 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 27 Apr 2024 14:00:18 -0700 Subject: [PATCH 17/18] Fix workflow syntax for build-metrix.yml (#9642) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/9642 Reviewed By: kgpai Differential Revision: D56669996 Pulled By: kagamiori fbshipit-source-id: c092b6cafed5290d4414318404e6d742c226539c --- .github/workflows/build-metrics.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-metrics.yml b/.github/workflows/build-metrics.yml index c493d70c0f0c..e97c7d01121b 100644 --- a/.github/workflows/build-metrics.yml +++ b/.github/workflows/build-metrics.yml @@ -41,8 +41,8 @@ jobs: strategy: fail-fast: false matrix: - - runner: "16-core" - - type: ["debug", "release"] + runner: ["16-core"] + type: ["debug", "release"] defaults: run: shell: bash From 1426f33bc3cd80c4d47dfb88eda5482cd14bd7eb Mon Sep 17 00:00:00 2001 
From: Jialiang Tan Date: Sun, 28 Apr 2024 12:48:12 -0700 Subject: [PATCH 18/18] Fix MultiFragmentTest.compression (#9639) Summary: The test was flaky because the task created later has the same task id. If the previous task does not get deleted in time before the second task comes in, the test will fail. The fix is to give the tasks different names https://github.com/facebookincubator/velox/issues/9453 Pull Request resolved: https://github.com/facebookincubator/velox/pull/9639 Reviewed By: xiaoxmeng Differential Revision: D56651191 Pulled By: tanjialiang fbshipit-source-id: 3bdfbe9e7fbc30ac94f92788dd2c37194d5a99b5 --- velox/exec/tests/MultiFragmentTest.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/velox/exec/tests/MultiFragmentTest.cpp b/velox/exec/tests/MultiFragmentTest.cpp index ba061d4cf250..3503795bd447 100644 --- a/velox/exec/tests/MultiFragmentTest.cpp +++ b/velox/exec/tests/MultiFragmentTest.cpp @@ -2024,7 +2024,6 @@ TEST_F(MultiFragmentTest, compression) { .values({data}, false, kNumRepeats) .partitionedOutput({}, 1) .planNode(); - const auto producerTaskId = "local://t1"; const auto plan = test::PlanBuilder() .exchange(asRowType(data->type())) @@ -2034,7 +2033,9 @@ TEST_F(MultiFragmentTest, compression) { const auto expected = makeRowVector({makeFlatVector(std::vector{6000000})}); - const auto test = [&](float minCompressionRatio, bool expectSkipCompression) { + const auto test = [&](const std::string& producerTaskId, + float minCompressionRatio, + bool expectSkipCompression) { PartitionedOutput::testingSetMinCompressionRatio(minCompressionRatio); auto producerTask = makeTask(producerTaskId, producerPlan); producerTask->start(1); @@ -2061,8 +2062,8 @@ TEST_F(MultiFragmentTest, compression) { } }; - test(0.7, false); - test(0.0000001, true); + test("local://t1", 0.7, false); + test("local://t2", 0.0000001, true); } } // namespace
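A more general way to avoid this class of flake is to never hard-code task ids in tests that create multiple producer tasks; a hypothetical helper (not part of this patch) could mint a fresh id per invocation:

  #include <atomic>
  #include <string>

  // Each call returns a task id that has not been used before in this
  // process, so a task still being torn down cannot collide with a new one.
  std::string nextTaskId() {
    static std::atomic<int> counter{0};
    return "local://t" + std::to_string(++counter);
  }

The fix above keeps things simpler by passing distinct literal ids ("local://t1", "local://t2") to the two test invocations.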