Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/acvictor/velox into acvicto…
Browse files Browse the repository at this point in the history
…r/unixSeconds
  • Loading branch information
acvictor committed Apr 30, 2024
2 parents a5c23ae + 1426f33 commit 39ad22b
Show file tree
Hide file tree
Showing 320 changed files with 10,116 additions and 4,359 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ on:
- 'third_party/**'
- 'pyvelox/**'
- '.github/workflows/benchmark.yml'
- 'scripts/benchmark-requirements.txt'

push:
branches: [main]

Expand Down
118 changes: 118 additions & 0 deletions .github/workflows/build-metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Collect Build Metrics

on:
pull_request:
paths:
- ".github/workflows/build-metrics.yml"

workflow_dispatch:
inputs:
ref:
description: "ref to check"
required: true

schedule:
# Run every day at 04:05
- cron: "5 4 * * *"

permissions:
contents: read

jobs:
metrics:
name: Linux ${{ matrix.type }} with adapters
if: ${{ github.repository == 'facebookincubator/velox' }}
runs-on: ${{ matrix.runner }}
container: ghcr.io/facebookincubator/velox-dev:adapters
strategy:
fail-fast: false
matrix:
runner: ["16-core"]
type: ["debug", "release"]
defaults:
run:
shell: bash
env:
VELOX_DEPENDENCY_SOURCE: SYSTEM
simdjson_SOURCE: BUNDLED
xsimd_SOURCE: BUNDLED
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.sha }}

- name: Fix git permissions
# Usually actions/checkout does this but as we run in a container
# it doesn't work
run: git config --global --add safe.directory /__w/velox/velox

- name: Make ${{ matrix.type }} Build
  env:
    MAKEFLAGS: 'MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=4'
  run: |
    # Feature switches for the metrics build (benchmarks plus the storage
    # adapters and remote functions).
    EXTRA_CMAKE_FLAGS=(
      "-DVELOX_ENABLE_BENCHMARKS=ON"
      "-DVELOX_ENABLE_ARROW=ON"
      "-DVELOX_ENABLE_PARQUET=ON"
      "-DVELOX_ENABLE_HDFS=ON"
      "-DVELOX_ENABLE_S3=ON"
      "-DVELOX_ENABLE_GCS=ON"
      "-DVELOX_ENABLE_ABFS=ON"
      "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
    )
    # A bash array is local to this shell and is not visible to make, so the
    # flags must be handed over explicitly as a single space-joined make
    # variable; otherwise they are silently dropped.
    make '${{ matrix.type }}' EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}"
- name: Log binary sizes
  run: |
    # Writes "<size> <path>" lines plus one total line per category
    # (libraries, object files, velox_* files) to /tmp/metrics/object_sizes.
    mkdir -p /tmp/metrics
    sizes_file=/tmp/metrics/object_sizes
    pushd '_build/${{ matrix.type }}'
    # The name tests must be grouped: '-o' binds looser than the implicit
    # AND, so without the parentheses '-exec' would only run for '*.a' and
    # shared libraries would be missing from the listing and the total.
    find velox -type f \( -name '*.so' -o -name '*.a' \) -exec ls -l -BB {} \; |
      awk '{print $5, $9; total += $5} END {print total," total_lib_size"}' > "$sizes_file"
    find velox -type f -name '*.o' -exec ls -l -BB {} \; |
      awk '{print $5, $9; total += $5} END {print total," total_obj_size"}' >> "$sizes_file"
    find velox -type f -name 'velox_*' -exec ls -l -BB {} \; |
      awk '{print $5, $9; total += $5} END {print total," total_exec_size"}' >> "$sizes_file"
- name: Copy ninja_log
run: cp _build/${{ matrix.type }}/.ninja_log /tmp/metrics/.ninja_log

- name: "Install dependencies"
run: |
python3 -m pip install setuptools
python3 -m pip install -r scripts/benchmark-requirements.txt
- name: "Upload Metrics"
env:
CONBENCH_URL: "https://velox-conbench.voltrondata.run/"
CONBENCH_MACHINE_INFO_NAME: "GitHub-runner-${{ matrix.runner }}"
CONBENCH_EMAIL: "${{ secrets.CONBENCH_EMAIL }}"
CONBENCH_PASSWORD: "${{ secrets.CONBENCH_PASSWORD }}"
# These don't actually work https://github.com/conbench/conbench/issues/1484
# but have to be there to work regardless??
CONBENCH_PROJECT_REPOSITORY: "${{ github.repository }}"
CONBENCH_PROJECT_COMMIT: "${{ inputs.ref || github.sha }}"
run: |
./scripts/build-metrics.py upload \
--build_type "${{ matrix.type }}" \
--run_id "BM-${{ matrix.type }}-${{ github.run_id }}-${{ github.run_attempt }}" \
--pr_number "${{ github.event.number }}" \
--sha "${{ inputs.ref || github.sha }}" \
"/tmp/metrics"
16 changes: 15 additions & 1 deletion .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
VELOX_DEPENDENCY_SOURCE: SYSTEM
simdjson_SOURCE: BUNDLED
xsimd_SOURCE: BUNDLED
CUDA_VERSION: "11.8"
steps:
- uses: actions/checkout@v4

Expand All @@ -69,6 +70,15 @@ jobs:
# it doesn't work
run: git config --global --add safe.directory /__w/velox/velox

- name: Install Dependencies
run: |
# Allows installing an arbitrary CUDA version without having to update the
# docker container first. This simplifies testing new/different versions.
if ! yum list installed cuda-nvcc-$(echo ${CUDA_VERSION} | tr '.' '-') 1>/dev/null; then
source scripts/setup-centos8.sh
install_cuda ${CUDA_VERSION}
fi
- uses: assignUser/stash/restore@v1
with:
path: '${{ env.CCACHE_DIR }}'
Expand All @@ -81,6 +91,10 @@ jobs:
- name: Make Release Build
env:
MAKEFLAGS: 'NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4'
CUDA_ARCHITECTURES: 60
CUDA_COMPILER: /usr/local/cuda-${CUDA_VERSION}/bin/nvcc
# Without that, nvcc picks /usr/bin/c++ which is GCC 8
CUDA_FLAGS: "-ccbin /opt/rh/gcc-toolset-9/root/usr/bin"
run: |
EXTRA_CMAKE_FLAGS=(
"-DVELOX_ENABLE_BENCHMARKS=ON"
Expand All @@ -90,8 +104,8 @@ jobs:
"-DVELOX_ENABLE_S3=ON"
"-DVELOX_ENABLE_GCS=ON"
"-DVELOX_ENABLE_ABFS=ON"
"-DVELOX_ENABLE_SUBSTRAIT=ON"
"-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
"-DVELOX_ENABLE_GPU=ON"
)
make release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}"
Expand Down
80 changes: 74 additions & 6 deletions .github/workflows/scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,9 @@ jobs:
source .venv/bin/activate
python3 -m pip install deepdiff
python3 scripts/signature.py gh_bias_check presto spark
python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contendor.json
python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contender.json
python3 scripts/signature.py bias_aggregates /tmp/signatures/presto_aggregate_signatures_main.json \
/tmp/signatures/presto_aggregate_signatures_contendor.json /tmp/signatures/presto_aggregate_bias_functions \
/tmp/signatures/presto_aggregate_signatures_contender.json /tmp/signatures/presto_aggregate_bias_functions \
/tmp/signatures/presto_aggregate_errors
- name: Upload Signature Artifacts
Expand All @@ -246,8 +246,8 @@ jobs:
if: ${{ github.event_name == 'push' }}
run: |
# Remove irrelevant artifacts
rm *_bias_functions
rm *_signatures_main.json
rm -f *_bias_functions
rm -f *_signatures_main.json
# Rename signature files as 'main' files
for f in *_signatures_contender.json; do
mv "$f" "${f/_contender.json/_main.json}"
Expand Down Expand Up @@ -302,6 +302,13 @@ jobs:
path: velox/_build/debug//velox/exec/tests/velox_exchange_fuzzer_test
retention-days: "${{ env.RETENTION }}"

- name: Upload window fuzzer
uses: actions/upload-artifact@v4
with:
name: window
path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_window_fuzzer_test
retention-days: "${{ env.RETENTION }}"

presto-fuzzer-run:
name: Presto Fuzzer
if: ${{ needs.compile.outputs.presto_bias != 'true' }}
Expand Down Expand Up @@ -655,7 +662,6 @@ jobs:
runs-on: ubuntu-latest
container: ghcr.io/facebookincubator/velox-dev:presto-java
timeout-minutes: 120
if: ${{ github.event_name != 'pull_request' }}
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
LINUX_DISTRO: "centos"
Expand Down Expand Up @@ -711,7 +717,7 @@ jobs:
path: |
/tmp/aggregate_fuzzer_repro
/tmp/server.log
presto-bias-java-aggregation-fuzzer-run:
name: Biased Aggregation Fuzzer with Presto as source of truth
Expand Down Expand Up @@ -810,3 +816,65 @@ jobs:
run: |
cat /tmp/signatures/presto_aggregate_errors
exit 1
presto-java-window-fuzzer-run:
name: Window Fuzzer with Presto as source of truth
needs: compile
runs-on: ubuntu-latest
container: ghcr.io/facebookincubator/velox-dev:presto-java
timeout-minutes: 120
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
LINUX_DISTRO: "centos"
steps:

- name: Download window fuzzer
uses: actions/download-artifact@v4
with:
name: window

- name: "Checkout Repo"
uses: actions/checkout@v4
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref }}"

- name: Fix git permissions
# Usually actions/checkout does this but as we run in a container
# it doesn't work
run: git config --global --add safe.directory /__w/velox/velox/velox


- name: "Run Window Fuzzer"
# Starts a local Presto server, creates the hive.tpch schema, then runs the
# window fuzzer binary against that server with reference verification on.
# NOTE(review): PRESTO_HOME and DURATION are not set in this job's visible
# env block; presumably they come from the container image / workflow-level
# env -- confirm.
run: |
cd velox
cp ./scripts/etc/hive.properties $PRESTO_HOME/etc/catalog
ls -lR $PRESTO_HOME/etc
# Launch the server in the background; its output goes to /tmp/server.log,
# which the archive step of this job uploads.
$PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
# Sleep for 60 seconds to allow Presto server to start.
sleep 60
/opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
# Return to the previous directory, where velox_window_fuzzer_test is
# expected (the 'window' artifact is downloaded before the checkout).
cd -
# Start from an empty repro directory.
mkdir -p /tmp/window_fuzzer_repro/
rm -rfv /tmp/window_fuzzer_repro/*
chmod -R 777 /tmp/window_fuzzer_repro
# Make the downloaded binary executable before running it.
chmod +x velox_window_fuzzer_test
# The success message is printed only if the fuzzer exits with status 0.
./velox_window_fuzzer_test \
--seed ${RANDOM} \
--duration_sec $DURATION \
--logtostderr=1 \
--minloglevel=0 \
--repro_persist_path=/tmp/window_fuzzer_repro \
--enable_window_reference_verification \
--presto_url=http://127.0.0.1:8080 \
&& echo -e "\n\nWindow fuzzer run finished successfully."
- name: Archive window production artifacts
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: presto-sot-window-fuzzer-failure-artifacts
path: |
/tmp/window_fuzzer_repro
/tmp/server.log
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ settings.json

# User's build configuration
Makefile.config
CMakeUserPresets.json

# build, distribute, and bins (+ python proto bindings)
build
Expand Down
1 change: 1 addition & 0 deletions CMake/resolve_dependency_modules/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ by Velox. See details on bundling below.
| wangle | v2024.04.01.00 | No |
| mvfst | v2024.04.01.00 | No |
| fbthrift | v2024.04.01.00 | No |
| libstemmer | 2.2.0 | Yes |
| DuckDB (testing) | 0.8.1 | Yes |
| cpr (testing) | 1.10.15 | Yes |

Expand Down
24 changes: 24 additions & 0 deletions CMake/resolve_dependency_modules/libstemmer/Makefile.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
EXEEXT=.exe
endif
CFLAGS=-O2
-CPPFLAGS=-Iinclude
+CPPFLAGS=-Iinclude -fPIC
all: libstemmer.a stemwords$(EXEEXT)
libstemmer.a: $(snowball_sources:.c=.o)
$(AR) -cru $@ $^
57 changes: 57 additions & 0 deletions CMake/resolve_dependency_modules/stemmer.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the bundled libstemmer (Snowball) static library from its release
# tarball and exposes it as the imported target `stemmer` (alias
# `stemmer::stemmer`).
include_guard(GLOBAL)

# ExternalProject_Add is used below; include its module here so this file
# does not rely on another module having included it first.
include(ExternalProject)

# Pinned release, tarball checksum and download location.
set(VELOX_STEMMER_VERSION 2.2.0)
set(VELOX_STEMMER_BUILD_SHA256_CHECKSUM
    b941d9fe9cf36b4e2f8d3873cd4d8b8775bd94867a1df8d8c001bb8b688377c3)
set(VELOX_STEMMER_SOURCE_URL
    "https://snowballstem.org/dist/libstemmer_c-${VELOX_STEMMER_VERSION}.tar.gz"
)

# NOTE(review): resolve_dependency_url is defined outside this file; it is
# presumed to finalize the VELOX_STEMMER_* URL/checksum variables (including
# the "<ALGO>=" prefix URL_HASH requires) -- confirm.
resolve_dependency_url(STEMMER)

message(STATUS "Building stemmer from source")
find_program(MAKE_PROGRAM make REQUIRED)

set(STEMMER_PREFIX "${CMAKE_BINARY_DIR}/_deps/libstemmer")
set(STEMMER_INCLUDE_PATH ${STEMMER_PREFIX}/src/libstemmer/include)

# Single definition of the archive produced by the upstream Makefile, shared
# by BUILD_BYPRODUCTS and IMPORTED_LOCATION so the two cannot drift apart.
set(STEMMER_LIBRARY
    "${STEMMER_PREFIX}/src/libstemmer/${CMAKE_STATIC_LIBRARY_PREFIX}stemmer${CMAKE_STATIC_LIBRARY_SUFFIX}"
)

# We can not use FetchContent as libstemmer does not use cmake
ExternalProject_Add(
  libstemmer
  PREFIX "${STEMMER_PREFIX}"
  SOURCE_DIR "${STEMMER_PREFIX}/src/libstemmer"
  URL "${VELOX_STEMMER_SOURCE_URL}"
  URL_HASH ${VELOX_STEMMER_BUILD_SHA256_CHECKSUM}
  BUILD_IN_SOURCE TRUE
  CONFIGURE_COMMAND ""
  BUILD_COMMAND "${MAKE_PROGRAM}"
  INSTALL_COMMAND ""
  # Makefile.patch adds -fPIC to the upstream Makefile's CPPFLAGS.
  PATCH_COMMAND git apply
                "${CMAKE_CURRENT_LIST_DIR}/libstemmer/Makefile.patch"
  # Declared so generators such as Ninja know which step creates the archive.
  BUILD_BYPRODUCTS "${STEMMER_LIBRARY}")

add_library(stemmer STATIC IMPORTED)
add_library(stemmer::stemmer ALIAS stemmer)

# The include directory only exists after the tarball is extracted at build
# time; create it now so it already exists when
# INTERFACE_INCLUDE_DIRECTORIES is set at configure time.
file(MAKE_DIRECTORY "${STEMMER_INCLUDE_PATH}")
set_target_properties(
  stemmer
  PROPERTIES IMPORTED_LOCATION "${STEMMER_LIBRARY}"
             INTERFACE_INCLUDE_DIRECTORIES "${STEMMER_INCLUDE_PATH}")

# Ensure the external build runs before anything that links against stemmer.
add_dependencies(stemmer libstemmer)
Loading

0 comments on commit 39ad22b

Please sign in to comment.