Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/acvictor/velox into acvicto…
Browse files Browse the repository at this point in the history
…r/addAlias
  • Loading branch information
acvictor committed Apr 29, 2024
2 parents f1aa4db + 1426f33 commit 3fa6e05
Show file tree
Hide file tree
Showing 66 changed files with 1,853 additions and 727 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ on:
- 'third_party/**'
- 'pyvelox/**'
- '.github/workflows/benchmark.yml'
- 'scripts/benchmark-requirements.txt'

push:
branches: [main]

Expand Down
118 changes: 118 additions & 0 deletions .github/workflows/build-metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Workflow that builds Velox and uploads the collected build metrics.
name: Collect Build Metrics

on:
# Run on pull requests that modify this workflow file itself.
pull_request:
paths:
- ".github/workflows/build-metrics.yml"

# Allow manual runs; the caller must supply the ref to build.
workflow_dispatch:
inputs:
ref:
description: "ref to check"
required: true

schedule:
# Run every day at 04:05
- cron: "5 4 * * *"

# Limit the workflow token to read-only access to repository contents.
permissions:
contents: read

jobs:
# One job definition, expanded by the matrix below into a run per build type.
metrics:
name: Linux ${{ matrix.type }} with adapters
# Only run when the workflow executes in the facebookincubator/velox
# repository.
if: ${{ github.repository == 'facebookincubator/velox' }}
runs-on: ${{ matrix.runner }}
container: ghcr.io/facebookincubator/velox-dev:adapters
strategy:
# Do not cancel the other matrix entry when one build type fails.
fail-fast: false
matrix:
runner: ["16-core"]
type: ["debug", "release"]
defaults:
run:
shell: bash
env:
# NOTE(review): presumably selects system-provided dependencies by default
# while forcing bundled builds of simdjson and xsimd — confirm against the
# project's dependency resolution logic.
VELOX_DEPENDENCY_SOURCE: SYSTEM
simdjson_SOURCE: BUNDLED
xsimd_SOURCE: BUNDLED
steps:
# Check out the requested ref for manual runs, otherwise the triggering commit.
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.sha }}

- name: Fix git permissions
# Usually actions/checkout does this but as we run in a container
# it doesn't work
# Marks the checkout directory as safe so later git commands accept it.
run: git config --global --add safe.directory /__w/velox/velox

- name: Make ${{ matrix.type }} Build
  env:
    MAKEFLAGS: 'MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=4'
  run: |
    # Optional components to enable on top of the default configuration.
    EXTRA_CMAKE_FLAGS=(
      "-DVELOX_ENABLE_BENCHMARKS=ON"
      "-DVELOX_ENABLE_ARROW=ON"
      "-DVELOX_ENABLE_PARQUET=ON"
      "-DVELOX_ENABLE_HDFS=ON"
      "-DVELOX_ENABLE_S3=ON"
      "-DVELOX_ENABLE_GCS=ON"
      "-DVELOX_ENABLE_ABFS=ON"
      "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
    )
    # A bash array cannot be exported to child processes, so the flags only
    # reach the build when handed to make explicitly as a make variable.
    # NOTE(review): assumes the project Makefile forwards EXTRA_CMAKE_FLAGS
    # to cmake — confirm.
    make '${{ matrix.type }}' EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}"
- name: Log binary sizes
  run: |
    # Write "<size> <path>" lines plus a per-category total for libraries,
    # object files and velox_* files found in the build tree.
    mkdir -p /tmp/metrics
    sizes_file=/tmp/metrics/object_sizes
    pushd '_build/${{ matrix.type }}'
    # The parentheses are required: without them -exec binds only to the
    # '*.a' test (implicit AND beats -o) and '*.so' files are never listed.
    find velox -type f \( -name '*.so' -o -name '*.a' \) -exec ls -l -BB {} \; |
      awk '{print $5, $9; total += $5} END {print total," total_lib_size"}' > "$sizes_file"
    find velox -type f -name '*.o' -exec ls -l -BB {} \; |
      awk '{print $5, $9; total += $5} END {print total," total_obj_size"}' >> "$sizes_file"
    find velox -type f -name 'velox_*' -exec ls -l -BB {} \; |
      awk '{print $5, $9; total += $5} END {print total," total_exec_size"}' >> "$sizes_file"
# Place the ninja build log next to the size report in /tmp/metrics.
- name: Copy ninja_log
run: cp _build/${{ matrix.type }}/.ninja_log /tmp/metrics/.ninja_log

# Install setuptools first, then the packages listed in the benchmark
# requirements file.
- name: "Install dependencies"
run: |
python3 -m pip install setuptools
python3 -m pip install -r scripts/benchmark-requirements.txt
# Run the upload sub-command of scripts/build-metrics.py on /tmp/metrics.
- name: "Upload Metrics"
env:
CONBENCH_URL: "https://velox-conbench.voltrondata.run/"
CONBENCH_MACHINE_INFO_NAME: "GitHub-runner-${{ matrix.runner }}"
CONBENCH_EMAIL: "${{ secrets.CONBENCH_EMAIL }}"
CONBENCH_PASSWORD: "${{ secrets.CONBENCH_PASSWORD }}"
# The two variables below do not take effect (see
# https://github.com/conbench/conbench/issues/1484), but the upload
# apparently still needs them to be set.
CONBENCH_PROJECT_REPOSITORY: "${{ github.repository }}"
CONBENCH_PROJECT_COMMIT: "${{ inputs.ref || github.sha }}"
run: |
# NOTE(review): github.event.number is presumably empty for scheduled and
# manual runs — confirm the script accepts an empty --pr_number.
./scripts/build-metrics.py upload \
--build_type "${{ matrix.type }}" \
--run_id "BM-${{ matrix.type }}-${{ github.run_id }}-${{ github.run_attempt }}" \
--pr_number "${{ github.event.number }}" \
--sha "${{ inputs.ref || github.sha }}" \
"/tmp/metrics"
1 change: 0 additions & 1 deletion .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ jobs:
"-DVELOX_ENABLE_S3=ON"
"-DVELOX_ENABLE_GCS=ON"
"-DVELOX_ENABLE_ABFS=ON"
"-DVELOX_ENABLE_SUBSTRAIT=ON"
"-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
"-DVELOX_ENABLE_GPU=ON"
)
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,9 @@ jobs:
source .venv/bin/activate
python3 -m pip install deepdiff
python3 scripts/signature.py gh_bias_check presto spark
python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contendor.json
python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contender.json
python3 scripts/signature.py bias_aggregates /tmp/signatures/presto_aggregate_signatures_main.json \
/tmp/signatures/presto_aggregate_signatures_contendor.json /tmp/signatures/presto_aggregate_bias_functions \
/tmp/signatures/presto_aggregate_signatures_contender.json /tmp/signatures/presto_aggregate_bias_functions \
/tmp/signatures/presto_aggregate_errors
- name: Upload Signature Artifacts
Expand Down Expand Up @@ -662,7 +662,6 @@ jobs:
runs-on: ubuntu-latest
container: ghcr.io/facebookincubator/velox-dev:presto-java
timeout-minutes: 120
if: ${{ github.event_name != 'pull_request' }}
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
LINUX_DISTRO: "centos"
Expand Down Expand Up @@ -824,7 +823,6 @@ jobs:
runs-on: ubuntu-latest
container: ghcr.io/facebookincubator/velox-dev:presto-java
timeout-minutes: 120
if: ${{ github.event_name != 'pull_request' }}
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
LINUX_DISTRO: "centos"
Expand Down
1 change: 1 addition & 0 deletions CMake/resolve_dependency_modules/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ by Velox. See details on bundling below.
| wangle | v2024.04.01.00 | No |
| mvfst | v2024.04.01.00 | No |
| fbthrift | v2024.04.01.00 | No |
| libstemmer | 2.2.0 | Yes |
| DuckDB (testing) | 0.8.1 | Yes |
| cpr (testing) | 1.10.15 | Yes |

Expand Down
24 changes: 24 additions & 0 deletions CMake/resolve_dependency_modules/libstemmer/Makefile.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Appends -fPIC to CPPFLAGS in the libstemmer Makefile.
# NOTE(review): presumably needed so the static archive can be linked into
# shared objects — confirm.
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
EXEEXT=.exe
endif
CFLAGS=-O2
-CPPFLAGS=-Iinclude
+CPPFLAGS=-Iinclude -fPIC
all: libstemmer.a stemwords$(EXEEXT)
libstemmer.a: $(snowball_sources:.c=.o)
$(AR) -cru $@ $^
57 changes: 57 additions & 0 deletions CMake/resolve_dependency_modules/stemmer.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Downloads the libstemmer C sources, builds the static library with the
# upstream Makefile, and exposes it as the imported target `stemmer`
# (alias `stemmer::stemmer`).
include_guard(GLOBAL)

# Keep this module self-contained rather than relying on an earlier include.
include(ExternalProject)

set(VELOX_STEMMER_VERSION 2.2.0)
set(VELOX_STEMMER_BUILD_SHA256_CHECKSUM
    b941d9fe9cf36b4e2f8d3873cd4d8b8775bd94867a1df8d8c001bb8b688377c3)
set(VELOX_STEMMER_SOURCE_URL
    "https://snowballstem.org/dist/libstemmer_c-${VELOX_STEMMER_VERSION}.tar.gz"
)

# Project helper defined outside this file.
# NOTE(review): URL_HASH below needs an `ALGO=value` string, so this call
# presumably rewrites the checksum variable into that form — confirm.
resolve_dependency_url(STEMMER)

message(STATUS "Building stemmer from source")
find_program(MAKE_PROGRAM make REQUIRED)

set(STEMMER_PREFIX "${CMAKE_BINARY_DIR}/_deps/libstemmer")
set(STEMMER_SOURCE_DIR "${STEMMER_PREFIX}/src/libstemmer")
set(STEMMER_INCLUDE_PATH "${STEMMER_SOURCE_DIR}/include")
# Single definition of the archive path shared by the build byproduct and
# the imported target below.
set(STEMMER_LIBRARY
    "${STEMMER_SOURCE_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}stemmer${CMAKE_STATIC_LIBRARY_SUFFIX}"
)

# We can not use FetchContent as libstemmer does not use cmake
ExternalProject_Add(
  libstemmer
  PREFIX "${STEMMER_PREFIX}"
  SOURCE_DIR "${STEMMER_SOURCE_DIR}"
  URL "${VELOX_STEMMER_SOURCE_URL}"
  URL_HASH "${VELOX_STEMMER_BUILD_SHA256_CHECKSUM}"
  BUILD_IN_SOURCE TRUE
  CONFIGURE_COMMAND ""
  # Only the static archive is consumed; skip the stemwords executable that
  # the Makefile's default `all` target would also build.
  BUILD_COMMAND "${MAKE_PROGRAM}" libstemmer.a
  INSTALL_COMMAND ""
  PATCH_COMMAND git apply
                "${CMAKE_CURRENT_LIST_DIR}/libstemmer/Makefile.patch"
  BUILD_BYPRODUCTS "${STEMMER_LIBRARY}")

add_library(stemmer STATIC IMPORTED)
add_library(stemmer::stemmer ALIAS stemmer)
# The include directory only appears once the sources are extracted at build
# time; create it now so it exists when the property below is set.
file(MAKE_DIRECTORY "${STEMMER_INCLUDE_PATH}")
set_target_properties(
  stemmer
  PROPERTIES IMPORTED_LOCATION "${STEMMER_LIBRARY}"
             INTERFACE_INCLUDE_DIRECTORIES "${STEMMER_INCLUDE_PATH}")

# Ensure the external build runs before anything that links against stemmer.
add_dependencies(stemmer libstemmer)
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,9 @@ endif()
set_source(xsimd)
resolve_dependency(xsimd 10.0.0)

set(stemmer_SOURCE BUNDLED)
resolve_dependency(stemmer)

if(VELOX_BUILD_TESTING)
set(BUILD_TESTING ON)
include(CTest) # include after project() but before add_subdirectory()
Expand Down
6 changes: 3 additions & 3 deletions scripts/benchmark-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

benchadapt@git+https://github.com/conbench/conbench.git@44e81d1#subdirectory=benchadapt/python
benchalerts@git+https://github.com/conbench/conbench.git@44e81d1#subdirectory=benchalerts
benchclients@git+https://github.com/conbench/conbench.git@44e81d1#subdirectory=benchclients/python
benchadapt==2024.3.20
benchalerts==2024.1.10.1
benchclients==2024.3.29.1
Loading

0 comments on commit 3fa6e05

Please sign in to comment.