Script cleanup
gunnarmorling committed Jan 13, 2024
1 parent eff73db commit 45056e0
Showing 5 changed files with 298 additions and 355 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -251,12 +251,13 @@ Any submissions (i.e. pull requests) created after Jan 31 2024 23:59 UTC will no
Results are determined by running the program on a [Hetzner AX161](https://www.hetzner.com/dedicated-rootserver/ax161) dedicated server (32 core AMD EPYC™ 7502P (Zen2), 128 GB RAM).
Programs are run from a RAM disk (i.e. the I/O overhead for loading the file from disk is not relevant), using 8 cores of the machine.
Each contender must pass the 1BRC test suite (_/test.sh_).
The `hyperfine` program is used for measuring execution times of the launch scripts of all entries, i.e. end-to-end times are measured.
Each contender is run five times in a row.
The slowest and the fastest runs are discarded.
The mean value of the remaining three runs is the result for that contender and will be added to the results table above.
The exact same _measurements.txt_ file is used for evaluating all contenders.
-See the script _evaluate2.sh_ for the exact implementation of the evaluation steps.
+See the script _evaluate.sh_ for the exact implementation of the evaluation steps.
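
For illustration, the trimmed-mean step described above can be reproduced from hyperfine's JSON export with a one-liner like the following. This is a minimal sketch, assuming the `--export-json` output that _evaluate.sh_ consumes; the timing file name is a placeholder:

```bash
# Illustrative sketch: with 5 runs, drop the fastest and the slowest, then average the remaining 3.
# E.g. run times of 9.2, 4.6, 4.5, 4.8 and 4.7 s -> mean of 4.6, 4.7, 4.8 = 4.7 s.
jq -r '.results[0].times | sort | .[1:-1] | add / length' someentry-timing.json
```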
## Prize
2 changes: 1 addition & 1 deletion prepare.sh → checkout.sh
@@ -19,7 +19,7 @@ set -x

if [ -z "$1" ]
then
-  echo "Usage: prepare.sh <fork name>:<branch name>"
+  echo "Usage: checkout.sh <fork name>:<branch name>"
  exit 1
fi

24 changes: 0 additions & 24 deletions eval.sh

This file was deleted.

304 changes: 295 additions & 9 deletions evaluate.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
#
# Copyright 2023 The original authors
#
@@ -15,20 +15,306 @@
# limitations under the License.
#

set -eo pipefail

if [ -z "$1" ]
then
-  echo "Usage: evaluate.sh <fork name>"
+  echo "Usage: evaluate.sh <fork name> (<fork name 2> ...)"
+  echo " for each fork, there must be a 'calculate_average_<fork name>.sh' script and an optional 'prepare_<fork name>.sh'."
  exit 1
fi

BOLD_WHITE='\033[1;37m'
CYAN='\033[0;36m'
GREEN='\033[0;32m'
PURPLE='\033[0;35m'
BOLD_RED='\033[1;31m'
RED='\033[0;31m'
BOLD_YELLOW='\033[1;33m'
RESET='\033[0m' # No Color

MEASUREMENTS_FILE="measurements_1B.txt"
RUNS=5
DEFAULT_JAVA_VERSION="21.0.1-open"
RUN_TIME_LIMIT=300 # seconds

TIMEOUT=""
if [ "$(uname -s)" == "Linux" ]; then
  TIMEOUT="timeout -v $RUN_TIME_LIMIT"
else # macOS
  if [ -x "$(command -v gtimeout)" ]; then
    TIMEOUT="gtimeout -v $RUN_TIME_LIMIT" # from `brew install coreutils`
  else
    echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long."
  fi
fi

function check_command_installed {
  if ! [ -x "$(command -v $1)" ]; then
    echo "Error: $1 is not installed." >&2
    exit 1
  fi
}

function print_and_execute() {
  echo "+ $@" >&2
  "$@"
}

check_command_installed java
check_command_installed hyperfine
check_command_installed jq
check_command_installed bc

# Validate that ./calculate_average_<fork>.sh exists for each fork
for fork in "$@"; do
  if [ ! -f "./calculate_average_$fork.sh" ]; then
    echo -e "${BOLD_RED}ERROR${RESET}: ./calculate_average_$fork.sh does not exist." >&2
    exit 1
  fi
done

## SDKMAN Setup
# 1. Custom check for sdkman installed; not sure why check_command_installed doesn't detect it properly
if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then
  echo -e "${BOLD_RED}ERROR${RESET}: sdkman is not installed." >&2
  exit 1
fi

java --version
# 2. Init sdkman in this script
source "$HOME/.sdkman/bin/sdkman-init.sh"

# 3. make sure the default java version is installed
if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then
  print_and_execute sdk install java $DEFAULT_JAVA_VERSION
fi

# 4. Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks
for fork in "$@"; do
  if [ -f "./prepare_$fork.sh" ]; then
    grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4 | while read -r version; do
      if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then
        print_and_execute sdk install java $version
      fi
    done || true # grep returns exit code 1 when no match, `|| true` prevents the script from exiting early
  fi
done
## END - SDKMAN Setup

# Check if SMT is enabled (we want it disabled)
if [ -f "/sys/devices/system/cpu/smt/active" ]; then
  if [ "$(cat /sys/devices/system/cpu/smt/active)" != "0" ]; then
    echo -e "${BOLD_YELLOW}WARNING${RESET} SMT is enabled"
  fi
fi

# Check if Turbo Boost is enabled (we want it disabled)
if [ -f "/sys/devices/system/cpu/cpufreq/boost" ]; then
  if [ "$(cat /sys/devices/system/cpu/cpufreq/boost)" != "0" ]; then
    echo -e "${BOLD_YELLOW}WARNING${RESET} Turbo Boost is enabled"
  fi
fi

print_and_execute java --version
print_and_execute ./mvnw --quiet clean verify

print_and_execute rm -f measurements.txt
print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt

echo ""

# check if measurements_xxx.out exists
if [ ! -f "${MEASUREMENTS_FILE%.txt}.out" ]; then
  echo -e "${BOLD_RED}ERROR${RESET}: ${MEASUREMENTS_FILE%.txt}.out does not exist." >&2
  echo "Please create it with:"
  echo ""
  echo " ./calculate_average_baseline.sh > ${MEASUREMENTS_FILE%.txt}.out"
  echo ""
  exit 1
fi

# Run tests and benchmark for each fork
filetimestamp=$(date +"%Y%m%d%H%M%S") # same for all fork.out files from this run
failed=()
test_output=$(mktemp)
for fork in "$@"; do
  set +e # we don't want prepare.sh, test.sh or hyperfine failing on 1 fork to exit the script early

  # Run the test suite
  print_and_execute $TIMEOUT ./test.sh $fork | tee $test_output > /dev/null 2>&1
  if [ $? -ne 0 ]; then
    failed+=("$fork")
    echo ""
    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork failed"
    cat $test_output
    echo ""

    continue
  fi

  # Run the test on $MEASUREMENTS_FILE; this serves as the warmup
  print_and_execute $TIMEOUT ./test.sh $fork $MEASUREMENTS_FILE | tee $test_output > /dev/null 2>&1
  if [ $? -ne 0 ]; then
    failed+=("$fork")
    echo ""
    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork $MEASUREMENTS_FILE failed"
    cat $test_output
    echo ""

    continue
  fi

  # re-link measurements.txt since test.sh deleted it
  print_and_execute rm -f measurements.txt
  print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt

  # Run prepare script
  if [ -f "./prepare_$fork.sh" ]; then
    print_and_execute source "./prepare_$fork.sh"
  else
    print_and_execute sdk use java $DEFAULT_JAVA_VERSION
  fi

  # Use hyperfine to run the benchmark for each fork
  HYPERFINE_OPTS="--warmup 0 --runs $RUNS --export-json $fork-$filetimestamp-timing.json --output ./$fork-$filetimestamp.out"

  # check if this script is running on a Linux box
  if [ "$(uname -s)" == "Linux" ]; then
    check_command_installed numactl

    # Linux platform
    # prepend this with numactl --physcpubind=0-7 for running it only with 8 cores
    numactl --physcpubind=0-7 hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1"
  else # MacOS
    hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1"
  fi
  # Catch hyperfine command failed
  if [ $? -ne 0 ]; then
    failed+=("$fork")
    # Hyperfine already prints the error message
    echo ""
    continue
  fi
done
set -e
rm $test_output

# Summary
echo -e "${BOLD_WHITE}Summary${RESET}"
for fork in "$@"; do
  # skip reporting results for failed forks
  if [[ " ${failed[@]} " =~ " ${fork} " ]]; then
    echo -e " ${RED}$fork${RESET}: command failed or output did not match"
    continue
  fi

  # Trimmed mean = The slowest and the fastest runs are discarded, the
  # mean value of the remaining three runs is the result for that contender
  trimmed_mean=$(jq -r '.results[0].times | sort_by(.|tonumber) | .[1:-1] | add / length' $fork-$filetimestamp-timing.json)
  raw_times=$(jq -r '.results[0].times | join(",")' $fork-$filetimestamp-timing.json)

  if [ "$fork" == "$1" ]; then
    color=$CYAN
  elif [ "$fork" == "$2" ]; then
    color=$GREEN
  else
    color=$PURPLE
  fi

  echo -e " ${color}$fork${RESET}: trimmed mean ${BOLD_WHITE}$trimmed_mean${RESET}, raw times ${BOLD_WHITE}$raw_times${RESET}"
done
echo ""

## Leaderboard - prints the leaderboard in Markdown table format
echo -e "${BOLD_WHITE}Leaderboard${RESET}"

# 1. Create a temp file to store the leaderboard entries
leaderboard_temp_file=$(mktemp)

# 2. Process each fork and append the 1-line entry to the temp file
for fork in "$@"; do
  # skip reporting results for failed forks
  if [[ " ${failed[@]} " =~ " ${fork} " ]]; then
    continue
  fi

  trimmed_mean=$(jq -r '.results[0].times | sort_by(.|tonumber) | .[1:-1] | add / length' $fork-$filetimestamp-timing.json)

  # trimmed_mean is in seconds
  # Format trimmed_mean as MM:SS.mmm
  # using bc
  trimmed_mean_minutes=$(echo "$trimmed_mean / 60" | bc)
  trimmed_mean_seconds=$(echo "$trimmed_mean % 60 / 1" | bc)
  trimmed_mean_ms=$(echo "($trimmed_mean - $trimmed_mean_minutes * 60 - $trimmed_mean_seconds) * 1000 / 1" | bc)
  trimmed_mean_formatted=$(printf "%02d:%02d.%03d" $trimmed_mean_minutes $trimmed_mean_seconds $trimmed_mean_ms)

  # Get Github user's name from public Github API (rate limited after ~50 calls, so results are cached in github_users.txt)
  set +e
  github_user__name=$(grep "^$fork;" github_users.txt | cut -d ';' -f2)
  if [ -z "$github_user__name" ]; then
    github_user__name=$(curl -s https://api.github.com/users/$fork | jq -r '.name' | tr -d '"')
    if [ "$github_user__name" != "null" ]; then
      echo "$fork;$github_user__name" >> github_users.txt
    else
      github_user__name=$fork
    fi
  fi
  set -e

  # Read java version from prepare_$fork.sh if it exists, otherwise assume 21.0.1-open
  java_version="21.0.1-open"
  # Hard-coding the note message for now
  notes=""
  if [ -f "./prepare_$fork.sh" ]; then
    java_version=$(grep -F "sdk use java" ./prepare_$fork.sh | cut -d' ' -f4)

    if grep -F "native-image" -q ./prepare_$fork.sh ; then
      notes="GraalVM native binary"
    fi
  fi

  echo -n "$trimmed_mean;" >> $leaderboard_temp_file # for sorting
  echo -n "| # " >> $leaderboard_temp_file
  echo -n "| $trimmed_mean_formatted " >> $leaderboard_temp_file
  echo -n "| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_$fork.java)" >> $leaderboard_temp_file
  echo -n "| $java_version " >> $leaderboard_temp_file
  echo -n "| [$github_user__name](https://github.com/$fork) " >> $leaderboard_temp_file
  echo -n "| $notes " >> $leaderboard_temp_file
  echo "|" >> $leaderboard_temp_file
done

# 3. Sort leaderboard_temp_file by trimmed_mean and remove the sorting column
sort -n $leaderboard_temp_file | cut -d ';' -f 2 > $leaderboard_temp_file.sorted

# 4. Print the leaderboard
echo ""
echo "| # | Result (m:s.ms) | Implementation | JDK | Submitter | Notes |"
echo "|---|-----------------|--------------------|-----|---------------|-----------|"
# If $leaderboard_temp_file.sorted has more than 3 entries, include rankings
if [ $(wc -l < $leaderboard_temp_file.sorted) -gt 3 ]; then
  head -n 1 $leaderboard_temp_file.sorted | tr '#' 1
  head -n 2 $leaderboard_temp_file.sorted | tail -n 1 | tr '#' 2
  head -n 3 $leaderboard_temp_file.sorted | tail -n 1 | tr '#' 3
  tail -n+4 $leaderboard_temp_file.sorted | tr '#' ' '
else
  # Don't show rankings
  cat $leaderboard_temp_file.sorted | tr '#' ' '
fi
echo ""

-mvn clean verify
# 5. Cleanup
rm $leaderboard_temp_file
## END - Leaderboard

-rm -f measurements.txt
-ln -s measurements_1B.txt measurements.txt
# Finalize .out files
echo "Raw results saved to file(s):"
for fork in "$@"; do
  if [ -f "$fork-$filetimestamp-timing.json" ]; then
    cat $fork-$filetimestamp-timing.json >> $fork-$filetimestamp.out
    rm $fork-$filetimestamp-timing.json
  fi

-for i in {1..5}
-do
-./calculate_average_$1.sh
  if [ -f "$fork-$filetimestamp.out" ]; then
    echo " $fork-$filetimestamp.out"
  fi
done
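
For reference, a hypothetical end-to-end invocation of the script shown above (the fork names are placeholders, not real entries):

```bash
# Benchmarks two forks in one run; per the usage message, each fork needs a
# ./calculate_average_<fork name>.sh launcher, and an optional ./prepare_<fork name>.sh
# is sourced before its benchmark if present.
./evaluate.sh forkA forkB
```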