From 11a89d6cb81c513a9c1624740a97b43fc737417b Mon Sep 17 00:00:00 2001 From: zerninv Date: Wed, 31 Jan 2024 16:44:50 +0000 Subject: [PATCH] Attempt to fix segfault CalculateAverage_zerninv.java (#635) * attempt to fix segfault, graal native * fix last bytes for last line handler * fix typo * one more attempt --- calculate_average_zerninv.sh | 10 ++- prepare_zerninv.sh | 7 +- .../onebrc/CalculateAverage_zerninv.java | 72 ++++++++++++------- 3 files changed, 59 insertions(+), 30 deletions(-) diff --git a/calculate_average_zerninv.sh b/calculate_average_zerninv.sh index 2b76c7d7d..6dbda3022 100755 --- a/calculate_average_zerninv.sh +++ b/calculate_average_zerninv.sh @@ -15,5 +15,11 @@ # limitations under the License. # -JAVA_OPTS="--enable-preview" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_zerninv \ No newline at end of file +if [ -f target/CalculateAverage_zerninv_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_zerninv_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_zerninv_image +else + JAVA_OPTS="--enable-preview -Xmx512m -XX:+UseSerialGC -XX:-TieredCompilation" + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_zerninv.sh to generate." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_zerninv +fi \ No newline at end of file diff --git a/prepare_zerninv.sh b/prepare_zerninv.sh index cd3641e0e..ae7343301 100755 --- a/prepare_zerninv.sh +++ b/prepare_zerninv.sh @@ -17,4 +17,9 @@ source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 \ No newline at end of file +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_zerninv_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native -R:MaxHeapSize=512m -H:-GenLoopSafepoints --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_zerninv" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_zerninv_image dev.morling.onebrc.CalculateAverage_zerninv +fi \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java b/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java index b28750f77..47974ce67 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java @@ -56,8 +56,7 @@ public static void main(String[] args) throws IOException, InterruptedException tasks[i] = new TaskThread((int) (fileSize / minChunkSize / CORES + 1)); } - var results = new HashMap(); - var chunks = splitByChunks(segment.address(), segment.address() + fileSize, minChunkSize, results); + var chunks = splitByChunks(segment.address(), segment.address() + fileSize, minChunkSize); for (int i = 0; i < chunks.size() - 1; i++) { var task = tasks[i % tasks.length]; task.addChunk(chunks.get(i), chunks.get(i + 1)); @@ -67,6 +66,7 @@ public static void main(String[] args) throws IOException, InterruptedException task.start(); } + var results = new HashMap(); for (var task : tasks) { task.join(); task.collectTo(results); @@ -79,31 +79,8 @@ public static void main(String[] args) throws IOException, InterruptedException } } - private static List splitByChunks(long address, long end, long minChunkSize, Map results) { - // handle last line - long offset = end - 1; - int temperature = 0; - byte b; - int multiplier = 1; - while ((b = UNSAFE.getByte(offset--)) != ';') { - if (b >= '0' && b <= '9') { - temperature += (b - '0') * multiplier; - multiplier *= 10; - } - else if (b == '-') { - temperature = -temperature; - } - } - long cityNameEnd = offset; - while (UNSAFE.getByte(offset - 1) != '\n' && offset > address) { - offset--; - } - var cityName = new byte[(int) (cityNameEnd - offset + 1)]; - UNSAFE.copyMemory(null, offset, cityName, Unsafe.ARRAY_BYTE_BASE_OFFSET, cityName.length); - results.put(new String(cityName, StandardCharsets.UTF_8), new TemperatureAggregation(temperature, 1, (short) temperature, (short) temperature)); - + private static List splitByChunks(long address, long end, long minChunkSize) { // split by chunks - end = offset; List result = new ArrayList<>((int) ((end - address) / minChunkSize + 1)); result.add(address); while (address < end) { @@ -278,8 +255,49 @@ public void addChunk(long begin, long end) { @Override public void run() { for (int i = 0; i < begins.size(); i++) { - calcForChunk(begins.get(i), ends.get(i)); + var begin = begins.get(i); + var end = ends.get(i) - 1; + while (end > begin && UNSAFE.getByte(end - 1) != '\n') { + end--; + } + calcForChunk(begin, end); + calcLastLine(end); + } + } + + private void calcLastLine(long offset) { + long cityOffset = offset; + long lastBytes = 0; + int hashCode = 0; + byte cityNameSize = 0; + + byte b; + while ((b = UNSAFE.getByte(offset++)) != ';') { + lastBytes = (lastBytes << 8) | b; + hashCode = hashCode * 31 + b; + cityNameSize++; + } + + int temperature; + int word = UNSAFE.getInt(offset); + offset += 4; + + if ((word & TWO_NEGATIVE_DIGITS_MASK) == TWO_NEGATIVE_DIGITS_MASK) { + word >>>= 8; + temperature = ZERO * 11 - ((word & BYTE_MASK) * 10 + ((word >>> 16) & BYTE_MASK)); + } + else if ((word & THREE_DIGITS_MASK) == THREE_DIGITS_MASK) { + temperature = (word & BYTE_MASK) * 100 + ((word >>> 8) & BYTE_MASK) * 10 + ((word >>> 24) & BYTE_MASK) - ZERO * 111; + } + else if ((word & TWO_DIGITS_MASK) == TWO_DIGITS_MASK) { + temperature = (word & BYTE_MASK) * 10 + ((word >>> 16) & BYTE_MASK) - ZERO * 11; + } + else { + // #.##- + word = (word >>> 8) | (UNSAFE.getByte(offset) << 24); + temperature = ZERO * 111 - ((word & BYTE_MASK) * 100 + ((word >>> 8) & BYTE_MASK) * 10 + ((word >>> 24) & BYTE_MASK)); } + container.put(cityOffset, cityNameSize, hashCode, lastBytes, (short) temperature); } private void calcForChunk(long offset, long end) {