From ce8ecd62c494cbda1c744cc2992919723545717e Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Tue, 30 Jan 2024 21:37:25 +0800 Subject: [PATCH 01/12] first double as int --- calculate_average_martin2038.sh | 19 ++ prepare_martin2038.sh | 25 ++ .../onebrc/CalculateAverage_martin2038.java | 234 ++++++++++++++++++ 3 files changed, 278 insertions(+) create mode 100755 calculate_average_martin2038.sh create mode 100755 prepare_martin2038.sh create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java diff --git a/calculate_average_martin2038.sh b/calculate_average_martin2038.sh new file mode 100755 index 000000000..580ec61cc --- /dev/null +++ b/calculate_average_martin2038.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-XX:+UseEpsilonGC" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_martin2038 diff --git a/prepare_martin2038.sh b/prepare_martin2038.sh new file mode 100755 index 000000000..0bb0b02c9 --- /dev/null +++ b/prepare_martin2038.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 +# +#if [ ! -f target/CalculateAverage_martin2038 ]; then +# NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native -R:MaxHeapSize=64m -H:-GenLoopSafepoints --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_martin2038" +# native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image dev.morling.onebrc.CalculateAverage_martin2038 +#fi \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java new file mode 100644 index 000000000..7295371d6 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -0,0 +1,234 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel.MapMode; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CalculateAverage_martin2038 { + + private static final String FILE = "./measurements.txt"; + + private static record Measurement(String station, double value) { + private Measurement(String[] parts) { + this(parts[0], Double.parseDouble(parts[1])); + } + } + + private static record ResultRow(double min, double mean, double max) { + + public String toString() { + return round(min) + "/" + round(mean) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + }; + + private static class MeasurementAggregator { + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; + private long sum; + private int count; + + void update(int temp) { + update(1,temp,temp,temp); + } + + void update(int cnt, long sm, int min, int max) { + sum += sm; + count += cnt; + if (this.min > min) { + this.min = min; + } + if (this.max < max) { + this.max = max; + } + } + + void merge(MeasurementAggregator it){ + update(it.count,it.sum,it.min,it.max); + } + + + public String toString() { + return (min/ 10f) + "/" + Math.round(sum/(float)count)/10f + "/" + (max/ 10f); + } + } + + public static void main(String[] args) throws IOException { + + var file = new RandomAccessFile(FILE, "r"); + final int maxNameLength = 100; + //.parallel(). + var fc = file.getChannel(); + split(file).stream().parallel().map(ck -> { + var map = new HashMap(200); + //var pb = System.currentTimeMillis(); + try { + var mb = fc.map(MapMode.READ_ONLY,ck.start, ck.length); + var buff = new byte[maxNameLength]; + while (mb.hasRemaining()){ + var name = readNextString(buff,mb);//.intern(); + var temp = readNextInt10Times(buff, mb); + add2map(map,name,temp); + } + //long end = ck.start + ck.length; + //do { + // var name = readNext(file, ';', 30).intern(); + // var temp = Double.parseDouble(readNext(file, '\n', 6)); + // var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); + // agg.update(temp); + //}while (file.getFilePointer(){ + + var sb = new StringBuilder(map.size() * 100); + sb.append('{'); + map.entrySet().stream().sorted(Map.Entry.comparingByKey()) + .forEachOrdered(kv->sb.append(kv.getKey()).append('=').append(kv.getValue()).append(", ")); + sb.deleteCharAt(sb.length()-1); + sb.setCharAt(sb.length()-1,'}'); + var resultStr = sb.toString(); + System.out.println(resultStr); + System.out.println(resultStr.hashCode()); + }); + + + //System.out.println(Runtime.getRuntime().availableProcessors()); + //System.out.println(); + //var resultStr = measurements.toString(); + //System.out.println("cost : "+(System.currentTimeMillis() -begin) +" ms"); + //System.out.println(resultStr); + //System.out.println(resultStr.hashCode()); + } + + static HashMap reduceMap(HashMap aMap + ,HashMap bMap){ + aMap.forEach((k,v)->{ + var b = bMap.get(k); + if(null == b){ + bMap.put(k,v); + }else{ + b.merge(v); + } + }); + return bMap; + } + + static void add2map(Map map,String name,int temp){ + // 比computeIfAbsent 节约1秒 + var agg = map.get(name); + if(null == agg){ + agg = new MeasurementAggregator(); + map.put(name,agg); + } + //var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); + agg.update(temp); + } + + record FileChunk(long start,long length){} + + static List split(RandomAccessFile file) throws IOException { + var threadNum = Runtime.getRuntime().availableProcessors(); + long avgChunkSize = file.length()/threadNum; + long lastStart = 0 ; + var list = new ArrayList(threadNum); + for(var i = 0 ; i< threadNum -1; i++){ + var length = avgChunkSize; + file.seek(lastStart+length); + while (file.readByte() != '\n') { + //file.seek(lastStart+ ++length); + ++length; + } + // include the '\n' + length ++ ; + list.add(new FileChunk(lastStart,length)); + lastStart += length; + } + list.add(new FileChunk(lastStart,file.length() - lastStart)); + return list; + } + + static final int MIN_NAME = 3; + + static String readNextString(byte[] buf, MappedByteBuffer mb) { + int i = MIN_NAME; + mb.get(buf,0,i); + byte b; + while ((b = mb.get()) != ';') { + buf[i++] = b; + } + return new String(buf,0,i); + } + + // copy from CalculateAverage_3j5a + // 替换 Double.parse + // 时间 38秒 -> 5418 ms + static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { + int i = MIN_NAME; + mb.get(buf, 0, i); + byte b; + while ((b = mb.get()) != '\n') { + buf[i++] = b; + } + //-3.2 + var zeroAscii = '0'; + int temperature = buf[--i] - zeroAscii; + i--; // skipping dot + var base = 10; + while (i > 0) { + b = buf[--i]; + if (b == '-') { + temperature = -temperature; + } else { + temperature = base * (b - zeroAscii) + temperature; + base *= base; + } + } + return temperature; + } + + //static String readNext(RandomAccessFile file, char endFlag,int initLength) throws IOException { + // StringBuilder input = new StringBuilder(initLength); + // int c = -1; + // //boolean eol = false; + // + // while (true) { + // c = file.read(); + // if( c == endFlag || c == -1) { + // break; + // } + // input.append((char)c); + // } + // + // //if ((c == -1) && (input.length() == 0)) { + // // return null; + // //} + // return input.toString(); + //} +} From 5968cbdc5cdf67a11b013dbac63187abbc10d57f Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Tue, 30 Jan 2024 21:39:57 +0800 Subject: [PATCH 02/12] - hashcode --- .../java/dev/morling/onebrc/CalculateAverage_martin2038.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index 7295371d6..e45a52bd5 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -115,7 +115,7 @@ public static void main(String[] args) throws IOException { sb.setCharAt(sb.length()-1,'}'); var resultStr = sb.toString(); System.out.println(resultStr); - System.out.println(resultStr.hashCode()); + //System.out.println(resultStr.hashCode()); }); From 2c84b0a51ed4e8df84551924500ecdd389175f20 Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Tue, 30 Jan 2024 22:27:46 +0800 Subject: [PATCH 03/12] JAVA_OPTS empty --- calculate_average_martin2038.sh | 3 +- .../onebrc/CalculateAverage_martin2038.java | 61 +++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/calculate_average_martin2038.sh b/calculate_average_martin2038.sh index 580ec61cc..ba69ae496 100755 --- a/calculate_average_martin2038.sh +++ b/calculate_average_martin2038.sh @@ -15,5 +15,6 @@ # limitations under the License. # -JAVA_OPTS="-XX:+UseEpsilonGC" +# JAVA_OPTS="-XX:-EnableJVMCI -Xms16g -Xmx16g -XX:+AlwaysPreTouch -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC" +JAVA_OPTS="" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_martin2038 diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index e45a52bd5..14def2d8b 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -26,6 +26,8 @@ public class CalculateAverage_martin2038 { + //private static final String FILE = "/Users/martin/Garden/blog/1BRC/1brc/./measurements.txt"; + private static final String FILE = "./measurements.txt"; private static record Measurement(String station, double value) { @@ -118,13 +120,6 @@ public static void main(String[] args) throws IOException { //System.out.println(resultStr.hashCode()); }); - - //System.out.println(Runtime.getRuntime().availableProcessors()); - //System.out.println(); - //var resultStr = measurements.toString(); - //System.out.println("cost : "+(System.currentTimeMillis() -begin) +" ms"); - //System.out.println(resultStr); - //System.out.println(resultStr.hashCode()); } static HashMap reduceMap(HashMap aMap @@ -153,31 +148,34 @@ static void add2map(Map map,String name,int temp) record FileChunk(long start,long length){} - static List split(RandomAccessFile file) throws IOException { - var threadNum = Runtime.getRuntime().availableProcessors(); - long avgChunkSize = file.length()/threadNum; - long lastStart = 0 ; - var list = new ArrayList(threadNum); - for(var i = 0 ; i< threadNum -1; i++){ - var length = avgChunkSize; - file.seek(lastStart+length); - while (file.readByte() != '\n') { - //file.seek(lastStart+ ++length); - ++length; - } - // include the '\n' - length ++ ; - list.add(new FileChunk(lastStart,length)); - lastStart += length; - } - list.add(new FileChunk(lastStart,file.length() - lastStart)); - return list; - } - - static final int MIN_NAME = 3; + static List split(RandomAccessFile file) throws IOException { + var threadNum = Runtime.getRuntime().availableProcessors(); + long total = file.length(); + long avgChunkSize = total / threadNum; + long lastStart = 0; + var list = new ArrayList(threadNum); + for (var i = 0; i < threadNum - 1; i++) { + var length = avgChunkSize; + file.seek(lastStart + length); + while (file.readByte() != '\n') { + //file.seek(lastStart+ ++length); + ++length; + } + // include the '\n' + length++; + list.add(new FileChunk(lastStart, length)); + lastStart += length; + if(lastStart>=total){ + return list; + } + } + list.add(new FileChunk(lastStart, total - lastStart)); + return list; + } + static String readNextString(byte[] buf, MappedByteBuffer mb) { - int i = MIN_NAME; + int i = 1; mb.get(buf,0,i); byte b; while ((b = mb.get()) != ';') { @@ -190,7 +188,8 @@ static String readNextString(byte[] buf, MappedByteBuffer mb) { // 替换 Double.parse // 时间 38秒 -> 5418 ms static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { - int i = MIN_NAME; + final int min_number_len = 3; + int i = min_number_len; mb.get(buf, 0, i); byte b; while ((b = mb.get()) != '\n') { From d3ca55cdc3f49a60458596f8c009d2e256ff2a59 Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Tue, 30 Jan 2024 22:41:46 +0800 Subject: [PATCH 04/12] native --- calculate_average_martin2038.sh | 16 +++++++++++++--- prepare_martin2038.sh | 9 +++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/calculate_average_martin2038.sh b/calculate_average_martin2038.sh index ba69ae496..c141e2bdb 100755 --- a/calculate_average_martin2038.sh +++ b/calculate_average_martin2038.sh @@ -15,6 +15,16 @@ # limitations under the License. # -# JAVA_OPTS="-XX:-EnableJVMCI -Xms16g -Xmx16g -XX:+AlwaysPreTouch -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC" -JAVA_OPTS="" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_martin2038 + +if [ -f target/CalculateAverage_martin2038_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_martin2038_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_martin2038_image +else + + #JAVA_OPTS="--enable-preview" + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_martin2038.sh to generate." 1>&2 + # JAVA_OPTS="-XX:-EnableJVMCI -Xms16g -Xmx16g -XX:+AlwaysPreTouch -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC" + JAVA_OPTS="" + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_martin2038 + +fi diff --git a/prepare_martin2038.sh b/prepare_martin2038.sh index 0bb0b02c9..bfbe2aeff 100755 --- a/prepare_martin2038.sh +++ b/prepare_martin2038.sh @@ -19,7 +19,8 @@ source "$HOME/.sdkman/bin/sdkman-init.sh" sdk use java 21.0.2-graal 1>&2 # -#if [ ! -f target/CalculateAverage_martin2038 ]; then -# NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native -R:MaxHeapSize=64m -H:-GenLoopSafepoints --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_martin2038" -# native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image dev.morling.onebrc.CalculateAverage_martin2038 -#fi \ No newline at end of file +if [ ! -f target/CalculateAverage_martin2038 ]; then + MAIN=dev.morling.onebrc.CalculateAverage_martin2038 + NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=$MAIN --gc=epsilon -O3 -march=native -R:MaxHeapSize=515m -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions" + $JAVA_HOME/bin/native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image $MAIN +fi \ No newline at end of file From 635569b1666d7847ef9d2849fefd31abb4fbc888 Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Tue, 30 Jan 2024 22:42:25 +0800 Subject: [PATCH 05/12] native --- prepare_martin2038.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare_martin2038.sh b/prepare_martin2038.sh index bfbe2aeff..ca7a8da56 100755 --- a/prepare_martin2038.sh +++ b/prepare_martin2038.sh @@ -22,5 +22,5 @@ sdk use java 21.0.2-graal 1>&2 if [ ! -f target/CalculateAverage_martin2038 ]; then MAIN=dev.morling.onebrc.CalculateAverage_martin2038 NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=$MAIN --gc=epsilon -O3 -march=native -R:MaxHeapSize=515m -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions" - $JAVA_HOME/bin/native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image $MAIN + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image $MAIN fi \ No newline at end of file From 91c27db86c018c2e689b3c7c179c3c194bca126b Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Wed, 31 Jan 2024 00:38:30 +0800 Subject: [PATCH 06/12] CalculateAverage_melgenek https://questdb.io/blog/building-faster-hash-table-high-performance-sql-joins/#fastmap-internals --- .../onebrc/CalculateAverage_martin2038.java | 226 ++++++++++++------ 1 file changed, 159 insertions(+), 67 deletions(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index 14def2d8b..cdac09bb8 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -17,9 +17,13 @@ import java.io.IOException; import java.io.RandomAccessFile; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel.MapMode; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -30,31 +34,31 @@ public class CalculateAverage_martin2038 { private static final String FILE = "./measurements.txt"; - private static record Measurement(String station, double value) { - private Measurement(String[] parts) { - this(parts[0], Double.parseDouble(parts[1])); - } - } - - private static record ResultRow(double min, double mean, double max) { - - public String toString() { - return round(min) + "/" + round(mean) + "/" + round(max); - } - - private double round(double value) { - return Math.round(value * 10.0) / 10.0; - } - }; + //private static record Measurement(String station, double value) { + // private Measurement(String[] parts) { + // this(parts[0], Double.parseDouble(parts[1])); + // } + //} + // + //private static record ResultRow(double min, double mean, double max) { + // + // public String toString() { + // return round(min) + "/" + round(mean) + "/" + round(max); + // } + // + // private double round(double value) { + // return Math.round(value * 10.0) / 10.0; + // } + //}; private static class MeasurementAggregator { - private int min = Integer.MAX_VALUE; - private int max = Integer.MIN_VALUE; + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; private long sum; - private int count; + private int count; void update(int temp) { - update(1,temp,temp,temp); + update(1, temp, temp, temp); } void update(int cnt, long sm, int min, int max) { @@ -68,13 +72,12 @@ void update(int cnt, long sm, int min, int max) { } } - void merge(MeasurementAggregator it){ - update(it.count,it.sum,it.min,it.max); + void merge(MeasurementAggregator it) { + update(it.count, it.sum, it.min, it.max); } - public String toString() { - return (min/ 10f) + "/" + Math.round(sum/(float)count)/10f + "/" + (max/ 10f); + return (min / 10f) + "/" + Math.round(sum / (float) count) / 10f + "/" + (max / 10f); } } @@ -82,18 +85,20 @@ public static void main(String[] args) throws IOException { var file = new RandomAccessFile(FILE, "r"); final int maxNameLength = 100; - //.parallel(). + //.parallel(). var fc = file.getChannel(); split(file).stream().parallel().map(ck -> { - var map = new HashMap(200); + // StrFastHashKey 比string快500ms + var map = new HashMap(200); //var pb = System.currentTimeMillis(); try { - var mb = fc.map(MapMode.READ_ONLY,ck.start, ck.length); + var mb = fc.map(MapMode.READ_ONLY, ck.start, ck.length); var buff = new byte[maxNameLength]; - while (mb.hasRemaining()){ - var name = readNextString(buff,mb);//.intern(); + while (mb.hasRemaining()) { + var name = readNextHashKey(buff, mb); + //var name = readNextString(buff,mb);//.intern(); var temp = readNextInt10Times(buff, mb); - add2map(map,name,temp); + add2map(map, name, temp); } //long end = ck.start + ck.length; //do { @@ -107,14 +112,14 @@ public static void main(String[] args) throws IOException { } //System.out.println("chunk end , cost : " + (System.currentTimeMillis() - pb)); return map; - }).reduce(CalculateAverage_martin2038::reduceMap).ifPresent(map->{ + }).reduce(CalculateAverage_martin2038::reduceMap).ifPresent(map -> { var sb = new StringBuilder(map.size() * 100); sb.append('{'); map.entrySet().stream().sorted(Map.Entry.comparingByKey()) - .forEachOrdered(kv->sb.append(kv.getKey()).append('=').append(kv.getValue()).append(", ")); - sb.deleteCharAt(sb.length()-1); - sb.setCharAt(sb.length()-1,'}'); + .forEachOrdered(kv -> sb.append(kv.getKey()).append('=').append(kv.getValue()).append(", ")); + sb.deleteCharAt(sb.length() - 1); + sb.setCharAt(sb.length() - 1, '}'); var resultStr = sb.toString(); System.out.println(resultStr); //System.out.println(resultStr.hashCode()); @@ -122,31 +127,31 @@ public static void main(String[] args) throws IOException { } - static HashMap reduceMap(HashMap aMap - ,HashMap bMap){ - aMap.forEach((k,v)->{ - var b = bMap.get(k); - if(null == b){ - bMap.put(k,v); - }else{ + static HashMap reduceMap(HashMap aMap + , HashMap bMap) { + aMap.forEach((k, v) -> { + var b = bMap.get(k); + if (null == b) { + bMap.put(k, v); + } else { b.merge(v); } }); return bMap; } - static void add2map(Map map,String name,int temp){ + static void add2map(Map map, Key name, int temp) { // 比computeIfAbsent 节约1秒 var agg = map.get(name); - if(null == agg){ + if (null == agg) { agg = new MeasurementAggregator(); - map.put(name,agg); + map.put(name, agg); } //var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); agg.update(temp); } - record FileChunk(long start,long length){} + record FileChunk(long start, long length) {} static List split(RandomAccessFile file) throws IOException { var threadNum = Runtime.getRuntime().availableProcessors(); @@ -165,7 +170,7 @@ static List split(RandomAccessFile file) throws IOException { length++; list.add(new FileChunk(lastStart, length)); lastStart += length; - if(lastStart>=total){ + if (lastStart >= total) { return list; } } @@ -173,22 +178,31 @@ static List split(RandomAccessFile file) throws IOException { return list; } + static StrFastHashKey readNextHashKey(byte[] buf, MappedByteBuffer mb) { + int i = 1; + mb.get(buf, 0, i); + byte b; + while ((b = mb.get()) != ';') { + buf[i++] = b; + } + return new StrFastHashKey(buf, i); + } static String readNextString(byte[] buf, MappedByteBuffer mb) { int i = 1; - mb.get(buf,0,i); + mb.get(buf, 0, i); byte b; while ((b = mb.get()) != ';') { buf[i++] = b; } - return new String(buf,0,i); + return new String(buf, 0, i); } // copy from CalculateAverage_3j5a // 替换 Double.parse // 时间 38秒 -> 5418 ms static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { - final int min_number_len = 3; + final int min_number_len = 3; int i = min_number_len; mb.get(buf, 0, i); byte b; @@ -212,22 +226,100 @@ static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { return temperature; } - //static String readNext(RandomAccessFile file, char endFlag,int initLength) throws IOException { - // StringBuilder input = new StringBuilder(initLength); - // int c = -1; - // //boolean eol = false; - // - // while (true) { - // c = file.read(); - // if( c == endFlag || c == -1) { - // break; - // } - // input.append((char)c); - // } - // - // //if ((c == -1) && (input.length() == 0)) { - // // return null; - // //} - // return input.toString(); - //} + //static String readNext(RandomAccessFile file, char endFlag,int initLength) throws IOException { + // StringBuilder input = new StringBuilder(initLength); + // int c = -1; + // //boolean eol = false; + // + // while (true) { + // c = file.read(); + // if( c == endFlag || c == -1) { + // break; + // } + // input.append((char)c); + // } + // + // //if ((c == -1) && (input.length() == 0)) { + // // return null; + // //} + // return input.toString(); + //} + + static class StrFastHashKey implements Comparable { + final byte[] name; + final int hash; + + String nameStr; + + StrFastHashKey(byte[] buf, int size) { + name = new byte[size]; + System.arraycopy(buf, 0, name, 0, size); + hash = calculateHash(name, 0, size - 1); + } + + @Override + public boolean equals(Object o) { + //if (this == o) {return true;} + //if (o == null || getClass() != o.getClass()) {return false;} + StrFastHashKey that = (StrFastHashKey) o; + return hash == that.hash && Arrays.equals(name, that.name); + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public String toString() { + if (null == nameStr) { + nameStr = new String(name); + } + return nameStr; + } + + @Override + public int compareTo(StrFastHashKey o) { + return toString().compareTo(o.toString()); + } + } + + private static final VarHandle LONG_VIEW = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder()) + .withInvokeExactBehavior(); + private static final VarHandle INT_VIEW = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder()) + .withInvokeExactBehavior(); + + /** + * This is a prime number that gives pretty + * good hash distributions + * on the data in this challenge. + */ + private static final long RANDOM_PRIME = 0x7A646E4D; + + /** + * The hash calculation is inspired by + * QuestDB FastMap + */ + private static int calculateHash(byte[] buffer, int startPosition, int endPosition) { + long hash = 0; + + int position = startPosition; + for (; position + Long.BYTES <= endPosition; position += Long.BYTES) { + long value = (long) LONG_VIEW.get(buffer, position); + hash = hash * RANDOM_PRIME + value; + } + + if (position + Integer.BYTES <= endPosition) { + int value = (int) INT_VIEW.get(buffer, position); + hash = hash * RANDOM_PRIME + value; + position += Integer.BYTES; + } + + for (; position <= endPosition; position++) { + hash = hash * RANDOM_PRIME + buffer[position]; + } + hash = hash * RANDOM_PRIME; + return (int) hash ^ (int) (hash >>> 32); + } + } From 0dd6a1783c0f64695dc6895289594eb22a077a0f Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Wed, 31 Jan 2024 16:49:09 +0800 Subject: [PATCH 07/12] mvn formatting --- .../onebrc/CalculateAverage_martin2038.java | 121 +++++++++--------- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index cdac09bb8..13c852869 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -30,32 +30,32 @@ public class CalculateAverage_martin2038 { - //private static final String FILE = "/Users/martin/Garden/blog/1BRC/1brc/./measurements.txt"; + // private static final String FILE = "/Users/martin/Garden/blog/1BRC/1brc/./measurements.txt"; private static final String FILE = "./measurements.txt"; - //private static record Measurement(String station, double value) { - // private Measurement(String[] parts) { - // this(parts[0], Double.parseDouble(parts[1])); - // } - //} + // private static record Measurement(String station, double value) { + // private Measurement(String[] parts) { + // this(parts[0], Double.parseDouble(parts[1])); + // } + // } // - //private static record ResultRow(double min, double mean, double max) { + // private static record ResultRow(double min, double mean, double max) { // - // public String toString() { - // return round(min) + "/" + round(mean) + "/" + round(max); - // } + // public String toString() { + // return round(min) + "/" + round(mean) + "/" + round(max); + // } // - // private double round(double value) { - // return Math.round(value * 10.0) / 10.0; - // } - //}; + // private double round(double value) { + // return Math.round(value * 10.0) / 10.0; + // } + // }; private static class MeasurementAggregator { - private int min = Integer.MAX_VALUE; - private int max = Integer.MIN_VALUE; + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; private long sum; - private int count; + private int count; void update(int temp) { update(1, temp, temp, temp); @@ -85,32 +85,33 @@ public static void main(String[] args) throws IOException { var file = new RandomAccessFile(FILE, "r"); final int maxNameLength = 100; - //.parallel(). + // .parallel(). var fc = file.getChannel(); split(file).stream().parallel().map(ck -> { // StrFastHashKey 比string快500ms var map = new HashMap(200); - //var pb = System.currentTimeMillis(); + // var pb = System.currentTimeMillis(); try { var mb = fc.map(MapMode.READ_ONLY, ck.start, ck.length); var buff = new byte[maxNameLength]; while (mb.hasRemaining()) { var name = readNextHashKey(buff, mb); - //var name = readNextString(buff,mb);//.intern(); + // var name = readNextString(buff,mb);//.intern(); var temp = readNextInt10Times(buff, mb); add2map(map, name, temp); } - //long end = ck.start + ck.length; - //do { - // var name = readNext(file, ';', 30).intern(); - // var temp = Double.parseDouble(readNext(file, '\n', 6)); - // var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); - // agg.update(temp); - //}while (file.getFilePointer()new MeasurementAggregator()); + // agg.update(temp); + // }while (file.getFilePointer() { @@ -122,18 +123,18 @@ public static void main(String[] args) throws IOException { sb.setCharAt(sb.length() - 1, '}'); var resultStr = sb.toString(); System.out.println(resultStr); - //System.out.println(resultStr.hashCode()); + // System.out.println(resultStr.hashCode()); }); } - static HashMap reduceMap(HashMap aMap - , HashMap bMap) { + static HashMap reduceMap(HashMap aMap, HashMap bMap) { aMap.forEach((k, v) -> { var b = bMap.get(k); if (null == b) { bMap.put(k, v); - } else { + } + else { b.merge(v); } }); @@ -147,11 +148,12 @@ static void add2map(Map map, Key name, int tem agg = new MeasurementAggregator(); map.put(name, agg); } - //var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); + // var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); agg.update(temp); } - record FileChunk(long start, long length) {} + record FileChunk(long start, long length) { + } static List split(RandomAccessFile file) throws IOException { var threadNum = Runtime.getRuntime().availableProcessors(); @@ -163,7 +165,7 @@ static List split(RandomAccessFile file) throws IOException { var length = avgChunkSize; file.seek(lastStart + length); while (file.readByte() != '\n') { - //file.seek(lastStart+ ++length); + // file.seek(lastStart+ ++length); ++length; } // include the '\n' @@ -200,7 +202,7 @@ static String readNextString(byte[] buf, MappedByteBuffer mb) { // copy from CalculateAverage_3j5a // 替换 Double.parse - // 时间 38秒 -> 5418 ms + // 时间 38秒 -> 5418 ms static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { final int min_number_len = 3; int i = min_number_len; @@ -209,7 +211,7 @@ static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { while ((b = mb.get()) != '\n') { buf[i++] = b; } - //-3.2 + // -3.2 var zeroAscii = '0'; int temperature = buf[--i] - zeroAscii; i--; // skipping dot @@ -218,7 +220,8 @@ static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { b = buf[--i]; if (b == '-') { temperature = -temperature; - } else { + } + else { temperature = base * (b - zeroAscii) + temperature; base *= base; } @@ -226,28 +229,28 @@ static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { return temperature; } - //static String readNext(RandomAccessFile file, char endFlag,int initLength) throws IOException { - // StringBuilder input = new StringBuilder(initLength); - // int c = -1; - // //boolean eol = false; + // static String readNext(RandomAccessFile file, char endFlag,int initLength) throws IOException { + // StringBuilder input = new StringBuilder(initLength); + // int c = -1; + // //boolean eol = false; // - // while (true) { - // c = file.read(); - // if( c == endFlag || c == -1) { - // break; - // } - // input.append((char)c); - // } + // while (true) { + // c = file.read(); + // if( c == endFlag || c == -1) { + // break; + // } + // input.append((char)c); + // } // - // //if ((c == -1) && (input.length() == 0)) { - // // return null; - // //} - // return input.toString(); - //} + // //if ((c == -1) && (input.length() == 0)) { + // // return null; + // //} + // return input.toString(); + // } static class StrFastHashKey implements Comparable { final byte[] name; - final int hash; + final int hash; String nameStr; @@ -259,8 +262,8 @@ static class StrFastHashKey implements Comparable { @Override public boolean equals(Object o) { - //if (this == o) {return true;} - //if (o == null || getClass() != o.getClass()) {return false;} + // if (this == o) {return true;} + // if (o == null || getClass() != o.getClass()) {return false;} StrFastHashKey that = (StrFastHashKey) o; return hash == that.hash && Arrays.equals(name, that.name); } @@ -286,7 +289,7 @@ public int compareTo(StrFastHashKey o) { private static final VarHandle LONG_VIEW = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder()) .withInvokeExactBehavior(); - private static final VarHandle INT_VIEW = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder()) + private static final VarHandle INT_VIEW = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder()) .withInvokeExactBehavior(); /** From b7bee91895ca06b23f0bd97d24445ff68a642119 Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Wed, 31 Jan 2024 17:11:19 +0800 Subject: [PATCH 08/12] jvm model --- prepare_martin2038.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/prepare_martin2038.sh b/prepare_martin2038.sh index ca7a8da56..cf8e83f77 100755 --- a/prepare_martin2038.sh +++ b/prepare_martin2038.sh @@ -18,9 +18,9 @@ # Uncomment below to use sdk source "$HOME/.sdkman/bin/sdkman-init.sh" sdk use java 21.0.2-graal 1>&2 -# -if [ ! -f target/CalculateAverage_martin2038 ]; then - MAIN=dev.morling.onebrc.CalculateAverage_martin2038 - NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=$MAIN --gc=epsilon -O3 -march=native -R:MaxHeapSize=515m -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions" - native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image $MAIN -fi \ No newline at end of file +## +#if [ ! -f target/CalculateAverage_martin2038 ]; then +# MAIN=dev.morling.onebrc.CalculateAverage_martin2038 +# NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=$MAIN --gc=epsilon -O3 -march=native -R:MaxHeapSize=515m -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions" +# native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image $MAIN +#fi \ No newline at end of file From 313d31d3b496bc3661b2ca7c53751e9c26a3b82f Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Thu, 1 Feb 2024 01:31:31 +0800 Subject: [PATCH 09/12] 10k name --- .../java/dev/morling/onebrc/CalculateAverage_martin2038.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index 13c852869..5304da41d 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -84,7 +84,7 @@ public String toString() { public static void main(String[] args) throws IOException { var file = new RandomAccessFile(FILE, "r"); - final int maxNameLength = 100; + final int maxNameLength = 106; // .parallel(). var fc = file.getChannel(); split(file).stream().parallel().map(ck -> { From e85c07079b0dd9ee88283f98380580e1336b640e Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Thu, 1 Feb 2024 02:04:21 +0800 Subject: [PATCH 10/12] 10k name --- .../java/dev/morling/onebrc/CalculateAverage_martin2038.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index 5304da41d..a69c47fd5 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -84,7 +84,7 @@ public String toString() { public static void main(String[] args) throws IOException { var file = new RandomAccessFile(FILE, "r"); - final int maxNameLength = 106; + final int maxNameLength = 110; // .parallel(). var fc = file.getChannel(); split(file).stream().parallel().map(ck -> { From 35f3da93f6dfb6f4108714a47109b1617f1f7ae6 Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Thu, 1 Feb 2024 02:57:32 +0800 Subject: [PATCH 11/12] round mean --- .../java/dev/morling/onebrc/CalculateAverage_martin2038.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index a69c47fd5..8076165a3 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -77,7 +77,8 @@ void merge(MeasurementAggregator it) { } public String toString() { - return (min / 10f) + "/" + Math.round(sum / (float) count) / 10f + "/" + (max / 10f); + var mean = this.sum / 10.0 / this.count; + return (min / 10f) + "/" + Math.round(mean * 10) / 10f + "/" + (max / 10f); } } @@ -96,7 +97,7 @@ public static void main(String[] args) throws IOException { var buff = new byte[maxNameLength]; while (mb.hasRemaining()) { var name = readNextHashKey(buff, mb); - // var name = readNextString(buff,mb);//.intern(); + // var name = readNextString(buff, mb);// .intern(); var temp = readNextInt10Times(buff, mb); add2map(map, name, temp); } From 5c4c9b6e47e11b4f8b0c565ae2d9ad2adc89faa7 Mon Sep 17 00:00:00 2001 From: "martin.cong" Date: Fri, 2 Feb 2024 18:51:20 +0800 Subject: [PATCH 12/12] limit ChunkSize smaller than Integer.MAX_VALUE --- .../onebrc/CalculateAverage_martin2038.java | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java index 8076165a3..073f157c3 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -34,23 +34,6 @@ public class CalculateAverage_martin2038 { private static final String FILE = "./measurements.txt"; - // private static record Measurement(String station, double value) { - // private Measurement(String[] parts) { - // this(parts[0], Double.parseDouble(parts[1])); - // } - // } - // - // private static record ResultRow(double min, double mean, double max) { - // - // public String toString() { - // return round(min) + "/" + round(mean) + "/" + round(max); - // } - // - // private double round(double value) { - // return Math.round(value * 10.0) / 10.0; - // } - // }; - private static class MeasurementAggregator { private int min = Integer.MAX_VALUE; private int max = Integer.MIN_VALUE; @@ -86,7 +69,6 @@ public static void main(String[] args) throws IOException { var file = new RandomAccessFile(FILE, "r"); final int maxNameLength = 110; - // .parallel(). var fc = file.getChannel(); split(file).stream().parallel().map(ck -> { // StrFastHashKey 比string快500ms @@ -157,9 +139,12 @@ record FileChunk(long start, long length) { } static List split(RandomAccessFile file) throws IOException { - var threadNum = Runtime.getRuntime().availableProcessors(); long total = file.length(); + var threadNum = Math.max((int) (total / Integer.MAX_VALUE + 1), Runtime.getRuntime().availableProcessors()); long avgChunkSize = total / threadNum; + // System.out.println(avgChunkSize +" \t avgChunkSize : INT/MAX \t"+Integer.MAX_VALUE); + // Exception in thread "main" java.lang.IllegalArgumentException: Size exceeds Integer.MAX_VALUE + // at java.base/sun.nio.ch.FileChannelImpl.map(FileChannelImpl.java:1183) long lastStart = 0; var list = new ArrayList(threadNum); for (var i = 0; i < threadNum - 1; i++) { @@ -258,7 +243,9 @@ static class StrFastHashKey implements Comparable { StrFastHashKey(byte[] buf, int size) { name = new byte[size]; System.arraycopy(buf, 0, name, 0, size); - hash = calculateHash(name, 0, size - 1); + // hash = calculateHash(name, 0, size - 1); + // FNV1a save 100+ms than calculateHash + hash = hashFNV1a(name, size); } @Override @@ -326,4 +313,25 @@ private static int calculateHash(byte[] buffer, int startPosition, int endPositi return (int) hash ^ (int) (hash >>> 32); } + private static final int FNV1_32_INIT = 0x811c9dc5; + private static final int FNV1_PRIME_32 = 16777619; + + /** + * https://github.com/prasanthj/hasher/blob/master/src/main/java/hasher/FNV1a.java + * + * FNV1a 32 bit variant. + * + * @param data - input byte array + * @param length - length of array + * @return - hashcode + */ + public static int hashFNV1a(byte[] data, int length) { + int hash = FNV1_32_INIT; + for (int i = 0; i < length; i++) { + hash ^= (data[i] & 0xff); + hash *= FNV1_PRIME_32; + } + + return hash; + } }