diff --git a/calculate_average_melgenek.sh b/calculate_average_melgenek.sh
index e0a88a352..ad709c31b 100755
--- a/calculate_average_melgenek.sh
+++ b/calculate_average_melgenek.sh
@@ -29,8 +29,8 @@ logicalCpuCount=$([ $(uname) = 'Darwin' ] &&
sysctl -n hw.logicalcpu_max ||
lscpu -p | egrep -v '^#' | wc -l)
# The required heap is proportional to the number of cores.
-# There's roughly 3.5MB heap per thread required for the 10k problem.
-requiredMemory=$(echo "(l(15 + 3.5 * $logicalCpuCount)/l(2))" | bc -l)
+# There's roughly 6MB heap per thread required for the 10k problem.
+requiredMemory=$(echo "(l(15 + 6 * $logicalCpuCount)/l(2))" | bc -l)
heapSize=$(echo "scale=0; 2^(($requiredMemory+1)/1)" | bc)
JAVA_OPTS="$JAVA_OPTS -Xms${heapSize}m -Xmx${heapSize}m"
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java b/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java
index 133573186..924cf15d8 100644
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java
@@ -15,12 +15,8 @@
*/
package dev.morling.onebrc;
-import jdk.incubator.vector.ByteVector;
-import jdk.incubator.vector.Vector;
-import jdk.incubator.vector.VectorOperators;
-import jdk.incubator.vector.VectorSpecies;
+import jdk.incubator.vector.*;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.invoke.MethodHandles;
@@ -30,7 +26,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.TreeMap;
-import java.util.concurrent.Executors;
+import java.util.concurrent.*;
/**
* The implementation:
@@ -47,7 +43,6 @@ public class CalculateAverage_melgenek {
private static final int CORES_COUNT = Runtime.getRuntime().availableProcessors();
private static final String FILE = "./measurements.txt";
-
/**
* This is a prime number that gives pretty
* good hash distributions
@@ -63,28 +58,46 @@ public class CalculateAverage_melgenek {
private static final Vector NEWLINE_VECTOR = BYTE_SPECIES.broadcast(NEWLINE);
private static final Vector SEMICOLON_VECTOR = BYTE_SPECIES.broadcast(SEMICOLON);
private static final int MAX_LINE_LENGTH = 107; // 100 + len(";-11.1\n") = 100+7
- private static final TreeMap RESULT = new TreeMap<>();
public static void main(String[] args) throws Throwable {
long totalSize = Files.size(Path.of(FILE));
- try (var executor = Executors.newFixedThreadPool(CORES_COUNT - 1)) {
- long chunkSize = Math.max(1, totalSize / CORES_COUNT);
- long offset = 0;
+ long chunkSize = Math.max(MAX_LINE_LENGTH, totalSize / CORES_COUNT);
+ var result = new TreeMap();
+ try (var executor = Executors.newFixedThreadPool(CORES_COUNT)) {
+ var service = new ExecutorCompletionService(executor);
int i = 0;
- for (; offset < totalSize && i < CORES_COUNT - 1; i++) {
- long currentOffset = offset;
+ for (; i * chunkSize < totalSize; i++) {
+ long currentOffset = Math.max(0, i * chunkSize - 1);
long maxOffset = Math.min((i + 1) * chunkSize, totalSize);
- executor.submit(() -> processRange(currentOffset, maxOffset));
- offset = (i + 1) * chunkSize - 1;
+ service.submit(() -> processRange(currentOffset, maxOffset));
}
- if (offset < totalSize) {
- processRange(offset, totalSize);
+ for (; i > 0; i--) {
+ service.take().get().addRows(result);
}
}
- System.out.println(RESULT);
+ System.out.println(printTree(result));
}
- private static void processRange(long startOffset, long maxOffset) {
+ private static String printTree(TreeMap result) {
+ var sb = new StringBuilder(50 * result.size());
+ sb.append("{");
+ boolean first = true;
+ for (var entry : result.entrySet()) {
+ if (first) {
+ first = false;
+ }
+ else {
+ sb.append(", ");
+ }
+ sb.append(entry.getKey());
+ sb.append('=');
+ entry.getValue().appendToStringBuffer(sb);
+ }
+ sb.append("}");
+ return sb.toString();
+ }
+
+ private static CompositeTable processRange(long startOffset, long maxOffset) {
final var table = new CompositeTable();
try (var file = new BufferedFile(startOffset, maxOffset)) {
processChunk(file, table);
@@ -92,12 +105,10 @@ private static void processRange(long startOffset, long maxOffset) {
catch (Exception e) {
throw new RuntimeException(e);
}
- synchronized (RESULT) {
- table.addRows(RESULT);
- }
+ return table;
}
- private static void processChunk(BufferedFile file, CompositeTable table) {
+ private static void processChunk(BufferedFile file, CompositeTable table) throws IOException {
if (file.offset != 0) {
file.refillBuffer();
int newlinePosition = findDelimiter(file, 0, NEWLINE_VECTOR, NEWLINE);
@@ -223,18 +234,18 @@ private static int calculateHash(byte[] buffer, int startPosition, int endPositi
long hash = 0;
int position = startPosition;
- for (; position + Long.BYTES <= endPosition; position += Long.BYTES) {
+ for (; position + Long.BYTES < endPosition; position += Long.BYTES) {
long value = (long) LONG_VIEW.get(buffer, position);
hash = hash * RANDOM_PRIME + value;
}
- if (position + Integer.BYTES <= endPosition) {
+ if (position + Integer.BYTES < endPosition) {
int value = (int) INT_VIEW.get(buffer, position);
hash = hash * RANDOM_PRIME + value;
position += Integer.BYTES;
}
- for (; position <= endPosition; position++) {
+ for (; position < endPosition; position++) {
hash = hash * RANDOM_PRIME + buffer[position];
}
hash = hash * RANDOM_PRIME;
@@ -261,8 +272,6 @@ private static final class LongTable {
*/
private final long[] buckets = new long[TABLE_CAPACITY * 3];
- int keysCount = 0;
-
public void add(long str, short value) {
int hash = calculateLongHash(str);
int bucketIdx = hash & TABLE_CAPACITY_MASK;
@@ -273,7 +282,6 @@ public void add(long str, short value) {
}
else if (bucketStr == 0L) {
createBucket(bucketIdx, str, value);
- keysCount++;
}
else {
addWithProbing(str, value, (bucketIdx + 1) & TABLE_CAPACITY_MASK);
@@ -290,7 +298,6 @@ private void addWithProbing(long str, short value, int bucketIdx) {
}
else if (bucketStr == 0L) {
createBucket(bucketIdx, str, value);
- keysCount++;
break;
}
else {
@@ -367,15 +374,12 @@ private static final class RegularTable {
private static final int TABLE_CAPACITY_MASK = TABLE_CAPACITY - 1;
private final Bucket[] buckets = new Bucket[TABLE_CAPACITY];
- int keysCount = 0;
-
public void add(byte[] data, int start, int stringLength, int hash, short value) {
int bucketIdx = hash & TABLE_CAPACITY_MASK;
var bucket = buckets[bucketIdx];
if (bucket == null) {
buckets[bucketIdx] = new Bucket(data, start, stringLength, hash, value);
- keysCount++;
}
else if (hash == bucket.hash && bucket.isEqual(data, start, stringLength)) {
bucket.update(value);
@@ -391,7 +395,6 @@ private void addWithProbing(byte[] data, int start, int stringLength, int hash,
var bucket = buckets[bucketIdx];
if (bucket == null) {
buckets[bucketIdx] = new Bucket(data, start, stringLength, hash, value);
- keysCount++;
break;
}
else if (hash == bucket.hash && bucket.isEqual(data, start, stringLength)) {
@@ -449,6 +452,14 @@ public boolean isEqual(byte[] data, int start, int length) {
if (str.length != length)
return false;
int i = 0;
+ int vectorLoopBound = BYTE_SPECIES.loopBound(str.length);
+ for (; i < vectorLoopBound; i += BYTE_SPECIES_BYTE_SIZE) {
+ var vector1 = ByteVector.fromArray(BYTE_SPECIES, str, i);
+ var vector2 = ByteVector.fromArray(BYTE_SPECIES, data, start + i);
+ var comparisonResult = vector1.compare(VectorOperators.NE, vector2);
+ if (comparisonResult.anyTrue())
+ return false;
+ }
for (; i + Long.BYTES < str.length; i += Long.BYTES) {
long value1 = (long) LONG_VIEW.get(str, i);
long value2 = (long) LONG_VIEW.get(data, start + i);
@@ -493,10 +504,12 @@ public void add(long anotherSum, int anotherCount, short anotherMin, short anoth
min = anotherMin;
}
- public String toString() {
- return Math.round((double) min) / 10.0 + "/"
- + Math.round((double) sum / count) / 10.0 + "/"
- + Math.round((double) max) / 10.0;
+ public void appendToStringBuffer(StringBuilder sb) {
+ sb.append(Math.round((double) min) / 10.0);
+ sb.append('/');
+ sb.append(Math.round((double) sum / count) / 10.0);
+ sb.append('/');
+ sb.append(Math.round((double) max) / 10.0);
}
}
@@ -513,30 +526,19 @@ private static final class BufferedFile implements AutoCloseable {
private final RandomAccessFile file;
private long offset;
- private BufferedFile(long startOffset, long maxOffset) throws FileNotFoundException {
+ private BufferedFile(long startOffset, long maxOffset) throws IOException {
this.offset = startOffset;
this.maxOffset = maxOffset;
this.file = new RandomAccessFile(FILE, "r");
}
- private void refillBuffer() {
+ private void refillBuffer() throws IOException {
int remainingBytes = bufferLimit - bufferPosition;
if (remainingBytes < MAX_LINE_LENGTH) {
bufferPosition = 0;
- int bytesRead;
- try {
- file.seek(offset);
- bytesRead = file.read(buffer, 0, BUFFER_SIZE);
- }
- catch (IOException e) {
- throw new RuntimeException(e);
- }
- if (bytesRead > 0) {
- bufferLimit = bytesRead;
- }
- else {
- bufferLimit = 0;
- }
+ file.seek(offset);
+ int bytesRead = file.read(buffer, 0, BUFFER_SIZE);
+ bufferLimit = Math.max(bytesRead, 0);
}
}