diff --git a/calculate_average_JurenIvan.sh b/calculate_average_JurenIvan.sh new file mode 100755 index 000000000..73d956e90 --- /dev/null +++ b/calculate_average_JurenIvan.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_JurenIvan diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java b/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java new file mode 100644 index 000000000..3f9306899 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java @@ -0,0 +1,219 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Objects; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.lang.Math.round; +import static java.nio.channels.FileChannel.MapMode.READ_ONLY; +import static java.nio.file.StandardOpenOption.READ; + +public class CalculateAverage_JurenIvan { + + private static final String FILE_NAME = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + long[] segments = getSegments(Runtime.getRuntime().availableProcessors()); + + var result = IntStream.range(0, segments.length - 1) + .parallel() + .mapToObj(i -> processSegment(segments[i], segments[i + 1])) + .flatMap(m -> Arrays.stream(m.hashTable).filter(Objects::nonNull)) + .collect(Collectors.toMap(m -> new String(m.city), m -> m, Measurement::merge, TreeMap::new)); + + System.out.println(result); + } + + private static LinearProbingHashMap processSegment(long start, long end) { + var results = new LinearProbingHashMap(1 << 19); + + try (var fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE_NAME), READ)) { + var bb = fileChannel.map(READ_ONLY, start, end - start); + var buffer = new byte[100]; + + int limit = bb.limit(); + for (int startLine = bb.position(); startLine < limit; startLine = bb.position()) { + int currentPosition = startLine; + + byte b; + int hash = 7; + int wordLen = 0; + while (currentPosition < end && (b = bb.get(currentPosition++)) != ';') { + buffer[wordLen++] = b; + hash = hash * 31 + b; + } + + int temp; + int negative = 1; + if (bb.get(currentPosition) == '-') { + negative = -1; + currentPosition++; + } + + if (bb.get(currentPosition + 1) == '.') { + temp = negative * ((bb.get(currentPosition) - '0') * 10 + (bb.get(currentPosition + 2) - '0')); + currentPosition += 3; + } + else { + temp = negative * ((bb.get(currentPosition) - '0') * 100 + ((bb.get(currentPosition + 1) - '0') * 10 + (bb.get(currentPosition + 3) - '0'))); + currentPosition += 4; + } + + currentPosition++; + + results.put(hash, buffer, wordLen, temp); + + bb.position(currentPosition); + } + } + catch (IOException e) { + throw new RuntimeException(e); + } + return results; + } + + private static long[] getSegments(int segmentCount) throws IOException { + try (var raf = new RandomAccessFile(FILE_NAME, "r")) { + long fileSize = raf.length(); + + if (fileSize < 100000) { + long[] chunks = new long[2]; + chunks[1] = fileSize; + return chunks; + } + + while (fileSize / segmentCount >= (Integer.MAX_VALUE - 150)) { + segmentCount *= 2; + } + + long[] chunks = new long[segmentCount + 1]; + + chunks[0] = 0; + long segmentSize = fileSize / segmentCount; + + for (int i = 1; i < segmentCount; i++) { + long chunkOffset = chunks[i - 1] + segmentSize; + raf.seek(chunkOffset); + while (raf.readByte() != '\n') { + } + chunks[i] = raf.getFilePointer(); + } + chunks[segmentCount] = fileSize; + return chunks; + } + } + + public static class LinearProbingHashMap { + final Measurement[] hashTable; + int slots; + + public LinearProbingHashMap(int slots) { + this.slots = slots; + this.hashTable = new Measurement[slots]; + } + + void put(int hash, byte[] key, int len, int temperature) { + hash = Math.abs(hash); + int index = hash & (slots - 1); + + int i = index; + while (hashTable[i] != null) { + if (keyIsEqual(key, hashTable[i].city, len)) { // handling hash collisions + hashTable[i].add(temperature); + return; + } + i++; + if (i == slots) { + i = 0; + } + } + + var cityArr = new byte[len]; + System.arraycopy(key, 0, cityArr, 0, len); + hashTable[i] = new Measurement(cityArr, hash, temperature, temperature, 1, temperature); + } + + private boolean keyIsEqual(byte[] one, byte[] other, int len) { + if (len != other.length) + return false; + for (int i = 0; i < len; i++) { + if (one[i] != other[i]) { + return false; + } + } + return true; + } + + } + + static class Measurement { + byte[] city; + int hash; + int min; + int max; + int count; + long sum; + + public Measurement(byte[] city, int hash, int min, int max, int count, long sum) { + this.city = city; + this.hash = hash; + this.min = min; + this.max = max; + this.count = count; + this.sum = sum; + } + + public void add(int temperature) { + min = Math.min(min, temperature); + max = Math.max(max, temperature); + count++; + sum += temperature; + } + + public Measurement merge(Measurement other) { + min = Math.min(min, other.min); + max = Math.max(max, other.max); + count += other.count; + sum += other.sum; + return this; + } + + @Override + public String toString() { + return (min * 1.0) / 10 + "/" + round((sum * 1.0) / count) / 10.0 + "/" + (max * 1.0) / 10; + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public boolean equals(Object obj) { + return Arrays.equals(city, ((Measurement) obj).city); + } + } +}