Skip to content

Commit

Permalink
serkan-ozal's 7th submission: (#679)
Browse files Browse the repository at this point in the history
- use smaller regions (i.e. a higher region count) so that workers which finish their tasks early spend less time idle
- remove some configuration-related lookups during initialization, which will hopefully save a few tens of milliseconds
- reorder the temperature-value parsing instructions to (hopefully) benefit more from instruction-level parallelism (ILP)
  • Loading branch information
serkan-ozal authored Jan 31, 2024
1 parent 9b9bb8e commit 6a2e505
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 40 deletions.
6 changes: 2 additions & 4 deletions calculate_average_serkan-ozal.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,16 @@
JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector "
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions"
JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000"
JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0"
JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:LoopStripMiningIter=0 -XX:GuaranteedSafepointInterval=0"
JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI"
JAVA_OPTS="$JAVA_OPTS -XX:SharedArchiveFile=target/CalculateAverage_serkan_ozal_cds.jsa"
JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"
if [[ ! "$(uname -s)" = "Darwin" ]]; then
JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages"
fi

CONFIGS="USE_SHARED_ARENA=true USE_SHARED_REGION=true CLOSE_STDOUT_ON_RESULT=true REGION_COUNT=128"

#echo "Process started at $(date +%s%N | cut -b1-13)"
eval "exec 3< <({ $CONFIGS java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_serkan_ozal; })"
eval "exec 3< <({ java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_serkan_ozal; })"
read <&3 result
echo -e "$result"
#echo "Process finished at $(date +%s%N | cut -b1-13)"
77 changes: 41 additions & 36 deletions src/main/java/dev/morling/onebrc/CalculateAverage_serkan_ozal.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ public class CalculateAverage_serkan_ozal {

// Get configurations
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
private static final boolean VERBOSE = getBooleanConfig("VERBOSE", false);
private static final int THREAD_COUNT = getIntegerConfig("THREAD_COUNT", Runtime.getRuntime().availableProcessors());
private static final boolean USE_VTHREADS = getBooleanConfig("USE_VTHREADS", false);
private static final int VTHREAD_COUNT = getIntegerConfig("VTHREAD_COUNT", 1024);
private static final int REGION_COUNT = getIntegerConfig("REGION_COUNT", -1);
private static final boolean USE_SHARED_ARENA = getBooleanConfig("USE_SHARED_ARENA", true);
private static final boolean USE_SHARED_REGION = getBooleanConfig("USE_SHARED_REGION", true);
private static final int MAP_CAPACITY = getIntegerConfig("MAP_CAPACITY", 1 << 17);
private static final boolean CLOSE_STDOUT_ON_RESULT = getBooleanConfig("CLOSE_STDOUT_ON_RESULT", true);
private static final boolean VERBOSE = false; // getBooleanConfig("VERBOSE", false);
private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors(); // getIntegerConfig("THREAD_COUNT", Runtime.getRuntime().availableProcessors());
private static final boolean USE_VTHREADS = false; // getBooleanConfig("USE_VTHREADS", false);
private static final int VTHREAD_COUNT = 1024; // getIntegerConfig("VTHREAD_COUNT", 1024);
private static final int REGION_COUNT = 256; // getIntegerConfig("REGION_COUNT", -1);
private static final boolean USE_SHARED_ARENA = true; // getBooleanConfig("USE_SHARED_ARENA", true);
private static final boolean USE_SHARED_REGION = true; // getBooleanConfig("USE_SHARED_REGION", true);
private static final int MAP_CAPACITY = 1 << 17; // getIntegerConfig("MAP_CAPACITY", 1 << 17);
private static final boolean CLOSE_STDOUT_ON_RESULT = true; // getBooleanConfig("CLOSE_STDOUT_ON_RESULT", true);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// My dear old friend Unsafe
Expand Down Expand Up @@ -346,10 +346,16 @@ private long findClosestLineEnd(long endPos, long minPos) {
// Credits: merykitty
private long extractValue(long regionPtr, long word, OpenMap map, int entryOffset) {
// Parse and extract value

// 1. level instruction set (no dependency between each other so can be run in parallel)
long signed = (~word << 59) >> 63;
int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000);

// 2. level instruction set (no dependency between each other so can be run in parallel)
long nextPtr = regionPtr + (decimalSepPos >>> 3) + 3;
int shift = 28 - decimalSepPos;
long signed = (~word << 59) >> 63;
long designMask = ~(signed & 0xFF);

long digits = ((word & designMask) << shift) & 0x0F000F0F00L;
long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
int value = (int) ((absValue ^ signed) - signed);
Expand All @@ -358,12 +364,10 @@ private long extractValue(long regionPtr, long word, OpenMap map, int entryOffse
map.putValue(entryOffset, value);

// Return new position
return regionPtr + (decimalSepPos >>> 3) + 3;
return nextPtr;
}

private void doProcessRegion(long regionStart, long regionEnd) {
final int vectorSize = BYTE_SPECIES.vectorByteSize();

final long size = regionEnd - regionStart;
final long segmentSize = size / 2;

Expand Down Expand Up @@ -392,26 +396,26 @@ private void doProcessRegion(long regionStart, long regionEnd) {
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();

if (keyLength1 != vectorSize && keyLength2 != vectorSize) {
if (keyLength1 != BYTE_SPECIES_SIZE && keyLength2 != BYTE_SPECIES_SIZE) {
regionPtr1 += (keyLength1 + 1);
regionPtr2 += (keyLength2 + 1);
}
else {
if (keyLength1 != vectorSize) {
if (keyLength1 != BYTE_SPECIES_SIZE) {
regionPtr1 += (keyLength1 + 1);
}
else {
regionPtr1 += vectorSize;
regionPtr1 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
;
keyLength1 = (int) (regionPtr1 - keyStartPtr1);
regionPtr1++;
}
if (keyLength2 != vectorSize) {
if (keyLength2 != BYTE_SPECIES_SIZE) {
regionPtr2 += (keyLength2 + 1);
}
else {
regionPtr2 += vectorSize;
regionPtr2 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
;
keyLength2 = (int) (regionPtr2 - keyStartPtr2);
Expand All @@ -431,28 +435,28 @@ private void doProcessRegion(long regionStart, long regionEnd) {
// Calculate key hashes and find entry indexes
////////////////////////////////////////////////////////////////////////////////////////////////////////
int x1, y1, x2, y2;
if (keyLength1 >= Integer.BYTES && keyLength2 >= Integer.BYTES) {
if (keyLength1 > 3 && keyLength2 > 3) {
x1 = U.getInt(keyStartPtr1);
y1 = U.getInt(keyStartPtr1 + keyLength1 - Integer.BYTES);
y1 = U.getInt(regionPtr1 - 5);
x2 = U.getInt(keyStartPtr2);
y2 = U.getInt(keyStartPtr2 + keyLength2 - Integer.BYTES);
y2 = U.getInt(regionPtr2 - 5);
}
else {
if (keyLength1 >= Integer.BYTES) {
if (keyLength1 > 3) {
x1 = U.getInt(keyStartPtr1);
y1 = U.getInt(keyStartPtr1 + keyLength1 - Integer.BYTES);
y1 = U.getInt(regionPtr1 - 5);
}
else {
x1 = U.getByte(keyStartPtr1);
y1 = U.getByte(keyStartPtr1 + keyLength1 - Byte.BYTES);
y1 = U.getByte(regionPtr1 - 2);
}
if (keyLength2 >= Integer.BYTES) {
if (keyLength2 > 3) {
x2 = U.getInt(keyStartPtr2);
y2 = U.getInt(keyStartPtr2 + keyLength2 - Integer.BYTES);
y2 = U.getInt(regionPtr2 - 5);
}
else {
x2 = U.getByte(keyStartPtr2);
y2 = U.getByte(keyStartPtr2 + keyLength2 - Byte.BYTES);
y2 = U.getByte(regionPtr2 - 2);
}
}

Expand All @@ -477,19 +481,19 @@ private void doProcessRegion(long regionStart, long regionEnd) {
}

// Read and process region - tail
doProcessTail(regionPtr1, regionEnd1, regionPtr2, regionEnd2, vectorSize);
doProcessTail(regionPtr1, regionEnd1, regionPtr2, regionEnd2);
}

private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, long regionEnd2, int vectorSize) {
private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, long regionEnd2) {
while (regionPtr1 < regionEnd1) {
long keyStartPtr1 = regionPtr1;
ByteVector keyVector1 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr1, NATIVE_BYTE_ORDER);
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
if (keyLength1 != vectorSize) {
if (keyLength1 != BYTE_SPECIES_SIZE) {
regionPtr1 += (keyLength1 + 1);
}
else {
regionPtr1 += vectorSize;
regionPtr1 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
;
keyLength1 = (int) (regionPtr1 - keyStartPtr1);
Expand All @@ -507,11 +511,11 @@ private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, lo
long keyStartPtr2 = regionPtr2;
ByteVector keyVector2 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr2, NATIVE_BYTE_ORDER);
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
if (keyLength2 != vectorSize) {
if (keyLength2 != BYTE_SPECIES_SIZE) {
regionPtr2 += (keyLength2 + 1);
}
else {
regionPtr2 += vectorSize;
regionPtr2 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
;
keyLength2 = (int) (regionPtr2 - keyStartPtr2);
Expand Down Expand Up @@ -804,16 +808,17 @@ else if (keyLength <= BYTE_SPECIES_SIZE) {

/**
 * Folds one value into the entry located at {@code entryOffset} inside {@code data}.
 *
 * <p>The entry's count is incremented by one, its stored minimum is lowered when {@code value}
 * is smaller, its stored maximum is raised when {@code value} is larger, and {@code value} is
 * added to its running sum.
 *
 * @param entryOffset base offset of the entry within {@code data}; the individual field
 *                    offsets are derived from it via the {@code *_OFFSET} constants
 * @param value       value to record; min and max are stored as {@code short}, so the value is
 *                    narrowed with a cast when written to those fields
 */
private void putValue(int entryOffset, int value) {
    // Resolve every field offset up front; none of them depends on another.
    int countOffset = entryOffset + COUNT_OFFSET;
    int minValueOffset = entryOffset + MIN_VALUE_OFFSET;
    int maxValueOffset = entryOffset + MAX_VALUE_OFFSET;
    int sumOffset = entryOffset + VALUE_SUM_OFFSET;

    // Exactly one count increment per recorded value.
    U.putInt(data, countOffset, U.getInt(data, countOffset) + 1);
    if (value < U.getShort(data, minValueOffset)) {
        U.putShort(data, minValueOffset, (short) value);
    }
    if (value > U.getShort(data, maxValueOffset)) {
        U.putShort(data, maxValueOffset, (short) value);
    }
    U.putLong(data, sumOffset, U.getLong(data, sumOffset) + value);
}

Expand Down

0 comments on commit 6a2e505

Please sign in to comment.