Skip to content

Commit

Permalink
fix: performance fixes in case the sort contains only simple index
Browse files Browse the repository at this point in the history
  • Loading branch information
novoj committed Sep 20, 2023
1 parent 002db5b commit 75678ee
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@
@SuppressWarnings("ComparatorNotSerializable")
public interface EntityComparator extends Comparator<EntityContract> {

/**
* Method is called just before the comparator is used, so that the comparator can prepare its internal data
* structures to accommodate the expected entity count. The default implementation does nothing.
*
* @param entityCount expected count of entities to be sorted
*/
default void prepareFor(int entityCount) {}

/**
* Returns references to all entities that lacked the data we sort by - in other words, entities whose value
* evaluated to NULL. Such entities need to be propagated to further evaluation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

package io.evitadb.core.query.sort.attribute;

import com.carrotsearch.hppc.ObjectIntHashMap;
import com.carrotsearch.hppc.ObjectIntMap;
import io.evitadb.api.requestResponse.data.EntityContract;
import io.evitadb.core.query.QueryContext;
import io.evitadb.core.query.algebra.Formula;
Expand All @@ -49,6 +51,7 @@
import java.util.List;
import java.util.PrimitiveIterator.OfInt;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.ToIntFunction;

import static java.util.Optional.ofNullable;

Expand Down Expand Up @@ -95,23 +98,23 @@ public AttributeExactSorter(@Nonnull String attributeName, @Nonnull Comparable[]

@Nonnull
@Override
public Sorter andThen(Sorter sorterForUnknownRecords) {
public Sorter cloneInstance() {
return new AttributeExactSorter(
attributeName,
exactOrder,
sortIndex,
sorterForUnknownRecords
null
);
}

@Nonnull
@Override
public Sorter cloneInstance() {
public Sorter andThen(Sorter sorterForUnknownRecords) {
return new AttributeExactSorter(
attributeName,
exactOrder,
sortIndex,
null
sorterForUnknownRecords
);
}

Expand Down Expand Up @@ -150,8 +153,7 @@ private int[] sortOutputBasedOnIndex(
}

// retrieve array of "sorted" primary keys based on data from index
@SuppressWarnings({"unchecked"})
final int[] exactPkOrder = Arrays.stream(this.exactOrder)
@SuppressWarnings({"unchecked"}) final int[] exactPkOrder = Arrays.stream(this.exactOrder)
.map(sortIndex::getRecordsEqualTo)
.flatMapToInt(Bitmap::stream)
.toArray();
Expand Down Expand Up @@ -243,8 +245,15 @@ private int[] sortOutputByPrefetchedEntities(
private static class AttributePositionComparator implements EntityComparator {
private final String attributeName;
private final Comparable[] attributeValues;
private int estimatedCount = 100;
private ObjectIntMap<Serializable> cache;
private CompositeObjectArray<EntityContract> nonSortedEntities;

/**
* Remembers the expected number of entities to be sorted. The stored value is used to size the lazily created
* lookup cache (the estimated count multiplied by 5) when the first comparison needs it.
*
* @param entityCount expected entity count to be sorted
*/
@Override
public void prepareFor(int entityCount) {
this.estimatedCount = entityCount;
}

@Nonnull
@Override
public Iterable<EntityContract> getNonSortedEntities() {
Expand Down Expand Up @@ -273,12 +282,43 @@ public int compare(EntityContract o1, EntityContract o2) {
this.nonSortedEntities.add(o2);
return 1;
} else {
final int attribute1Index = ArrayUtils.indexOf(attribute1, attributeValues);
final int attribute2Index = ArrayUtils.indexOf(attribute2, attributeValues);
// resolve the position of both attribute values within attributeValues (results are memoized in the cache)
if (cache == null) {
// let's create the cache with the estimated size multiplied by 5 (expected steps for binary search)
cache = new ObjectIntHashMap<>(estimatedCount * 5);
}
final int attribute1Index = computeIfAbsent(cache, attribute1, it -> ArrayUtils.indexOf(it, attributeValues));
final int attribute2Index = computeIfAbsent(cache, attribute2, it -> ArrayUtils.indexOf(it, attributeValues));
return Integer.compare(attribute1Index, attribute2Index);
}
}

/**
 * Memoizes the outcome of the index lookup for the given attribute value, so that repeated comparisons
 * of the same value during sorting do not repeat the lookup.
 *
 * The cache yields 0 for a key without a mapping, therefore a genuinely computed index of 0 is stored
 * under the substitute value {@link Integer#MIN_VALUE} and translated back to 0 when read.
 *
 * @param cache        cache holding the indexes computed so far
 * @param attribute    attribute value of the entity whose index is requested
 * @param indexLocator function computing the index when the cache holds no entry for the attribute
 * @return index of the attribute value, either taken from the cache or freshly computed
 */
private static int computeIfAbsent(@Nonnull ObjectIntMap<Serializable> cache, @Nonnull Serializable attribute, @Nonnull ToIntFunction<Serializable> indexLocator) {
	final int cached = cache.get(attribute);
	if (cached == Integer.MIN_VALUE) {
		// substitute value - the real index is 0
		return 0;
	}
	if (cached != 0) {
		// regular cache hit
		return cached;
	}
	// zero means "no entry" - compute the index and remember it, encoding index 0 by the substitute value
	final int located = indexLocator.applyAsInt(attribute);
	final int stored = located == 0 ? Integer.MIN_VALUE : located;
	cache.put(attribute, stored);
	return located;
}

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -71,34 +71,44 @@ public MergedSortedRecordsSupplier(
@Nonnull SortedRecordsProvider[] sortedRecordsProviders,
@Nullable Sorter unknownRecordIdsSorter
) {
final int expectedMaxLength = Arrays.stream(sortedRecordsProviders)
.map(SortedRecordsProvider::getAllRecords)
.mapToInt(Bitmap::size).sum();
final RoaringBitmap mergedAllRecords = new RoaringBitmap();
final int[] mergedSortedRecordIds = new int[expectedMaxLength];
final int[] mergedRecordPositions = new int[expectedMaxLength];
int writePeak = -1;

for (final SortedRecordsProvider sortedRecordsProvider : sortedRecordsProviders) {
final int[] instanceSortedRecordIds = sortedRecordsProvider.getSortedRecordIds();
for (int instanceSortedRecordId : instanceSortedRecordIds) {
if (mergedAllRecords.checkedAdd(instanceSortedRecordId)) {
writePeak++;
mergedSortedRecordIds[writePeak] = instanceSortedRecordId;
mergedRecordPositions[writePeak] = writePeak;
if (sortedRecordsProviders.length == 1) {
this.sortedRecordsProvider = new MergedSortedRecordsProvider(
sortedRecordsProviders[0].getAllRecords() instanceof RoaringBitmapBackedBitmap roaringBitmapBackedBitmap ?
roaringBitmapBackedBitmap : new BaseBitmap(sortedRecordsProviders[0].getAllRecords()),
sortedRecordsProviders[0].getSortedRecordIds(),
sortedRecordsProviders[0].getRecordPositions()
);
} else {
// we need to go the hard way and merge the sorted records
final int expectedMaxLength = Arrays.stream(sortedRecordsProviders)
.map(SortedRecordsProvider::getAllRecords)
.mapToInt(Bitmap::size).sum();
final RoaringBitmap mergedAllRecords = new RoaringBitmap();
final int[] mergedSortedRecordIds = new int[expectedMaxLength];
final int[] mergedRecordPositions = new int[expectedMaxLength];
int writePeak = -1;

for (final SortedRecordsProvider sortedRecordsProvider : sortedRecordsProviders) {
final int[] instanceSortedRecordIds = sortedRecordsProvider.getSortedRecordIds();
for (int instanceSortedRecordId : instanceSortedRecordIds) {
if (mergedAllRecords.checkedAdd(instanceSortedRecordId)) {
writePeak++;
mergedSortedRecordIds[writePeak] = instanceSortedRecordId;
mergedRecordPositions[writePeak] = writePeak;
}
}
}
final BaseBitmap allRecords = new BaseBitmap(mergedAllRecords);
final int[] sortedRecordIds = Arrays.copyOfRange(mergedSortedRecordIds, 0, writePeak + 1);
final int[] recordPositions = Arrays.copyOfRange(mergedRecordPositions, 0, writePeak + 1);
ArrayUtils.sortSecondAlongFirstArray(
sortedRecordIds,
recordPositions
);
this.sortedRecordsProvider = new MergedSortedRecordsProvider(
allRecords, sortedRecordIds, recordPositions
);
}
final BaseBitmap allRecords = new BaseBitmap(mergedAllRecords);
final int[] sortedRecordIds = Arrays.copyOfRange(mergedSortedRecordIds, 0, writePeak + 1);
final int[] recordPositions = Arrays.copyOfRange(mergedRecordPositions, 0, writePeak + 1);
ArrayUtils.sortSecondAlongFirstArray(
sortedRecordIds,
recordPositions
);
this.sortedRecordsProvider = new MergedSortedRecordsProvider(
allRecords, sortedRecordIds, recordPositions
);
this.unknownRecordIdsSorter = unknownRecordIdsSorter;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,9 @@ public CacheableSorter getCloneWithComputationCallback(@Nonnull Consumer<Cacheab
@Nonnull
public MergedSortedRecordsSupplier getMemoizedResult() {
if (memoizedResult == null) {
final SortedRecordsProvider[] sortedRecordsProviders = getSortedRecordsProviders();
memoizedResult = new MergedSortedRecordsSupplier(
getSortedRecordsProviders(),
sortedRecordsProviders,
unknownRecordIdsSorter
);
if (computationCallback != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ public int[] sortAndSlice(@Nonnull QueryContext queryContext, @Nonnull Formula i
entities.add(queryContext.translateToEntity(id));
}

entityComparator.prepareFor(endIndex - startIndex);
entities.sort(entityComparator);

int notFoundRecordsCnt = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,8 @@ public Stream<Sorter> createSorter(@Nonnull AttributeNatural attributeNatural, @
final EntityIndex<?>[] indexesForSort = orderByVisitor.getIndexesForSort();
final NamedSchemaContract attributeOrCompoundSchema = processingScope.getAttributeSchemaOrSortableAttributeCompound(attributeName);

final Function<ChainIndex, SortedRecordsProvider> chainIndexSupplier;
final Comparator<Comparable<?>> comparator;
if (orderDirection == ASC) {
chainIndexSupplier = ChainIndex::getAscendingOrderRecordsSupplier;
sortedRecordsSupplier = new AttributeSortedRecordsProviderSupplier(
SortIndex::getAscendingOrderRecordsSupplier,
ChainIndex::getAscendingOrderRecordsSupplier,
Expand All @@ -89,7 +87,6 @@ public Stream<Sorter> createSorter(@Nonnull AttributeNatural attributeNatural, @
//noinspection unchecked,rawtypes
comparator = (o1, o2) -> ((Comparable) o1).compareTo(o2);
} else {
chainIndexSupplier = ChainIndex::getDescendingOrderRecordsSupplier;
sortedRecordsSupplier = new AttributeSortedRecordsProviderSupplier(
SortIndex::getDescendingOrderRecordsSupplier,
ChainIndex::getDescendingOrderRecordsSupplier,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,20 @@

package io.evitadb.core.query.sort.attribute.translator;

import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntIntMap;
import io.evitadb.api.requestResponse.data.EntityContract;
import io.evitadb.core.query.sort.EntityComparator;
import io.evitadb.core.query.sort.SortedRecordsSupplierFactory.SortedRecordsProvider;
import io.evitadb.core.query.sort.attribute.PreSortedRecordsSorter;
import io.evitadb.index.array.CompositeObjectArray;
import io.evitadb.index.attribute.ChainIndex;
import io.evitadb.index.bitmap.Bitmap;
import lombok.RequiredArgsConstructor;

import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.function.IntUnaryOperator;
import java.util.function.Supplier;

/**
Expand All @@ -48,13 +52,20 @@ public class PredecessorAttributeComparator implements EntityComparator {
private final Supplier<SortedRecordsProvider[]> sortedRecordsSupplier;
private SortedRecordsProvider[] resolvedSortedRecordsProviders;
private CompositeObjectArray<EntityContract> nonSortedEntities;
private int estimatedCount = 100;
private IntIntMap cache;

@Nonnull
@Override
public Iterable<EntityContract> getNonSortedEntities() {
	// the collection is absent until an entity is recorded in it - report no entities in that case
	if (nonSortedEntities != null) {
		return nonSortedEntities;
	}
	return Collections.emptyList();
}

/**
* Remembers the expected number of entities to be sorted. The stored value is used to size the lazily created
* lookup cache (the estimated count multiplied by 5) when the first comparison needs it.
*
* @param entityCount expected entity count to be sorted
*/
@Override
public void prepareFor(int entityCount) {
this.estimatedCount = entityCount;
}

@Override
public int compare(EntityContract o1, EntityContract o2) {
final SortedRecordsProvider[] sortedRecordsProviders = getSortedRecordsProviders();
Expand All @@ -63,9 +74,14 @@ public int compare(EntityContract o1, EntityContract o2) {
int result = 0;
// scan all providers
for (SortedRecordsProvider sortedRecordsProvider : sortedRecordsProviders) {
if (cache == null) {
// let's create the cache with the estimated size multiplied by 5 (expected steps for binary search)
cache = new IntIntHashMap(estimatedCount * 5);
}
// and try to find primary keys of both entities in each provider
final int o1Index = o1Found ? -1 : sortedRecordsProvider.getAllRecords().indexOf(o1.getPrimaryKey());
final int o2Index = o2Found ? -1 : sortedRecordsProvider.getAllRecords().indexOf(o2.getPrimaryKey());
final Bitmap allRecords = sortedRecordsProvider.getAllRecords();
final int o1Index = o1Found ? -1 : computeIfAbsent(cache, o1.getPrimaryKey(), allRecords::indexOf);
final int o2Index = o2Found ? -1 : computeIfAbsent(cache, o2.getPrimaryKey(), allRecords::indexOf);
// if both entities are found in the same provider, compare their positions
if (o1Index >= 0 && o2Index >= 0) {
result = Integer.compare(
Expand Down Expand Up @@ -110,4 +126,30 @@ private SortedRecordsProvider[] getSortedRecordsProviders() {
return resolvedSortedRecordsProviders;
}

/**
 * Memoizes the outcome of the index lookup for the given primary key, so that repeated comparisons
 * of the same entity during sorting do not repeat the lookup.
 *
 * The cache yields 0 for a key without a mapping, therefore a genuinely computed index of 0 is stored
 * under the substitute value {@link Integer#MIN_VALUE} and translated back to 0 when read.
 *
 * @param cache        cache holding the indexes computed so far
 * @param primaryKey   primary key of the entity whose index is requested
 * @param indexLocator function computing the index when the cache holds no entry for the primary key
 * @return index of the entity, either taken from the cache or freshly computed
 */
private static int computeIfAbsent(@Nonnull IntIntMap cache, @Nonnull Integer primaryKey, @Nonnull IntUnaryOperator indexLocator) {
	final int key = primaryKey;
	final int cached = cache.get(key);
	if (cached == 0) {
		// zero means "no entry" - compute the index and remember it, encoding index 0 by the substitute value
		final int located = indexLocator.applyAsInt(key);
		cache.put(key, located == 0 ? Integer.MIN_VALUE : located);
		return located;
	}
	// the substitute value stands for index 0, anything else is the cached index itself
	return cached == Integer.MIN_VALUE ? 0 : cached;
}

}

0 comments on commit 75678ee

Please sign in to comment.