From 75678ee7d39f9bcb269164d29598c6dd041063ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Novotn=C3=BD?= Date: Wed, 20 Sep 2023 12:32:54 +0200 Subject: [PATCH] fix: performance fixes in case the sort contains only simple index --- .../core/query/sort/EntityComparator.java | 7 +++ .../sort/attribute/AttributeExactSorter.java | 56 ++++++++++++++--- .../MergedSortedRecordsSupplier.java | 60 +++++++++++-------- .../attribute/PreSortedRecordsSorter.java | 3 +- .../attribute/PrefetchedRecordsSorter.java | 1 + .../AttributeNaturalTranslator.java | 3 - .../PredecessorAttributeComparator.java | 46 +++++++++++++- 7 files changed, 137 insertions(+), 39 deletions(-) diff --git a/evita_engine/src/main/java/io/evitadb/core/query/sort/EntityComparator.java b/evita_engine/src/main/java/io/evitadb/core/query/sort/EntityComparator.java index 3d1399924..97295436a 100644 --- a/evita_engine/src/main/java/io/evitadb/core/query/sort/EntityComparator.java +++ b/evita_engine/src/main/java/io/evitadb/core/query/sort/EntityComparator.java @@ -38,6 +38,13 @@ @SuppressWarnings("ComparatorNotSerializable") public interface EntityComparator extends Comparator { + /** + * This method is called just before the comparator is used, to prepare the comparator's internal data structures to + * accommodate the expected entity count. + * @param entityCount expected entity count to be sorted + */ + default void prepareFor(int entityCount) {} + /** * Returns references to all entities that were lacking the data we were sort along - in other words such values was * evaluated to NULL. Such entities need to be propagated to further evaluation. 
diff --git a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/AttributeExactSorter.java b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/AttributeExactSorter.java index 3b7980cd2..0f4123d85 100644 --- a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/AttributeExactSorter.java +++ b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/AttributeExactSorter.java @@ -23,6 +23,8 @@ package io.evitadb.core.query.sort.attribute; +import com.carrotsearch.hppc.ObjectIntHashMap; +import com.carrotsearch.hppc.ObjectIntMap; import io.evitadb.api.requestResponse.data.EntityContract; import io.evitadb.core.query.QueryContext; import io.evitadb.core.query.algebra.Formula; @@ -49,6 +51,7 @@ import java.util.List; import java.util.PrimitiveIterator.OfInt; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.ToIntFunction; import static java.util.Optional.ofNullable; @@ -95,23 +98,23 @@ public AttributeExactSorter(@Nonnull String attributeName, @Nonnull Comparable[] @Nonnull @Override - public Sorter andThen(Sorter sorterForUnknownRecords) { + public Sorter cloneInstance() { return new AttributeExactSorter( attributeName, exactOrder, sortIndex, - sorterForUnknownRecords + null ); } @Nonnull @Override - public Sorter cloneInstance() { + public Sorter andThen(Sorter sorterForUnknownRecords) { return new AttributeExactSorter( attributeName, exactOrder, sortIndex, - null + sorterForUnknownRecords ); } @@ -150,8 +153,7 @@ private int[] sortOutputBasedOnIndex( } // retrieve array of "sorted" primary keys based on data from index - @SuppressWarnings({"unchecked"}) - final int[] exactPkOrder = Arrays.stream(this.exactOrder) + @SuppressWarnings({"unchecked"}) final int[] exactPkOrder = Arrays.stream(this.exactOrder) .map(sortIndex::getRecordsEqualTo) .flatMapToInt(Bitmap::stream) .toArray(); @@ -243,8 +245,15 @@ private int[] sortOutputByPrefetchedEntities( private static class AttributePositionComparator 
implements EntityComparator { private final String attributeName; private final Comparable[] attributeValues; + private int estimatedCount = 100; + private ObjectIntMap cache; private CompositeObjectArray nonSortedEntities; + @Override + public void prepareFor(int entityCount) { + this.estimatedCount = entityCount; + } + @Nonnull @Override public Iterable getNonSortedEntities() { @@ -273,12 +282,43 @@ public int compare(EntityContract o1, EntityContract o2) { this.nonSortedEntities.add(o2); return 1; } else { - final int attribute1Index = ArrayUtils.indexOf(attribute1, attributeValues); - final int attribute2Index = ArrayUtils.indexOf(attribute2, attributeValues); + // look up the positions of both attribute values in the attributeValues array (results are cached) + if (cache == null) { + // let's create the cache with estimated size multiplied by 5 expected steps for binary search + cache = new ObjectIntHashMap<>(estimatedCount * 5); + } + final int attribute1Index = computeIfAbsent(cache, attribute1, it -> ArrayUtils.indexOf(it, attributeValues)); + final int attribute2Index = computeIfAbsent(cache, attribute2, it -> ArrayUtils.indexOf(it, attributeValues)); return Integer.compare(attribute1Index, attribute2Index); } } + /** + * This method is used to cache the results of the `indexOf` method. It is used to speed up the + * sorting process. + * + * @param cache cache to use + * @param attribute attribute value to find + * @param indexLocator function to compute the index of the attribute value + * @return index of the attribute value + */ + private static int computeIfAbsent(@Nonnull ObjectIntMap cache, @Nonnull Serializable attribute, @Nonnull ToIntFunction indexLocator) { + final int result = cache.get(attribute); + // when the value was not found 0 is returned + if (result == 0) { + final int computedIndex = indexLocator.applyAsInt(attribute); + // if the index was computed as 0 we need to remap it to some other "rare" value to distinguish it from NULL value + cache.put(attribute, computedIndex == 0 ? 
Integer.MIN_VALUE : computedIndex); + return computedIndex; + } else if (result == Integer.MIN_VALUE) { + // when the "rare" value was found - we know it represents index 0 + return 0; + } else { + // otherwise cached value was found + return result; + } + } + } } diff --git a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/MergedSortedRecordsSupplier.java b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/MergedSortedRecordsSupplier.java index 734efd5ba..d5dc00c2c 100644 --- a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/MergedSortedRecordsSupplier.java +++ b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/MergedSortedRecordsSupplier.java @@ -71,34 +71,44 @@ public MergedSortedRecordsSupplier( @Nonnull SortedRecordsProvider[] sortedRecordsProviders, @Nullable Sorter unknownRecordIdsSorter ) { - final int expectedMaxLength = Arrays.stream(sortedRecordsProviders) - .map(SortedRecordsProvider::getAllRecords) - .mapToInt(Bitmap::size).sum(); - final RoaringBitmap mergedAllRecords = new RoaringBitmap(); - final int[] mergedSortedRecordIds = new int[expectedMaxLength]; - final int[] mergedRecordPositions = new int[expectedMaxLength]; - int writePeak = -1; - - for (final SortedRecordsProvider sortedRecordsProvider : sortedRecordsProviders) { - final int[] instanceSortedRecordIds = sortedRecordsProvider.getSortedRecordIds(); - for (int instanceSortedRecordId : instanceSortedRecordIds) { - if (mergedAllRecords.checkedAdd(instanceSortedRecordId)) { - writePeak++; - mergedSortedRecordIds[writePeak] = instanceSortedRecordId; - mergedRecordPositions[writePeak] = writePeak; + if (sortedRecordsProviders.length == 1) { + this.sortedRecordsProvider = new MergedSortedRecordsProvider( + sortedRecordsProviders[0].getAllRecords() instanceof RoaringBitmapBackedBitmap roaringBitmapBackedBitmap ? 
+ roaringBitmapBackedBitmap : new BaseBitmap(sortedRecordsProviders[0].getAllRecords()), + sortedRecordsProviders[0].getSortedRecordIds(), + sortedRecordsProviders[0].getRecordPositions() + ); + } else { + // we need to go the hard way and merge the sorted records + final int expectedMaxLength = Arrays.stream(sortedRecordsProviders) + .map(SortedRecordsProvider::getAllRecords) + .mapToInt(Bitmap::size).sum(); + final RoaringBitmap mergedAllRecords = new RoaringBitmap(); + final int[] mergedSortedRecordIds = new int[expectedMaxLength]; + final int[] mergedRecordPositions = new int[expectedMaxLength]; + int writePeak = -1; + + for (final SortedRecordsProvider sortedRecordsProvider : sortedRecordsProviders) { + final int[] instanceSortedRecordIds = sortedRecordsProvider.getSortedRecordIds(); + for (int instanceSortedRecordId : instanceSortedRecordIds) { + if (mergedAllRecords.checkedAdd(instanceSortedRecordId)) { + writePeak++; + mergedSortedRecordIds[writePeak] = instanceSortedRecordId; + mergedRecordPositions[writePeak] = writePeak; + } } } + final BaseBitmap allRecords = new BaseBitmap(mergedAllRecords); + final int[] sortedRecordIds = Arrays.copyOfRange(mergedSortedRecordIds, 0, writePeak + 1); + final int[] recordPositions = Arrays.copyOfRange(mergedRecordPositions, 0, writePeak + 1); + ArrayUtils.sortSecondAlongFirstArray( + sortedRecordIds, + recordPositions + ); + this.sortedRecordsProvider = new MergedSortedRecordsProvider( + allRecords, sortedRecordIds, recordPositions + ); } - final BaseBitmap allRecords = new BaseBitmap(mergedAllRecords); - final int[] sortedRecordIds = Arrays.copyOfRange(mergedSortedRecordIds, 0, writePeak + 1); - final int[] recordPositions = Arrays.copyOfRange(mergedRecordPositions, 0, writePeak + 1); - ArrayUtils.sortSecondAlongFirstArray( - sortedRecordIds, - recordPositions - ); - this.sortedRecordsProvider = new MergedSortedRecordsProvider( - allRecords, sortedRecordIds, recordPositions - ); this.unknownRecordIdsSorter = 
unknownRecordIdsSorter; } diff --git a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/PreSortedRecordsSorter.java b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/PreSortedRecordsSorter.java index 028ae0214..70337fd00 100644 --- a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/PreSortedRecordsSorter.java +++ b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/PreSortedRecordsSorter.java @@ -248,8 +248,9 @@ public CacheableSorter getCloneWithComputationCallback(@Nonnull Consumer createSorter(@Nonnull AttributeNatural attributeNatural, @ final EntityIndex[] indexesForSort = orderByVisitor.getIndexesForSort(); final NamedSchemaContract attributeOrCompoundSchema = processingScope.getAttributeSchemaOrSortableAttributeCompound(attributeName); - final Function chainIndexSupplier; final Comparator> comparator; if (orderDirection == ASC) { - chainIndexSupplier = ChainIndex::getAscendingOrderRecordsSupplier; sortedRecordsSupplier = new AttributeSortedRecordsProviderSupplier( SortIndex::getAscendingOrderRecordsSupplier, ChainIndex::getAscendingOrderRecordsSupplier, @@ -89,7 +87,6 @@ public Stream createSorter(@Nonnull AttributeNatural attributeNatural, @ //noinspection unchecked,rawtypes comparator = (o1, o2) -> ((Comparable) o1).compareTo(o2); } else { - chainIndexSupplier = ChainIndex::getDescendingOrderRecordsSupplier; sortedRecordsSupplier = new AttributeSortedRecordsProviderSupplier( SortIndex::getDescendingOrderRecordsSupplier, ChainIndex::getDescendingOrderRecordsSupplier, diff --git a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/translator/PredecessorAttributeComparator.java b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/translator/PredecessorAttributeComparator.java index 41699e51a..1bbf92066 100644 --- a/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/translator/PredecessorAttributeComparator.java +++ 
b/evita_engine/src/main/java/io/evitadb/core/query/sort/attribute/translator/PredecessorAttributeComparator.java @@ -23,16 +23,20 @@ package io.evitadb.core.query.sort.attribute.translator; +import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.IntIntMap; import io.evitadb.api.requestResponse.data.EntityContract; import io.evitadb.core.query.sort.EntityComparator; import io.evitadb.core.query.sort.SortedRecordsSupplierFactory.SortedRecordsProvider; import io.evitadb.core.query.sort.attribute.PreSortedRecordsSorter; import io.evitadb.index.array.CompositeObjectArray; import io.evitadb.index.attribute.ChainIndex; +import io.evitadb.index.bitmap.Bitmap; import lombok.RequiredArgsConstructor; import javax.annotation.Nonnull; import java.util.Collections; +import java.util.function.IntUnaryOperator; import java.util.function.Supplier; /** @@ -48,6 +52,8 @@ public class PredecessorAttributeComparator implements EntityComparator { private final Supplier sortedRecordsSupplier; private SortedRecordsProvider[] resolvedSortedRecordsProviders; private CompositeObjectArray nonSortedEntities; + private int estimatedCount = 100; + private IntIntMap cache; @Nonnull @Override @@ -55,6 +61,11 @@ public Iterable getNonSortedEntities() { return nonSortedEntities == null ? 
Collections.emptyList() : nonSortedEntities; } + @Override + public void prepareFor(int entityCount) { + this.estimatedCount = entityCount; + } + @Override public int compare(EntityContract o1, EntityContract o2) { final SortedRecordsProvider[] sortedRecordsProviders = getSortedRecordsProviders(); @@ -63,9 +74,14 @@ public int compare(EntityContract o1, EntityContract o2) { int result = 0; // scan all providers for (SortedRecordsProvider sortedRecordsProvider : sortedRecordsProviders) { + if (cache == null) { + // let's create the cache with estimated size multiply 5 expected steps for binary search + cache = new IntIntHashMap(estimatedCount * 5); + } // and try to find primary keys of both entities in each provider - final int o1Index = o1Found ? -1 : sortedRecordsProvider.getAllRecords().indexOf(o1.getPrimaryKey()); - final int o2Index = o2Found ? -1 : sortedRecordsProvider.getAllRecords().indexOf(o2.getPrimaryKey()); + final Bitmap allRecords = sortedRecordsProvider.getAllRecords(); + final int o1Index = o1Found ? -1 : computeIfAbsent(cache, o1.getPrimaryKey(), allRecords::indexOf); + final int o2Index = o2Found ? -1 : computeIfAbsent(cache, o2.getPrimaryKey(), allRecords::indexOf); // if both entities are found in the same provider, compare their positions if (o1Index >= 0 && o2Index >= 0) { result = Integer.compare( @@ -110,4 +126,30 @@ private SortedRecordsProvider[] getSortedRecordsProviders() { return resolvedSortedRecordsProviders; } + /** + * This method is used to cache the results of the `indexOf` method. It is used to speed up the + * sorting process. 
+ * + * @param cache cache to use + * @param primaryKey primary key of the entity to find + * @param indexLocator function to compute the index of the entity + * @return index of the entity + */ + private static int computeIfAbsent(@Nonnull IntIntMap cache, @Nonnull Integer primaryKey, @Nonnull IntUnaryOperator indexLocator) { + final int result = cache.get(primaryKey); + // when the value was not found 0 is returned + if (result == 0) { + final int computedIndex = indexLocator.applyAsInt(primaryKey); + // if the index was computed as 0 we need to remap it to some other "rare" value to distinguish it from NULL value + cache.put(primaryKey, computedIndex == 0 ? Integer.MIN_VALUE : computedIndex); + return computedIndex; + } else if (result == Integer.MIN_VALUE) { + // when the "rare" value was found - we know it represents index 0 + return 0; + } else { + // otherwise cached value was found + return result; + } + } + }