From f22421c0dcafd2862d6810f4c90705f4a6451b98 Mon Sep 17 00:00:00 2001 From: Jan Cizmar Date: Thu, 19 Dec 2024 08:44:52 +0000 Subject: [PATCH] fix: BigMeta (context) storing -> reduce the amount of stored data (#2780) --- .../v2/controllers/BigMetaControllerTest.kt | 76 ++++---- .../data/src/main/kotlin/io/tolgee/Metrics.kt | 6 + .../testDataBuilder/data/BigMetaTestData.kt | 6 +- .../data/SuggestionTestData.kt | 20 +-- .../tolgee/model/keyBigMeta/KeysDistance.kt | 16 +- .../repository/KeysDistanceRepository.kt | 4 +- .../tolgee/service/bigMeta/BigMetaService.kt | 49 +++-- .../tolgee/service/bigMeta/KeysDistanceDto.kt | 5 +- .../service/bigMeta/KeysDistanceUtil.kt | 169 +++++++++++++----- .../main/resources/db/changelog/schema.xml | 17 ++ .../io/tolgee/unit/KeysDistanceUtilTest.kt | 85 --------- .../unit/bigMeta/KeysDistanceUnitTestData.kt | 73 ++++++++ .../unit/bigMeta/KeysDistanceUtilTest.kt | 93 ++++++++++ 13 files changed, 416 insertions(+), 203 deletions(-) delete mode 100644 backend/data/src/test/kotlin/io/tolgee/unit/KeysDistanceUtilTest.kt create mode 100644 backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUnitTestData.kt create mode 100644 backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUtilTest.kt diff --git a/backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/BigMetaControllerTest.kt b/backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/BigMetaControllerTest.kt index 70e942e7b5..f653e28454 100644 --- a/backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/BigMetaControllerTest.kt +++ b/backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/BigMetaControllerTest.kt @@ -4,13 +4,13 @@ import io.tolgee.ProjectAuthControllerTest import io.tolgee.development.testDataBuilder.data.BigMetaTestData import io.tolgee.fixtures.andAssertThatJson import io.tolgee.fixtures.andIsOk +import io.tolgee.fixtures.waitForNotThrowing import io.tolgee.model.key.Key import io.tolgee.service.bigMeta.BigMetaService +import 
io.tolgee.service.bigMeta.KeysDistanceDto import io.tolgee.testing.annotations.ProjectJWTAuthTestMethod import io.tolgee.testing.assert import io.tolgee.util.Logging -import io.tolgee.util.infoMeasureTime -import io.tolgee.util.logger import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.springframework.beans.factory.annotation.Autowired @@ -53,59 +53,70 @@ class BigMetaControllerTest : ProjectAuthControllerTest("/v2/projects/"), Loggin ), ).andIsOk - bigMetaService.findExistingKeysDistancesDtosByIds(listOf(testData.yepKey.id)).assert.hasSize(1) + getDistances(listOf(testData.yepKey.id)).assert.hasSize(1) } @Test @ProjectJWTAuthTestMethod fun `it performs well`() { val keys = testData.addLotOfData() - testData.addLotOfReferences(keys) saveTestDataAndPrepare() - logger.infoMeasureTime("it performs well time 1") { - storeLogOfBigMeta(keys, 500, 100) - } - - logger.infoMeasureTime("it performs well time 2") { - storeLogOfBigMeta(keys, 500, 100) - } + measureTime { + storeAndAssertSize(keys, 0, 30, 354) + storeAndAssertSize(keys, 500, 100, 1409) + storeAndAssertSize(keys, 10, 200, 3155) + storeAndAssertSize(keys, 800, 50, 3710) + storeAndAssertSize(keys, 800, 50, 3710) + }.inWholeSeconds.assert.isLessThan(5) + } - logger.infoMeasureTime("it performs well time 3") { - storeLogOfBigMeta(keys, 10, 200) - } + private fun storeAndAssertSize( + allKeys: List, + drop: Int, + take: Int, + expectedSize: Int, + ) { + val keyIds = allKeys.map { it.id } - logger.infoMeasureTime("it performs well time 4") { - storeLogOfBigMeta(keys, 800, 50) + val stored = storeLotOfBigMeta(allKeys, drop, take) + waitForNotThrowing(pollTime = 50, timeout = 2000) { + val distances = getDistances(keyIds) + assertAllHaveAtLeast20Distances(stored, distances) + getDistances(keyIds).assert.hasSize(expectedSize) } - - measureTime { - storeLogOfBigMeta(keys, 800, 50) - }.inWholeSeconds.assert.isLessThan(10) - - bigMetaService.findExistingKeysDistancesDtosByIds(keys.map { 
it.id }).assert.hasSize(104790) } - @Test - @ProjectJWTAuthTestMethod - fun `it performs well (large)`() { - val keys = testData.addLotOfData() - testData.addLotOfReferences(keys) - saveTestDataAndPrepare() + private fun getDistances(keyIds: List) = bigMetaService.findExistingKeysDistancesDtosByIds(keyIds) - storeLogOfBigMeta(keys, 0, 200) + private fun assertAllHaveAtLeast20Distances( + stored: List, + distances: Set, + ) { + val distancesPerKey = + stored.associate { storedKey -> + val filtered = + distances.filter { + distance -> + distance.key1Id == storedKey.id || distance.key2Id == storedKey.id + } + storedKey.id to filtered + } + + distancesPerKey.values.assert.allSatisfy { it.assert.hasSizeGreaterThanOrEqualTo(20) } } - private fun storeLogOfBigMeta( + private fun storeLotOfBigMeta( keys: List, drop: Int, take: Int, - ) { + ): List { + val toStore = keys.drop(drop).take(take) performProjectAuthPost( "big-meta", mapOf( "relatedKeysInOrder" to - keys.drop(drop).take(take).reversed().map { + toStore.reversed().map { mapOf( "namespace" to it.namespace, "keyName" to it.name, @@ -113,6 +124,7 @@ class BigMetaControllerTest : ProjectAuthControllerTest("/v2/projects/"), Loggin }, ), ).andIsOk + return toStore } @Test diff --git a/backend/data/src/main/kotlin/io/tolgee/Metrics.kt b/backend/data/src/main/kotlin/io/tolgee/Metrics.kt index ccedf613f6..afc0f19931 100644 --- a/backend/data/src/main/kotlin/io/tolgee/Metrics.kt +++ b/backend/data/src/main/kotlin/io/tolgee/Metrics.kt @@ -41,6 +41,12 @@ class Metrics( .register(meterRegistry) } + val bigMetaDeletingAsyncTimer: Timer by lazy { + Timer.builder("tolgee.big_meta.deleting-async.timer") + .description("Time spent deleting big meta data (async)") + .register(meterRegistry) + } + val bigMetaNewDistancesComputeTimer: Timer by lazy { Timer.builder("tolgee.big_meta.new_distances.compute.timer") .description("Time spent computing new distances for big meta data") diff --git 
a/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/BigMetaTestData.kt b/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/BigMetaTestData.kt index ed8400a734..0a2dc2808a 100644 --- a/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/BigMetaTestData.kt +++ b/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/BigMetaTestData.kt @@ -5,7 +5,6 @@ import io.tolgee.development.testDataBuilder.builders.TestDataBuilder import io.tolgee.model.Project import io.tolgee.model.UserAccount import io.tolgee.model.key.Key -import io.tolgee.service.bigMeta.BigMetaService import kotlin.math.abs class BigMetaTestData { @@ -42,9 +41,10 @@ class BigMetaTestData { fun addLotOfReferences(keys: List) { keys.forEachIndexed forEach1@{ idx1, key1 -> keys.forEachIndexed forEach2@{ idx2, key2 -> - if (idx1 >= idx2 || abs(idx1 - idx2) > (BigMetaService.MAX_ORDER_DISTANCE + 1)) return@forEach2 + val distance = abs(idx1 - idx2).toDouble() + if (idx1 >= idx2 || distance > (20 + 1)) return@forEach2 projectBuilder.addKeysDistance(key1, key2) { - score = 10000 + this.distance = distance hits = 1 } } diff --git a/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/SuggestionTestData.kt b/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/SuggestionTestData.kt index dcfb3be5d6..591aaaec8f 100644 --- a/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/SuggestionTestData.kt +++ b/backend/data/src/main/kotlin/io/tolgee/development/testDataBuilder/data/SuggestionTestData.kt @@ -103,34 +103,34 @@ class SuggestionTestData : BaseTestData() { private fun ProjectBuilder.addKeyDistances() { this.addKeysDistance(data.keys[0].self, data.keys[1].self) { - score = 10000 + distance = 1000.0 } this.addKeysDistance(data.keys[0].self, data.keys[2].self) { - score = 8000 + distance = 8.0 } this.addKeysDistance(data.keys[0].self, data.keys[3].self) { - score = 8000 + 
distance = 8.0 } this.addKeysDistance(data.keys[0].self, data.keys[4].self) { - score = 8000 + distance = 8.0 } this.addKeysDistance(data.keys[1].self, data.keys[2].self) { - score = 2000 + distance = 2.0 } this.addKeysDistance(data.keys[1].self, data.keys[3].self) { - score = 1000 + distance = 1.0 } this.addKeysDistance(data.keys[1].self, data.keys[4].self) { - score = 1000 + distance = 1.0 } this.addKeysDistance(data.keys[2].self, data.keys[3].self) { - score = 1000 + distance = 1.0 } this.addKeysDistance(data.keys[2].self, data.keys[4].self) { - score = 1000 + distance = 1.0 } this.addKeysDistance(data.keys[3].self, data.keys[4].self) { - score = 1000 + distance = 1.0 } } diff --git a/backend/data/src/main/kotlin/io/tolgee/model/keyBigMeta/KeysDistance.kt b/backend/data/src/main/kotlin/io/tolgee/model/keyBigMeta/KeysDistance.kt index f5e033105c..6724c87590 100644 --- a/backend/data/src/main/kotlin/io/tolgee/model/keyBigMeta/KeysDistance.kt +++ b/backend/data/src/main/kotlin/io/tolgee/model/keyBigMeta/KeysDistance.kt @@ -10,6 +10,7 @@ import jakarta.persistence.IdClass import jakarta.persistence.Index import jakarta.persistence.ManyToOne import jakarta.persistence.Table +import org.hibernate.annotations.ColumnDefault import org.springframework.data.domain.Persistable @Entity @@ -31,7 +32,12 @@ class KeysDistance( @ManyToOne(fetch = FetchType.LAZY) lateinit var project: Project - var score: Long = MAX_SCORE + // TODO: Remove this + @Deprecated("Kept for backward compatibility. 
Can be removed in any next patch version.") + @ColumnDefault("10000") + var score: Long = 10000 + + var distance: Double = 0.0 var hits: Long = 1 @@ -43,14 +49,14 @@ class KeysDistance( if (key1Id != other.key1Id) return false if (key2Id != other.key2Id) return false - if (score != other.score) return false + if (distance != other.distance) return false return hits == other.hits } override fun hashCode(): Int { var result = key1Id.hashCode() result = 31 * result + key2Id.hashCode() - result = 31 * result + score.hashCode() + result = 31 * result + distance.hashCode() result = 31 * result + hits.hashCode() return result } @@ -66,8 +72,4 @@ class KeysDistance( @Transient @Column(insertable = false, updatable = false) var new = false - - companion object { - const val MAX_SCORE = 10000L - } } diff --git a/backend/data/src/main/kotlin/io/tolgee/repository/KeysDistanceRepository.kt b/backend/data/src/main/kotlin/io/tolgee/repository/KeysDistanceRepository.kt index df42cb06b7..45e073968b 100644 --- a/backend/data/src/main/kotlin/io/tolgee/repository/KeysDistanceRepository.kt +++ b/backend/data/src/main/kotlin/io/tolgee/repository/KeysDistanceRepository.kt @@ -17,7 +17,7 @@ interface KeysDistanceRepository : JpaRepository { select (case when kd.key1Id = :keyId then kd.key2Id else kd.key1Id end) from KeysDistance kd where kd.key1Id = :keyId or kd.key2Id = :keyId - order by kd.score desc + order by kd.distance """, ) fun getCloseKeys( @@ -36,7 +36,7 @@ interface KeysDistanceRepository : JpaRepository { select (case when kd.key1Id = :keyId then kd.key2Id else kd.key1Id end) from KeysDistance kd where kd.key1Id = :keyId or kd.key2Id = :keyId - order by kd.score desc + order by kd.distance ) and k.project.id = :projectId """, ) diff --git a/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/BigMetaService.kt b/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/BigMetaService.kt index 4401c1b380..8d25dc7c20 100644 ---
a/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/BigMetaService.kt +++ b/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/BigMetaService.kt @@ -14,10 +14,7 @@ import io.tolgee.model.key.Key_ import io.tolgee.model.key.Namespace_ import io.tolgee.model.keyBigMeta.KeysDistance import io.tolgee.repository.KeysDistanceRepository -import io.tolgee.util.Logging -import io.tolgee.util.equalNullable -import io.tolgee.util.executeInNewTransaction -import io.tolgee.util.runSentryCatching +import io.tolgee.util.* import jakarta.persistence.EntityManager import jakarta.persistence.criteria.CriteriaBuilder import jakarta.persistence.criteria.CriteriaQuery @@ -45,12 +42,6 @@ class BigMetaService( @Autowired private lateinit var self: BigMetaService - companion object { - const val MAX_DISTANCE_SCORE = 10000L - const val MAX_POINTS = 2000L - const val MAX_ORDER_DISTANCE = 20 - } - fun saveKeyDistance(keysDistance: KeysDistance): KeysDistance { return keysDistanceRepository.save(keysDistance) } @@ -82,35 +73,55 @@ class BigMetaService( KeysDistanceUtil(relatedKeysInOrder, project, this) }!! 
+ val (toStore, toDelete) = util.toStoreAndDelete + val (toStoreSync, toStoreAsync) = if (forKeyId == null) { - util.newDistances to emptyList() + toStore to emptyList() } else { - util.newDistances.partition { it.key1Id == forKeyId || it.key2Id == forKeyId } + toStore.partition { it.key1Id == forKeyId || it.key2Id == forKeyId } } metrics.bigMetaStoringTimer.recordCallable { insertNewDistances(toStoreSync) } + self.asyncInsertNewDistances(toStoreAsync) + self.asyncDeleteDistances(toDelete) + } + + @Async + fun asyncInsertNewDistances(toInsert: List) { metrics.bigMetaStoringAsyncTimer.recordCallable { - self.asyncInsertNewDistances(toStoreAsync) + insertNewDistances(toInsert) } } @Async - fun asyncInsertNewDistances(toInsert: List) { - insertNewDistances(toInsert) + fun asyncDeleteDistances(toDelete: MutableSet) { + if (toDelete.isEmpty()) { + return + } + jdbcTemplate.batchUpdate( + """ + delete from keys_distance where key1id = ? and key2id = ? + """, + toDelete, + 10000, + ) { ps, dto -> + ps.setLong(1, dto.key1Id) + ps.setLong(2, dto.key2Id) + } } - private fun insertNewDistances(toInsert: List) { + private fun insertNewDistances(toInsert: Collection) { if (toInsert.isEmpty()) { return } val timestamp = Timestamp(currentDateProvider.date.time) jdbcTemplate.batchUpdate( """ - insert into keys_distance (key1id, key2id, score, hits, created_at, updated_at, project_id) + insert into keys_distance (key1id, key2id, distance, hits, created_at, updated_at, project_id) values (?, ?, ?, ?, ?, ?, ?) - on conflict (key1id, key2id) do update set score = excluded.score, hits = excluded.hits, updated_at = ? + on conflict (key1id, key2id) do update set distance = excluded.distance, hits = excluded.hits, updated_at = ?
""", @@ -119,7 +130,7 @@ class BigMetaService( ) { ps, dto -> ps.setLong(1, dto.key1Id) ps.setLong(2, dto.key2Id) - ps.setLong(3, dto.score) + ps.setDouble(3, dto.distance) ps.setLong(4, dto.hits) ps.setTimestamp(5, timestamp) ps.setTimestamp(6, timestamp) @@ -153,7 +164,7 @@ class BigMetaService( fun findExistingKeysDistancesDtosByIds(keyIds: List): Set { return entityManager.createQuery( """ - select new io.tolgee.service.bigMeta.KeysDistanceDto(kd.key1Id, kd.key2Id, kd.score, kd.project.id, kd.hits) from KeysDistance kd + select new io.tolgee.service.bigMeta.KeysDistanceDto(kd.key1Id, kd.key2Id, kd.distance, kd.project.id, kd.hits, true) from KeysDistance kd where kd.key1Id in ( select kd2.key1Id from KeysDistance kd2 where kd2.key1Id in :data or kd2.key2Id in :data ) or kd.key2Id in ( diff --git a/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceDto.kt b/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceDto.kt index 477af97f81..0cb1793ecc 100644 --- a/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceDto.kt +++ b/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceDto.kt @@ -1,11 +1,10 @@ package io.tolgee.service.bigMeta -import io.tolgee.model.keyBigMeta.KeysDistance.Companion.MAX_SCORE - data class KeysDistanceDto( var key1Id: Long = 0, var key2Id: Long = 0, - var score: Long = MAX_SCORE, + var distance: Double = 0.0, var projectId: Long, var hits: Long = 1, + var stored: Boolean = true, ) diff --git a/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceUtil.kt b/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceUtil.kt index b92f6f8350..516a68a772 100644 --- a/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceUtil.kt +++ b/backend/data/src/main/kotlin/io/tolgee/service/bigMeta/KeysDistanceUtil.kt @@ -7,43 +7,90 @@ import kotlin.math.abs import kotlin.math.max import kotlin.math.min +private const val MAX_STORED = 20 + class KeysDistanceUtil( 
private val relatedKeysInOrder: MutableList, private val project: Project, private val bigMetaService: BigMetaService, ) : Logging { - val newDistances by lazy { - increaseRelevant() - decreaseOthers() - distances.values.toList() + val toStoreAndDelete by lazy { + val toStore = mutableSetOf() + val toDelete = mutableSetOf() + val allTouchedIds = (keys.map { it.id }).toSet() + val updatedByIdMap = + allTouchedIds.associateWith { keyId -> + allUpdated.filterKeys { (key1Id, key2Id) -> key1Id == keyId || key2Id == keyId }.values + } + updatedByIdMap.values.forEach { distancesOdKey -> + val sortedByDistance = distancesOdKey.sortedBy { it.distance } + toStore.addAll(sortedByDistance.take(MAX_STORED)) + toDelete.addAll(sortedByDistance.drop(MAX_STORED).filter { it.stored }) + } + + // for some keys, distance might be out of the window, but for others, + // it can still be in the window, we don't want to delete such distances + toDelete.removeAll(toStore) + + toStore to toDelete } - private fun increaseRelevant() { + private val allUpdated by lazy { currentUpdated + otherThanCurrentUpdated } + + /** + * Updated values for currently provided distances + */ + private val currentUpdated by lazy { + val result: DistancesMutableMap = mutableMapOf() distinctKeys.forEachIndexed forEach1@{ index1, item1 -> - val key1Id = keyIdMap[item1.namespace to item1.keyName] ?: return@forEach1 + val key1Id = getKeyId(item1.namespace, item1.keyName) ?: return@forEach1 distinctKeys.forEachIndexed forEach2@{ index2, item2 -> - if (index2 <= index1 || abs(index1 - index2) > (BigMetaService.MAX_ORDER_DISTANCE + 1)) { - return@forEach2 - } - val key2Id = keyIdMap[item2.namespace to item2.keyName] ?: return@forEach2 - + if (index2 >= index1) return@forEach2 + val key2Id = getKeyId(item2.namespace, item2.keyName) ?: return@forEach2 val distance = - distances[min(key1Id, key2Id) to max(key1Id, key2Id)] - ?: createDistance(key1Id, key2Id) - relevant[distance.key1Id to distance.key2Id] = distance - 
distance.score = computeDistanceScore(distance.score, distance.hits, relatedKeysSize, index1, index2) - distance.hits++ + distances.get(key1Id, key2Id) + ?.also { + it.distance = computeDistance(it.distance, it.hits, index1 = index1, index2 = index2) + it.hits++ + } + ?: let { + val newDistance = computeDistance(index1 = index1, index2 = index2) + createDistance(key1Id, key2Id, newDistance) + } + result.add(key1Id, key2Id, distance) } } + result } - private fun decreaseOthers() { - existing.forEach { - if (relevant[it.key1Id to it.key2Id] == null) { - it.score = it.score * it.hits / (it.hits + 1) - it.hits++ + /** + * We pretend that keys not included in the current list come right after the current ones, + * so each of their distances is averaged with MAX_STORED as the newly observed distance + */ + private val otherThanCurrentUpdated by lazy { + // Distances that are not currently provided + val otherThanCurrent = + existing.filter { + !currentUpdated.containsKey(it.key) + } + + distinctKeys.map { getKeyId(it.namespace, it.keyName) }.forEachIndexed { index, keyId -> + // this pushes unprovided keys out of the "focus zone", so their distances should "converge" until they get deleted + val maxDistance = MAX_STORED + otherThanCurrent.asSequence().filter { + it.key.first == keyId || it.key.second == keyId + }.sortedBy { it.value.distance }.forEachIndexed { index, (key, value) -> + value.distance = + computeDistance( + oldDistance = value.distance, + hits = value.hits, + newDistance = maxDistance, + ) + value.hits++ } } + + otherThanCurrent } private val keys by lazy { @@ -54,52 +101,90 @@ class KeysDistanceUtil( relatedKeysInOrder.distinct() } - private val relevant = mutableMapOf, KeysDistanceDto>() - private val keyIdMap by lazy { keys.associate { (it.namespace to it.name) to it.id } } - private val existing by lazy { + private val existing: DistancesMap by lazy { bigMetaService.findExistingKeyDistances(keys, project) + .associateBy { + (it.key1Id to it.key2Id) + } } - private val distances by lazy { -
existing.associateBy { - (it.key1Id to it.key2Id) - }.toMutableMap() + private fun getKeyId( + namespace: String?, + keyName: String, + ): Long? { + return keyIdMap[namespace to keyName] } - private val relatedKeysSize = relatedKeysInOrder.size + private val distances: DistancesMutableMap by lazy { + existing.toMutableMap() + } + + private fun DistancesMutableMap.add( + key1Id: Long, + key2Id: Long, + distance: KeysDistanceDto, + ) { + this[getDistancesMapKey(key1Id, key2Id)] = distance + } + + private fun DistancesMap.containsKey( + key1Id: Long, + key2Id: Long, + ): Boolean { + return this.containsKey(getDistancesMapKey(key1Id, key2Id)) + } + + fun DistancesMap.get( + key1Id: Long, + key2Id: Long, + ): KeysDistanceDto? { + return this[getDistancesMapKey(key1Id, key2Id)] + } + + private fun getDistancesMapKey( + key1Id: Long, + key2Id: Long, + ) = min(key1Id, key2Id) to max(key1Id, key2Id) private fun createDistance( key1Id: Long, key2Id: Long, + newDistance: Double, ): KeysDistanceDto { return KeysDistanceDto( key1Id = min(a = key1Id, b = key2Id), key2Id = max(key1Id, key2Id), projectId = project.id, + hits = 1, + distance = newDistance, + stored = false, ).apply { distances[this.key1Id to this.key2Id] = this } } - private fun computeDistanceScore( - oldDistance: Long, - hits: Long, - relatedKeysSize: Int, + private fun computeDistance( + oldDistance: Double = 0.0, + hits: Long = 0, index1: Int, index2: Int, - ): Long { - val maxDistance = (relatedKeysSize - 2) - - val points = - ( - (maxDistance - (abs(index1 - index2) - 1)) / maxDistance.toDouble() - ) * BigMetaService.MAX_POINTS + ): Double { + val newDistance = abs(index1 - index2) - 1 + return computeDistance(oldDistance, hits, newDistance) + } - val baseDistance = BigMetaService.MAX_DISTANCE_SCORE - BigMetaService.MAX_POINTS - return (oldDistance * hits + baseDistance + points).toLong() / (hits + 1) + private fun computeDistance( + oldDistance: Double = 0.0, + hits: Long = 0, + newDistance: Int, + ): Double 
{ + return (oldDistance * hits + newDistance) / (hits + 1) } } + +private typealias DistancesMutableMap = MutableMap, KeysDistanceDto> +private typealias DistancesMap = Map, KeysDistanceDto> diff --git a/backend/data/src/main/resources/db/changelog/schema.xml b/backend/data/src/main/resources/db/changelog/schema.xml index 8be280aa3d..b0bea1ed13 100644 --- a/backend/data/src/main/resources/db/changelog/schema.xml +++ b/backend/data/src/main/resources/db/changelog/schema.xml @@ -4014,4 +4014,21 @@ + + + + + + + + + + update keys_distance + set distance = 10100 - score; + + + alter table keys_distance + alter column score set default 10000; + + diff --git a/backend/data/src/test/kotlin/io/tolgee/unit/KeysDistanceUtilTest.kt b/backend/data/src/test/kotlin/io/tolgee/unit/KeysDistanceUtilTest.kt deleted file mode 100644 index 82a45b5d23..0000000000 --- a/backend/data/src/test/kotlin/io/tolgee/unit/KeysDistanceUtilTest.kt +++ /dev/null @@ -1,85 +0,0 @@ -package io.tolgee.unit - -import io.tolgee.dtos.RelatedKeyDto -import io.tolgee.dtos.queryResults.KeyIdFindResult -import io.tolgee.model.Project -import io.tolgee.service.bigMeta.BigMetaService -import io.tolgee.service.bigMeta.KeysDistanceDto -import io.tolgee.service.bigMeta.KeysDistanceUtil -import io.tolgee.testing.assert -import org.junit.jupiter.api.Test -import org.mockito.kotlin.any -import org.mockito.kotlin.mock -import org.mockito.kotlin.whenever - -class KeysDistanceUtilTest { - private val relatedKeysRequest = - mutableListOf( - RelatedKeyDto(keyName = "key1", namespace = "a"), - RelatedKeyDto(keyName = "key2", namespace = null), - RelatedKeyDto(keyName = "key3", namespace = "a"), - ) - - private val project: Project = mock() - private val bigMetaService: BigMetaService = mock() - - init { - whenever(project.id).thenReturn(1) - - whenever(bigMetaService.findExistingKeyDistances(any(), any())) - .thenReturn( - setOf( - KeysDistanceDto(1, 3, projectId = 0).also { keysDistance -> - keysDistance.score = 10000 - 
keysDistance.hits = 10 - }, - KeysDistanceDto(3, 4, projectId = 0).also { keysDistance -> - keysDistance.score = 10000 - keysDistance.hits = 1 - }, - ), - ) - whenever(bigMetaService.getKeyIdsForItems(any(), any())).thenReturn( - mutableListOf( - KeyIdFindResult( - id = 1, - name = "key1", - namespace = "a", - ), - KeyIdFindResult( - id = 2, - name = "key2", - namespace = null, - ), - KeyIdFindResult( - id = 3, - name = "key3", - namespace = "a", - ), - KeyIdFindResult( - id = 4, - name = "key4", - namespace = "a", - ), - ), - ) - } - - @Test - fun `it works`() { - val result = - KeysDistanceUtil(relatedKeysRequest, project, bigMetaService) - .newDistances - - result.assert.hasSize(4) - result.singleOrNull { it.key1Id == 1L && it.key2Id == 2L }!!.score.assert.isEqualTo(10000) - result.singleOrNull { it.key1Id == 2L && it.key2Id == 3L }!!.score.assert.isEqualTo(10000) - val key1And3Distance = result.singleOrNull { it.key1Id == 1L && it.key2Id == 3L }!! - key1And3Distance.score.assert.isEqualTo(9818L) - key1And3Distance.hits.assert.isEqualTo(11) - - val key3And4Distance = result.singleOrNull { it.key1Id == 3L && it.key2Id == 4L }!! 
- key3And4Distance.score.assert.isEqualTo(5000) - key3And4Distance.hits.assert.isEqualTo(2) - } -} diff --git a/backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUnitTestData.kt b/backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUnitTestData.kt new file mode 100644 index 0000000000..9ae6ef9f82 --- /dev/null +++ b/backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUnitTestData.kt @@ -0,0 +1,73 @@ +package io.tolgee.unit.bigMeta + +import io.tolgee.dtos.RelatedKeyDto +import io.tolgee.dtos.queryResults.KeyIdFindResult +import io.tolgee.service.bigMeta.KeysDistanceDto +import kotlin.math.abs + +class KeysDistanceUnitTestData { + val requestData = mutableListOf() + + private val _existingDistances = mutableSetOf() + + val existingDistances: Set + get() { + val existingKeyIds = existingKeys.map { it.id }.toSet() + return _existingDistances.filter { + it.key1Id in existingKeyIds || it.key2Id in existingKeyIds + }.toSet() + } + + fun createRequestData( + count: Int, + namespace: String?, + ) { + val created = + (0 until count).map { + RelatedKeyDto(keyName = getDefaultKeyName(it), namespace = namespace) + } + requestData.addAll(created) + } + + private fun getDefaultKeyName(it: Number) = "key$it" + + val existingKeys: List + get() { + return requestData.mapIndexed { index, relatedKeyDto -> + KeyIdFindResult( + id = index.toLong(), + name = relatedKeyDto.keyName, + namespace = relatedKeyDto.namespace, + ) + } + } + + private fun addExistingDistance( + key1Id: Long, + key2Id: Long, + modifyDistance: KeysDistanceDto.() -> Unit = {}, + ) { + val defaultDistance = abs(key1Id - key2Id) - 1 + val distanceDto = + KeysDistanceDto( + key1Id = key1Id, + key2Id = key2Id, + projectId = 0, + hits = 1, + distance = defaultDistance.toDouble(), + ) + distanceDto.modifyDistance() + _existingDistances.add(distanceDto) + } + + fun generateExistingDistances(range: IntRange) { + for (i in range) { + for (j in range) { + if (i >= j) { + continue + } + 
addExistingDistance(i.toLong(), j.toLong()) + } + } + } +} diff --git a/backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUtilTest.kt b/backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUtilTest.kt new file mode 100644 index 0000000000..0dad0319c8 --- /dev/null +++ b/backend/data/src/test/kotlin/io/tolgee/unit/bigMeta/KeysDistanceUtilTest.kt @@ -0,0 +1,93 @@ +package io.tolgee.unit.bigMeta + +import io.tolgee.model.Project +import io.tolgee.service.bigMeta.BigMetaService +import io.tolgee.service.bigMeta.KeysDistanceDto +import io.tolgee.service.bigMeta.KeysDistanceUtil +import io.tolgee.testing.assert +import org.assertj.core.api.ObjectAssert +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import org.mockito.kotlin.any +import org.mockito.kotlin.mock +import org.mockito.kotlin.whenever + +class KeysDistanceUtilTest { + private val project: Project = mock() + private val bigMetaService: BigMetaService = mock() + + lateinit var testData: KeysDistanceUnitTestData + + @BeforeEach + fun setup() { + testData = KeysDistanceUnitTestData() + } + + private fun initMocks(data: KeysDistanceUnitTestData) { + whenever(project.id).thenReturn(1) + + whenever(bigMetaService.findExistingKeyDistances(any(), any())) + .thenReturn( + data.existingDistances, + ) + whenever(bigMetaService.getKeyIdsForItems(any(), any())).thenReturn( + data.existingKeys, + ) + } + + @Test + fun `it works (simple case)`() { + testData.createRequestData(3, "a") + testData.generateExistingDistances(2..4) + val (toStore, toDelete) = getResult() + toStore.assert.hasSize(5) + toDelete.assert.hasSize(0) + // these are the new elements, not included in existing distances + toStore.assertDistance(0, 1).distanceEqualsTo(0.0).hitsEqualsTo(1) + toStore.assertDistance(0, 2).distanceEqualsTo(1.0).hitsEqualsTo(1) + toStore.assertDistance(1, 2).distanceEqualsTo(0.0).hitsEqualsTo(1) + + // these are the elements with increased distance + // for these elements, new 
distance is computed as the average of the existing distance and the new distance + // the new distance virtually pushes the elements out of the focus window, so such elements are more likely + // candidates for deletion + toStore.assertDistance(2, 3).distanceEqualsTo(10.0).hitsEqualsTo(2) + toStore.assertDistance(2, 4).distanceEqualsTo(10.5).hitsEqualsTo(2) + } + + @Test + fun `it deletes distances`() { + testData.createRequestData(2, "a") + testData.generateExistingDistances(1..30) + val (toStore, toDelete) = getResult() + toStore.forKeyId(1).assert.hasSize(20) + toDelete.forKeyId(1).assert.hasSize(10) + } + + private fun MutableSet.forKeyId(keyId: Long): List { + return this.filter { it.key1Id == keyId || it.key2Id == keyId } + } + + private fun getResult(): Pair, MutableSet> { + initMocks(testData) + return KeysDistanceUtil(testData.requestData, project, bigMetaService).toStoreAndDelete + } + + private fun MutableSet.assertDistance( + key1Id: Long, + key2Id: Long, + ): ObjectAssert { + return this.find { it.key1Id == key1Id && it.key2Id == key2Id }?.assert + ?: throw AssertionError("Distance not found") + } + + private fun ObjectAssert.hitsEqualsTo(hits: Long): ObjectAssert { + this.extracting { it.hits }.isEqualTo(hits) + return this + } + + private fun ObjectAssert.distanceEqualsTo(distance: Double): ObjectAssert { + this.extracting { it.distance }.isEqualTo(distance) + return this + } +}