Skip to content

Commit

Permalink
fix: BigMeta (context) storing -> reduce the amount of stored data (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
JanCizmar authored Dec 19, 2024
1 parent 944a827 commit f22421c
Show file tree
Hide file tree
Showing 13 changed files with 416 additions and 203 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ import io.tolgee.ProjectAuthControllerTest
import io.tolgee.development.testDataBuilder.data.BigMetaTestData
import io.tolgee.fixtures.andAssertThatJson
import io.tolgee.fixtures.andIsOk
import io.tolgee.fixtures.waitForNotThrowing
import io.tolgee.model.key.Key
import io.tolgee.service.bigMeta.BigMetaService
import io.tolgee.service.bigMeta.KeysDistanceDto
import io.tolgee.testing.annotations.ProjectJWTAuthTestMethod
import io.tolgee.testing.assert
import io.tolgee.util.Logging
import io.tolgee.util.infoMeasureTime
import io.tolgee.util.logger
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.springframework.beans.factory.annotation.Autowired
Expand Down Expand Up @@ -53,66 +53,78 @@ class BigMetaControllerTest : ProjectAuthControllerTest("/v2/projects/"), Loggin
),
).andIsOk

bigMetaService.findExistingKeysDistancesDtosByIds(listOf(testData.yepKey.id)).assert.hasSize(1)
getDistances(listOf(testData.yepKey.id)).assert.hasSize(1)
}

@Test
@ProjectJWTAuthTestMethod
fun `it performs well`() {
val keys = testData.addLotOfData()
testData.addLotOfReferences(keys)
saveTestDataAndPrepare()

logger.infoMeasureTime("it performs well time 1") {
storeLogOfBigMeta(keys, 500, 100)
}

logger.infoMeasureTime("it performs well time 2") {
storeLogOfBigMeta(keys, 500, 100)
}
measureTime {
storeAndAssertSize(keys, 0, 30, 354)
storeAndAssertSize(keys, 500, 100, 1409)
storeAndAssertSize(keys, 10, 200, 3155)
storeAndAssertSize(keys, 800, 50, 3710)
storeAndAssertSize(keys, 800, 50, 3710)
}.inWholeSeconds.assert.isLessThan(5)
}

logger.infoMeasureTime("it performs well time 3") {
storeLogOfBigMeta(keys, 10, 200)
}
private fun storeAndAssertSize(
allKeys: List<Key>,
drop: Int,
take: Int,
expectedSize: Int,
) {
val keyIds = allKeys.map { it.id }

logger.infoMeasureTime("it performs well time 4") {
storeLogOfBigMeta(keys, 800, 50)
val stored = storeLotOfBigMeta(allKeys, drop, take)
waitForNotThrowing(pollTime = 50, timeout = 2000) {
val distances = getDistances(keyIds)
assertAllHaveAtLeast20Distances(stored, distances)
getDistances(keyIds).assert.hasSize(expectedSize)
}

measureTime {
storeLogOfBigMeta(keys, 800, 50)
}.inWholeSeconds.assert.isLessThan(10)

bigMetaService.findExistingKeysDistancesDtosByIds(keys.map { it.id }).assert.hasSize(104790)
}

@Test
@ProjectJWTAuthTestMethod
fun `it performs well (large)`() {
val keys = testData.addLotOfData()
testData.addLotOfReferences(keys)
saveTestDataAndPrepare()
/** Fetches the stored key distances for [keyIds] by delegating to [BigMetaService.findExistingKeysDistancesDtosByIds]. */
private fun getDistances(keyIds: List<Long>): Set<KeysDistanceDto> {
  return bigMetaService.findExistingKeysDistancesDtosByIds(keyIds)
}

storeLogOfBigMeta(keys, 0, 200)
/**
 * Asserts that every key in [stored] is referenced (as `key1Id` or `key2Id`)
 * by at least 20 entries of [distances].
 */
private fun assertAllHaveAtLeast20Distances(
  stored: List<Key>,
  distances: Set<KeysDistanceDto>,
) {
  // Key ID -> all distances in which that key takes part on either side.
  val relatedByKeyId =
    stored.associate { key ->
      key.id to distances.filter { it.key1Id == key.id || it.key2Id == key.id }
    }

  relatedByKeyId.values.assert.allSatisfy { related ->
    related.assert.hasSizeGreaterThanOrEqualTo(20)
  }
}

private fun storeLogOfBigMeta(
private fun storeLotOfBigMeta(
keys: List<Key>,
drop: Int,
take: Int,
) {
): List<Key> {
val toStore = keys.drop(drop).take(take)
performProjectAuthPost(
"big-meta",
mapOf(
"relatedKeysInOrder" to
keys.drop(drop).take(take).reversed().map {
toStore.reversed().map {
mapOf(
"namespace" to it.namespace,
"keyName" to it.name,
)
},
),
).andIsOk
return toStore
}

@Test
Expand Down
6 changes: 6 additions & 0 deletions backend/data/src/main/kotlin/io/tolgee/Metrics.kt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class Metrics(
.register(meterRegistry)
}

/** Timer for the asynchronous deletion of big meta data; built and registered on first access. */
val bigMetaDeletingAsyncTimer: Timer by lazy {
  val builder =
    Timer.builder("tolgee.big_meta.deleting-async.timer")
      .description("Time spent deleting big meta data (async)")
  builder.register(meterRegistry)
}

val bigMetaNewDistancesComputeTimer: Timer by lazy {
Timer.builder("tolgee.big_meta.new_distances.compute.timer")
.description("Time spent computing new distances for big meta data")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import io.tolgee.development.testDataBuilder.builders.TestDataBuilder
import io.tolgee.model.Project
import io.tolgee.model.UserAccount
import io.tolgee.model.key.Key
import io.tolgee.service.bigMeta.BigMetaService
import kotlin.math.abs

class BigMetaTestData {
Expand Down Expand Up @@ -42,9 +41,10 @@ class BigMetaTestData {
fun addLotOfReferences(keys: List<Key>) {
keys.forEachIndexed forEach1@{ idx1, key1 ->
keys.forEachIndexed forEach2@{ idx2, key2 ->
if (idx1 >= idx2 || abs(idx1 - idx2) > (BigMetaService.MAX_ORDER_DISTANCE + 1)) return@forEach2
val distance = abs(idx1 - idx2).toDouble()
if (idx1 >= idx2 || distance > (20 + 1)) return@forEach2
projectBuilder.addKeysDistance(key1, key2) {
score = 10000
this.distance = distance
hits = 1
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,34 +103,34 @@ class SuggestionTestData : BaseTestData() {

private fun ProjectBuilder.addKeyDistances() {
this.addKeysDistance(data.keys[0].self, data.keys[1].self) {
score = 10000
distance = 1000.0
}
this.addKeysDistance(data.keys[0].self, data.keys[2].self) {
score = 8000
distance = 8.0
}
this.addKeysDistance(data.keys[0].self, data.keys[3].self) {
score = 8000
distance = 8.0
}
this.addKeysDistance(data.keys[0].self, data.keys[4].self) {
score = 8000
distance = 8.0
}
this.addKeysDistance(data.keys[1].self, data.keys[2].self) {
score = 2000
distance = 2.0
}
this.addKeysDistance(data.keys[1].self, data.keys[3].self) {
score = 1000
distance = 1.0
}
this.addKeysDistance(data.keys[1].self, data.keys[4].self) {
score = 1000
distance = 1.0
}
this.addKeysDistance(data.keys[2].self, data.keys[3].self) {
score = 1000
distance = 1.0
}
this.addKeysDistance(data.keys[2].self, data.keys[4].self) {
score = 1000
distance = 1.0
}
this.addKeysDistance(data.keys[3].self, data.keys[4].self) {
score = 1000
distance = 1.0
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import jakarta.persistence.IdClass
import jakarta.persistence.Index
import jakarta.persistence.ManyToOne
import jakarta.persistence.Table
import org.hibernate.annotations.ColumnDefault
import org.springframework.data.domain.Persistable

@Entity
Expand All @@ -31,7 +32,12 @@ class KeysDistance(
@ManyToOne(fetch = FetchType.LAZY)
lateinit var project: Project

var score: Long = MAX_SCORE
// TODO: Remove this
@Deprecated("Kept for backward compatibility. Can be removed in any next patch version.")
@ColumnDefault("10000")
var score: Long = 10000

var distance: Double = 0.0

var hits: Long = 1

Expand All @@ -43,14 +49,14 @@ class KeysDistance(

if (key1Id != other.key1Id) return false
if (key2Id != other.key2Id) return false
if (score != other.score) return false
if (distance != other.distance) return false
return hits == other.hits
}

override fun hashCode(): Int {
var result = key1Id.hashCode()
result = 31 * result + key2Id.hashCode()
result = 31 * result + score.hashCode()
result = 31 * result + distance.hashCode()
result = 31 * result + hits.hashCode()
return result
}
Expand All @@ -66,8 +72,4 @@ class KeysDistance(
@Transient
@Column(insertable = false, updatable = false)
var new = false

companion object {
const val MAX_SCORE = 10000L
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ interface KeysDistanceRepository : JpaRepository<KeysDistance, Long> {
select (case when kd.key1Id = :keyId then kd.key2Id else kd.key1Id end) from KeysDistance kd
where kd.key1Id = :keyId or
kd.key2Id = :keyId
order by kd.score desc
order by kd.distance
""",
)
fun getCloseKeys(
Expand All @@ -36,7 +36,7 @@ interface KeysDistanceRepository : JpaRepository<KeysDistance, Long> {
select (case when kd.key1Id = :keyId then kd.key2Id else kd.key1Id end) from KeysDistance kd
where kd.key1Id = :keyId or
kd.key2Id = :keyId
order by kd.score desc
order by kd.distance
) and k.project.id = :projectId
""",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@ import io.tolgee.model.key.Key_
import io.tolgee.model.key.Namespace_
import io.tolgee.model.keyBigMeta.KeysDistance
import io.tolgee.repository.KeysDistanceRepository
import io.tolgee.util.Logging
import io.tolgee.util.equalNullable
import io.tolgee.util.executeInNewTransaction
import io.tolgee.util.runSentryCatching
import io.tolgee.util.*
import jakarta.persistence.EntityManager
import jakarta.persistence.criteria.CriteriaBuilder
import jakarta.persistence.criteria.CriteriaQuery
Expand Down Expand Up @@ -45,12 +42,6 @@ class BigMetaService(
@Autowired
private lateinit var self: BigMetaService

companion object {
const val MAX_DISTANCE_SCORE = 10000L
const val MAX_POINTS = 2000L
const val MAX_ORDER_DISTANCE = 20
}

fun saveKeyDistance(keysDistance: KeysDistance): KeysDistance {
return keysDistanceRepository.save(keysDistance)
}
Expand Down Expand Up @@ -82,35 +73,55 @@ class BigMetaService(
KeysDistanceUtil(relatedKeysInOrder, project, this)
}!!

val (toStore, toDelete) = util.toStoreAndDelete

val (toStoreSync, toStoreAsync) =
if (forKeyId == null) {
util.newDistances to emptyList()
toStore to emptyList()
} else {
util.newDistances.partition { it.key1Id == forKeyId || it.key2Id == forKeyId }
toStore.partition { it.key1Id == forKeyId || it.key2Id == forKeyId }
}

metrics.bigMetaStoringTimer.recordCallable {
insertNewDistances(toStoreSync)
}

self.asyncInsertNewDistances(toStoreAsync)
self.asyncDeleteDistances(toDelete)
}

@Async
fun asyncInsertNewDistances(toInsert: List<KeysDistanceDto>) {
metrics.bigMetaStoringAsyncTimer.recordCallable {
self.asyncInsertNewDistances(toStoreAsync)
insertNewDistances(toInsert)
}
}

@Async
fun asyncInsertNewDistances(toInsert: List<KeysDistanceDto>) {
insertNewDistances(toInsert)
/**
 * Deletes the given key pairs from the `keys_distance` table.
 *
 * Rows are matched by `(key1id, key2id)` and removed with a JDBC batch update
 * using a batch size of 10000. Nothing is executed when [toDelete] is empty.
 * The time spent in the batch delete is recorded in
 * `metrics.bigMetaDeletingAsyncTimer`, in the same way `asyncInsertNewDistances`
 * records into `metrics.bigMetaStoringAsyncTimer`.
 *
 * @param toDelete distances whose `key1Id`/`key2Id` pairs identify the rows to remove
 */
fun asyncDeleteDistances(toDelete: MutableSet<KeysDistanceDto>) {
  if (toDelete.isEmpty()) {
    return
  }
  // The empty check stays outside the timer so no-op calls do not skew the metric.
  metrics.bigMetaDeletingAsyncTimer.recordCallable {
    jdbcTemplate.batchUpdate(
      """
      delete from keys_distance where key1id = ? and key2id = ?
      """,
      toDelete,
      10000,
    ) { ps, dto ->
      ps.setLong(1, dto.key1Id)
      ps.setLong(2, dto.key2Id)
    }
  }
}

private fun insertNewDistances(toInsert: List<KeysDistanceDto>) {
private fun insertNewDistances(toInsert: Collection<KeysDistanceDto>) {
if (toInsert.isEmpty()) {
return
}
val timestamp = Timestamp(currentDateProvider.date.time)
jdbcTemplate.batchUpdate(
"""
insert into keys_distance (key1id, key2id, score, hits, created_at, updated_at, project_id)
insert into keys_distance (key1id, key2id, distance, hits, created_at, updated_at, project_id)
values (?, ?, ?, ?, ?, ?, ?)
on conflict (key1id, key2id) do update set distance = excluded.distance, hits = excluded.hits, updated_at = ?
""",
Expand All @@ -119,7 +130,7 @@ class BigMetaService(
) { ps, dto ->
ps.setLong(1, dto.key1Id)
ps.setLong(2, dto.key2Id)
ps.setLong(3, dto.score)
ps.setDouble(3, dto.distance)
ps.setLong(4, dto.hits)
ps.setTimestamp(5, timestamp)
ps.setTimestamp(6, timestamp)
Expand Down Expand Up @@ -153,7 +164,7 @@ class BigMetaService(
fun findExistingKeysDistancesDtosByIds(keyIds: List<Long>): Set<KeysDistanceDto> {
return entityManager.createQuery(
"""
select new io.tolgee.service.bigMeta.KeysDistanceDto(kd.key1Id, kd.key2Id, kd.score, kd.project.id, kd.hits) from KeysDistance kd
select new io.tolgee.service.bigMeta.KeysDistanceDto(kd.key1Id, kd.key2Id, kd.distance, kd.project.id, kd.hits, true) from KeysDistance kd
where kd.key1Id in (
select kd2.key1Id from KeysDistance kd2 where kd2.key1Id in :data or kd2.key2Id in :data
) or kd.key2Id in (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
package io.tolgee.service.bigMeta

import io.tolgee.model.keyBigMeta.KeysDistance.Companion.MAX_SCORE

data class KeysDistanceDto(
var key1Id: Long = 0,
var key2Id: Long = 0,
var score: Long = MAX_SCORE,
var distance: Double = 0.0,
var projectId: Long,
var hits: Long = 1,
var stored: Boolean = true,
)
Loading

0 comments on commit f22421c

Please sign in to comment.