-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor: add UnsafeSerialization.writeIntsWithPrefix method to simplify a couple LSH models (#593)
- Loading branch information
1 parent
7ccdc10
commit 1a1a561
Showing
9 changed files
with
915 additions
and
840 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
|Model|Parameters|Recall|Queries per Second| | ||
|---|---|---|---| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.378|351.096| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.447|291.666| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.634|277.702| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.716|238.914| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.767|288.441| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.847|246.201| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.922|192.499| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|177.009| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.378|363.121| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.446|299.144| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.634|270.522| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.716|240.419| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.768|280.053| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.847|240.014| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.922|186.668| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|166.241| |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
elastiknn-models/src/test/scala/com/klibisz/elastiknn/storage/UnsafeSerializationSpec.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
package com.klibisz.elastiknn.storage | ||
|
||
import org.scalatest.freespec.AnyFreeSpec | ||
import org.scalatest.matchers.should.Matchers | ||
|
||
import scala.util.Random | ||
|
||
/** Randomized round-trip and equivalence tests for [[UnsafeSerialization]].
  *
  * Every randomized test seeds its generator from the wall clock and reports that seed
  * (plus the failing iteration) through `withClue`, so a failure can be reproduced by
  * re-running with the reported seed.
  */
class UnsafeSerializationSpec extends AnyFreeSpec with Matchers {

  /** Draws one signed int: a non-negative value below `Int.MaxValue` with a random sign.
    *
    * Consumes exactly one `nextInt` followed by one `nextBoolean` from `rng`.
    */
  private def randomSignedInt(rng: Random): Int =
    rng.nextInt(Int.MaxValue) * (if (rng.nextBoolean()) 1 else -1)

  /** Builds an array of `len` signed ints, drawing the elements from `rng` in index order. */
  private def randomSignedInts(rng: Random, len: Int): Array[Int] =
    Array.fill(len)(randomSignedInt(rng))

  "writeInts and readInts" - {
    "round trip with randomized arrays" in {
      val seed = System.currentTimeMillis()
      val maxLen = 4096
      val rng = new Random(seed)
      for (i <- 0 to 1000) {
        withClue(s"Failed on iteration $i with seed $seed and max length $maxLen") {
          // Generate array of random ints.
          val len = rng.nextInt(maxLen)
          val iarr = randomSignedInts(rng, len)

          // Serialize and check serialized length.
          val trimmed = UnsafeSerialization.writeInts(iarr)
          trimmed should have length (iarr.length * UnsafeSerialization.numBytesInInt)

          // Deserialize and check.
          val iarrReadTrimmed = UnsafeSerialization.readInts(trimmed, 0, trimmed.length)
          iarrReadTrimmed shouldBe iarr

          // Place in larger array with random offset (zero padding on both sides).
          val offset = rng.nextInt(maxLen)
          val embedded = new Array[Byte](offset) ++ trimmed ++ new Array[Byte](rng.nextInt(maxLen))

          // Deserialize from the embedded position and check.
          val iarrReadEmbedded = UnsafeSerialization.readInts(embedded, offset, trimmed.length)
          iarrReadEmbedded shouldBe iarr
        }
      }
    }
  }

  "writeIntsWithPrefix" - {
    "equivalent to writeInts with prefix embedded in the array" in {
      val seed = System.currentTimeMillis()
      val maxLen = 4096
      val rng = new Random(seed)
      for (i <- 0 to 1000) {
        // Report the seed on failure, like the other randomized tests, so the case is reproducible.
        withClue(s"Failed on iteration $i with seed $seed and max length $maxLen") {
          val len = rng.nextInt(maxLen)
          val prefix = rng.nextInt()
          val iarr = randomSignedInts(rng, len)
          val iarrWithPrefix = prefix +: iarr
          val writeIntsWithPrefix = UnsafeSerialization.writeIntsWithPrefix(prefix, iarr)
          val writeInts = UnsafeSerialization.writeInts(iarrWithPrefix)
          writeIntsWithPrefix shouldBe writeInts
        }
      }
    }
  }

  "writeFloats and readFloats" - {
    "round trip with randomized arrays" in {
      val seed = System.currentTimeMillis()
      val maxLen = 4096
      val rng = new Random(seed)
      for (i <- 0 to 1000) {
        withClue(s"Failed on iteration $i with seed $seed and max length $maxLen") {
          // Generate array of random floats, scaled towards either end of the Float range.
          val len = rng.nextInt(maxLen)
          val farr = Array.fill(len)(rng.nextFloat() * (if (rng.nextBoolean()) Float.MaxValue else Float.MinValue))

          // Serialize and check length.
          val trimmed = UnsafeSerialization.writeFloats(farr)
          trimmed should have length (farr.length * UnsafeSerialization.numBytesInFloat)

          // Deserialize and check.
          val farrTrimmed = UnsafeSerialization.readFloats(trimmed, 0, trimmed.length)
          farrTrimmed shouldBe farr

          // Place in larger array with random offset (zero padding on both sides).
          val offset = rng.nextInt(maxLen)
          val embedded = new Array[Byte](offset) ++ trimmed ++ new Array[Byte](rng.nextInt(maxLen))

          // Deserialize from the embedded position and check.
          val farrReadEmbedded = UnsafeSerialization.readFloats(embedded, offset, trimmed.length)
          farrReadEmbedded shouldBe farr
        }
      }
    }
  }

  "writeInt" - {
    "variable length encoding" in {
      // Values at the signed one-byte and two-byte limits must use one and two bytes respectively.
      UnsafeSerialization.writeInt(127) should have length 1
      UnsafeSerialization.writeInt(-127) should have length 1
      UnsafeSerialization.writeInt(32767) should have length 2
      UnsafeSerialization.writeInt(-32767) should have length 2
    }
  }

  "writeInt and readInt" - {
    "round trip with randomized ints" in {
      val seed = System.currentTimeMillis()
      val rng = new Random(seed)
      for (i <- 0 to 10000) {
        withClue(s"Failed on iteration $i with seed $seed") {
          // Named `value` rather than `i` so it does not shadow the loop index reported in the clue.
          val value = randomSignedInt(rng)
          val barr = UnsafeSerialization.writeInt(value)
          val iRead = UnsafeSerialization.readInt(barr)
          iRead shouldBe value
        }
      }
    }
  }
}
88 changes: 0 additions & 88 deletions
88
elastiknn-models/src/test/scala/com/klibisz/elastiknn/storage/UnsafeSerializationSuite.scala
This file was deleted.
Oops, something went wrong.