From 924a08343f4b64677a5996c7ba61c2813b469a47 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 15 Jun 2020 10:04:44 +0300 Subject: [PATCH 1/5] [refer #1132] Change ModifierId representation --- src/main/scala/scorex/util/package.scala | 43 +++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/src/main/scala/scorex/util/package.scala b/src/main/scala/scorex/util/package.scala index 470a766..b61f426 100644 --- a/src/main/scala/scorex/util/package.scala +++ b/src/main/scala/scorex/util/package.scala @@ -5,12 +5,39 @@ import supertagged.TaggedType package object util { - object ModifierId extends TaggedType[String] - type ModifierId = ModifierId.Type + case class ModifierId(hashBytes: Array[Byte]) { + // This is much more efficient than hashing whole array or String. + // We can use the first 4 bytes and convert them into Int. + override def hashCode: Int = { + val len = math.min(hashBytes.length, 4) + var hash = 0 + for (i <- 0 until len) { + hash = (hash << 8) | (hashBytes(i) & 0xFF) + } + hash + } + override def equals(other: Any): Boolean = other.isInstanceOf[ModifierId] && hashBytes.sameElements(other.asInstanceOf[ModifierId].hashBytes) + override def toString: String = Base16.encode(hashBytes) + } + + def bytesToId(bytes: Array[Byte]): ModifierId = new ModifierId(bytes) - def bytesToId(bytes: Array[Byte]): ModifierId = ModifierId @@ Base16.encode(bytes) + def idToBytes(id: ModifierId): Array[Byte] = id.hashBytes - def idToBytes(id: ModifierId): Array[Byte] = Base16.decode(id).get + def stringToId(s: String): ModifierId = new ModifierId(Base16.decode(s).get) + + implicit val modifierOrdering : Ordering[ModifierId] = new Ordering[ModifierId] { + def compare(a: ModifierId, b: ModifierId): Int = { + val len = math.min(a.hashBytes.length, b.hashBytes.length) + for (i <- 0 until len) { + val diff = (a.hashBytes(i) & 0xFF) - (b.hashBytes(i) & 0xFF) + if (diff != 0) { + return diff + } + } + return a.hashBytes.length - len + } + } implicit class ModifierIdOps(val m: ModifierId) extends AnyVal { @inline def toBytes: Array[Byte] = idToBytes(m) @@ -19,4 +46,12 @@ package object util { implicit class ByteArrayOps(val b: Array[Byte]) extends AnyVal { @inline def toModifierId: ModifierId = bytesToId(b) } + + implicit class StringOps(val s: String) extends AnyVal { + @inline def toModifierId: ModifierId = stringToId(s) + } + + object ModifierId { + def apply(s: String): ModifierId = stringToId(s) + } } From cdd9d9a16c9e0e6dd0c404284ae9ac642fdfa23f Mon Sep 17 00:00:00 2001 From: Alexander Slesarenko Date: Tue, 16 Jun 2020 12:45:36 +0300 Subject: [PATCH 2/5] using standard JDK methods in ModifierId --- src/main/scala/scorex/util/package.scala | 57 ++++++++++++++++-------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/main/scala/scorex/util/package.scala b/src/main/scala/scorex/util/package.scala index b61f426..163146f 100644 --- a/src/main/scala/scorex/util/package.scala +++ b/src/main/scala/scorex/util/package.scala @@ -1,23 +1,49 @@ package scorex import scorex.util.encode.Base16 -import supertagged.TaggedType package object util { + /** Represents hash based id of a modifier (see [Content-addressable + * storage](https://en.wikipedia.org/wiki/Content-addressable_storage)). `ModifierId` is used extensively + * all over the code base. + * In most cases `ModifierId` is used as equality safe replacement of the original `Array[Byte]` 32-bytes + * hash which is stored in blockchain. + * + * The reason for this is that the default implementation of `hashCode` and `equals` in `Array` class + * doesn't allow to use arrays in `Map` (as keys) and in `Set` collections. Other methods like `distinct` + * also become broken. + * + * This class avoids the above mentioned problems and in addition outperforms even Array[Byte] + * while guaranteeing the correctness of equality sensitive operations with collections. + * The idea is to exploit the fact that ModifierId is backed by cryptographic hash, we know this for sure, + * so it is not general Array[Byte]. + * + * The implementation of `hashCode()` below is much more efficient than hashing the whole 32 bytes of + * `hashBytes` array and actually provide better `hashCode` randomness (since the hash function if + * cryptographic), which will further improve performance of `Map` and `Set` operations. + * + * @param hashBytes cryptographic hash + */ case class ModifierId(hashBytes: Array[Byte]) { // This is much more efficient than hashing whole array or String. // We can use the first 4 bytes and convert them into Int. override def hashCode: Int = { - val len = math.min(hashBytes.length, 4) - var hash = 0 - for (i <- 0 until len) { - hash = (hash << 8) | (hashBytes(i) & 0xFF) - } - hash - } - override def equals(other: Any): Boolean = other.isInstanceOf[ModifierId] && hashBytes.sameElements(other.asInstanceOf[ModifierId].hashBytes) - override def toString: String = Base16.encode(hashBytes) + val bytes = hashBytes + hashFromBytes(bytes(0), bytes(1), bytes(2), bytes(3)) + } + + override def equals(other: Any): Boolean = (this eq other.asInstanceOf[AnyRef]) || + (other match { + case other: ModifierId => java.util.Arrays.equals(hashBytes, other.hashBytes) + case _ => false + }) + + override def toString: String = Base16.encode(hashBytes) + } + + @inline final def hashFromBytes(b1: Byte, b2: Byte, b3: Byte, b4: Byte): Int = { + b1 << 24 | (b2 & 0xFF) << 16 | (b3 & 0xFF) << 8 | (b4 & 0xFF) } def bytesToId(bytes: Array[Byte]): ModifierId = new ModifierId(bytes) @@ -28,15 +54,8 @@ package object util { implicit val modifierOrdering : Ordering[ModifierId] = new Ordering[ModifierId] { def compare(a: ModifierId, b: ModifierId): Int = { - val len = math.min(a.hashBytes.length, b.hashBytes.length) - for (i <- 0 until len) { - val diff = (a.hashBytes(i) & 0xFF) - (b.hashBytes(i) & 0xFF) - if (diff != 0) { - return diff - } - } - return a.hashBytes.length - len - } + java.util.Arrays.compare(a.hashBytes, b.hashBytes) + } } implicit class ModifierIdOps(val m: ModifierId) extends AnyVal { From 53da8646909b01253ad7e091861bf74d535ec5be Mon Sep 17 00:00:00 2001 From: Alexander Slesarenko Date: Tue, 16 Jun 2020 12:54:07 +0300 Subject: [PATCH 3/5] rollback modifierOrdering optimization --- src/main/scala/scorex/util/package.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/scala/scorex/util/package.scala b/src/main/scala/scorex/util/package.scala index 163146f..98277fb 100644 --- a/src/main/scala/scorex/util/package.scala +++ b/src/main/scala/scorex/util/package.scala @@ -53,8 +53,18 @@ package object util { def stringToId(s: String): ModifierId = new ModifierId(Base16.decode(s).get) implicit val modifierOrdering : Ordering[ModifierId] = new Ordering[ModifierId] { + // TODO optimize: use java.util.Arrays.compare after JDK8 support is dropped def compare(a: ModifierId, b: ModifierId): Int = { - java.util.Arrays.compare(a.hashBytes, b.hashBytes) + val len = math.min(a.hashBytes.length, b.hashBytes.length) + var i = 0 + while (i < len) { + val diff = (a.hashBytes(i) & 0xFF) - (b.hashBytes(i) & 0xFF) + if (diff != 0) { + return diff + } + i += 1 + } + a.hashBytes.length - len } } From 5033e59722dacd03ab86b8dc1653866af3a7380a Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 2 Jul 2020 17:09:17 +0300 Subject: [PATCH 4/5] Add tests for ModifierId --- src/main/scala/scorex/util/package.scala | 11 ++++++----- src/test/scala/scorex/ModifierIdSpec.scala | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/main/scala/scorex/util/package.scala b/src/main/scala/scorex/util/package.scala index 98277fb..b4a8d80 100644 --- a/src/main/scala/scorex/util/package.scala +++ b/src/main/scala/scorex/util/package.scala @@ -4,8 +4,7 @@ import scorex.util.encode.Base16 package object util { - /** Represents hash based id of a modifier (see [Content-addressable - * storage](https://en.wikipedia.org/wiki/Content-addressable_storage)). `ModifierId` is used extensively + /** Represents hash based id of a modifier. `ModifierId` is used extensively * all over the code base. * In most cases `ModifierId` is used as equality safe replacement of the original `Array[Byte]` 32-bytes * hash which is stored in blockchain. @@ -27,10 +26,10 @@ package object util { */ case class ModifierId(hashBytes: Array[Byte]) { // This is much more efficient than hashing whole array or String. - // We can use the first 4 bytes and convert them into Int. + // We can use the last 4 bytes and convert them into Int. override def hashCode: Int = { val bytes = hashBytes - hashFromBytes(bytes(0), bytes(1), bytes(2), bytes(3)) + hashFromBytes(bytes(28), bytes(29), bytes(30), bytes(31)) } override def equals(other: Any): Boolean = (this eq other.asInstanceOf[AnyRef]) || @@ -53,7 +52,9 @@ package object util { def stringToId(s: String): ModifierId = new ModifierId(Base16.decode(s).get) implicit val modifierOrdering : Ordering[ModifierId] = new Ordering[ModifierId] { - // TODO optimize: use java.util.Arrays.compare after JDK8 support is dropped + // We can not use java.util.Arrays.compare because we have to provide compatibility with + // comparison of Base16 encoded strings representing this byte array. + // So we have to treate bytes as unsigned def compare(a: ModifierId, b: ModifierId): Int = { val len = math.min(a.hashBytes.length, b.hashBytes.length) var i = 0 diff --git a/src/test/scala/scorex/ModifierIdSpec.scala b/src/test/scala/scorex/ModifierIdSpec.scala index 0d96d5d..6427b0f 100644 --- a/src/test/scala/scorex/ModifierIdSpec.scala +++ b/src/test/scala/scorex/ModifierIdSpec.scala @@ -15,4 +15,22 @@ class ModifierIdSpec extends AnyFlatSpec with Matchers { bytes.toModifierId.toBytes shouldEqual bytes } + "ModifierId" should "equals or not equal if and only if the corresponding Base16 strings are equal" in { + val str1 = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF" + val str2 = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F9007F" + ModifierId(str1) shouldEqual ModifierId(str1) + ModifierId(str1) should not equal ModifierId(str2) + } + + "ModifierId" should "provide the same ordering as Base16 strings" in { + val strs = Array("0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF", + "0101020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF", + "FF01020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF", + "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F9007F", + "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F901FF") + for (i <- 0 until strs.size) + for (j <- 0 until strs.size) + math.signum(modifierOrdering.compare(ModifierId(strs(i)), ModifierId(strs(j)))) shouldEqual math.signum(strs(i).compare(strs(j))) + } + } From cdac6e95b473436f8f68590bbc5c1fa5d5931fed Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Wed, 8 Jul 2020 16:15:36 +0300 Subject: [PATCH 5/5] Handle byte arrays of arbitrary length in ModifierId --- src/main/scala/scorex/util/package.scala | 5 ++++- src/test/scala/scorex/ModifierIdSpec.scala | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/scala/scorex/util/package.scala b/src/main/scala/scorex/util/package.scala index b4a8d80..5b92e0b 100644 --- a/src/main/scala/scorex/util/package.scala +++ b/src/main/scala/scorex/util/package.scala @@ -29,7 +29,10 @@ package object util { // We can use the last 4 bytes and convert them into Int. override def hashCode: Int = { val bytes = hashBytes - hashFromBytes(bytes(28), bytes(29), bytes(30), bytes(31)) + if (bytes.size == 32) + hashFromBytes(bytes(28), bytes(29), bytes(30), bytes(31)) + else + java.util.Arrays.hashCode(bytes) } override def equals(other: Any): Boolean = (this eq other.asInstanceOf[AnyRef]) || diff --git a/src/test/scala/scorex/ModifierIdSpec.scala b/src/test/scala/scorex/ModifierIdSpec.scala index 6427b0f..e428dbe 100644 --- a/src/test/scala/scorex/ModifierIdSpec.scala +++ b/src/test/scala/scorex/ModifierIdSpec.scala @@ -17,9 +17,11 @@ class ModifierIdSpec extends AnyFlatSpec with Matchers { "ModifierId" should "equals or not equal if and only if the corresponding Base16 strings are equal" in { val str1 = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF" - val str2 = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F9007F" + val str2 = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF" + val str3 = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F9007F" ModifierId(str1) shouldEqual ModifierId(str1) - ModifierId(str1) should not equal ModifierId(str2) + ModifierId(str1) shouldEqual ModifierId(str2) + ModifierId(str2) should not equal ModifierId(str3) } "ModifierId" should "provide the same ordering as Base16 strings" in {