Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 73 additions & 5 deletions src/main/scala/scorex/util/package.scala
Original file line number Diff line number Diff line change
@@ -1,16 +1,76 @@
package scorex

import scorex.util.encode.Base16
import supertagged.TaggedType

package object util {

object ModifierId extends TaggedType[String]
type ModifierId = ModifierId.Type
/** Represents hash based id of a modifier. `ModifierId` is used extensively
* all over the code base.
* In most cases `ModifierId` is used as equality safe replacement of the original `Array[Byte]` 32-bytes
* hash which is stored in blockchain.
*
* The reason for this is that the default implementation of `hashCode` and `equals` in `Array` class
* doesn't allow to use arrays in `Map` (as keys) and in `Set` collections. Other methods like `distinct`
* also become broken.
*
* This class avoids the above mentioned problems and in addition outperforms even Array[Byte]
* while guaranteeing the correctness of equality sensitive operations with collections.
* The idea is to exploit the fact that ModifierId is backed by cryptographic hash, we know this for sure,
* so it is not general Array[Byte].
*
* The implementation of `hashCode()` below is much more efficient than hashing the whole 32 bytes of
* the `hashBytes` array and actually provides better `hashCode` randomness (since the hash function is
* cryptographic), which will further improve the performance of `Map` and `Set` operations.
*
* @param hashBytes cryptographic hash
*/
case class ModifierId(hashBytes: Array[Byte]) {
/** Cheap hash code taken from the tail of the hash bytes.
  *
  * When the array is exactly 32 bytes long, the last four bytes (indices 28 to 31) are
  * packed into an `Int` via `hashFromBytes`, which avoids hashing the whole array or a String.
  * Any other length falls back to `java.util.Arrays.hashCode`.
  */
override def hashCode: Int = {
  val digest = hashBytes
  digest.length match {
    case 32 => hashFromBytes(digest(28), digest(29), digest(30), digest(31))
    case _  => java.util.Arrays.hashCode(digest)
  }
}

def bytesToId(bytes: Array[Byte]): ModifierId = ModifierId @@ Base16.encode(bytes)
/** Content-based equality.
  *
  * Two ids are equal when they are the same instance or when their `hashBytes` arrays
  * hold the same bytes (`java.util.Arrays.equals`). Anything that is not a `ModifierId`
  * (including `null`) is never equal.
  */
override def equals(other: Any): Boolean = other match {
  case that: ModifierId =>
    // Identity check first: it skips the byte-by-byte comparison for the same instance.
    (this eq that) || java.util.Arrays.equals(hashBytes, that.hashBytes)
  case _ =>
    false
}

def idToBytes(id: ModifierId): Array[Byte] = Base16.decode(id).get
/** Renders this id as the Base16 encoding of `hashBytes`. */
override def toString: String =
  Base16.encode(hashBytes)
}

/** Packs four bytes into a single `Int`, with `b1` as the most significant byte.
  *
  * @param b1 bits 31..24 of the result
  * @param b2 bits 23..16 of the result
  * @param b3 bits 15..8 of the result
  * @param b4 bits 7..0 of the result
  * @return the four bytes combined in big-endian order
  */
@inline final def hashFromBytes(b1: Byte, b2: Byte, b3: Byte, b4: Byte): Int = {
  // b1 needs no mask: the shift by 24 pushes its sign-extension bits out of the Int.
  val top = b1 << 24
  // The other bytes are masked so their sign extension cannot leak into higher bits.
  val upperMiddle = (b2 & 0xFF) << 16
  val lowerMiddle = (b3 & 0xFF) << 8
  val bottom = b4 & 0xFF
  top | upperMiddle | lowerMiddle | bottom
}

/** Wraps raw bytes in a `ModifierId`.
  *
  * The array is passed to the constructor as is; no copy is made here.
  *
  * @param bytes bytes of the id
  * @return a `ModifierId` backed by `bytes`
  */
def bytesToId(bytes: Array[Byte]): ModifierId =
  new ModifierId(bytes)

/** Returns the byte array backing `id`.
  *
  * This is the id's own `hashBytes` array, not a copy.
  *
  * @param id the modifier id to unwrap
  * @return the `hashBytes` of `id`
  */
def idToBytes(id: ModifierId): Array[Byte] =
  id.hashBytes

/** Builds a `ModifierId` from its Base16 string form.
  *
  * NOTE(review): `.get` is called on the result of `Base16.decode`, so a string that cannot
  * be decoded presumably surfaces as an exception here; confirm against `Base16.decode`.
  *
  * @param s Base16-encoded bytes of the id
  * @return a `ModifierId` backed by the decoded bytes
  */
def stringToId(s: String): ModifierId = {
  val decoded = Base16.decode(s).get
  new ModifierId(decoded)
}

implicit val modifierOrdering : Ordering[ModifierId] = new Ordering[ModifierId] {
// We cannot use java.util.Arrays.compare because we have to stay compatible with the
// comparison of the Base16-encoded strings representing these byte arrays.
// So we have to treat the bytes as unsigned.
def compare(a: ModifierId, b: ModifierId): Int = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is an unsigned comparison, let's state it clearly in the comments.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the tests should be fixed. /cc @kushti
There is the following extension:

Extension(id: b9e2c5321993b37fb34608d08ae66c516120fed6c381a771e7e96790b64adff5, headerId: 743f7b91da88f5781bd1cdc457e8d2e50931bdc9ec0a3ec759e2b6e9e7c1b29c, fields: Vector(0005 -> 00000064, 0001 -> 001312d0, 0006 -> 000007d0, 0002 -> 00000168, 0007 -> 00000064, 0003 -> 00080000, 007b -> 00000001, 0008 -> 00000064, 0004 -> 000f4240, 007c -> 0000)) 

And here it is checked using ModifierId hash function:

  .validate(exDuplicateKeys, extension.fields.map(kv => bytesToId(kv._1)).distinct.length == extension.fields.length, extension.encodedId)

Is it a valid extension body? If so, should I replace this check not to use bytesToId function?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please replace bytesToId with just Base16.encode

// Unsigned lexicographic comparison of the two byte arrays.
// Read each array once instead of re-reading the field on every iteration.
val left = a.hashBytes
val right = b.hashBytes
val len = math.min(left.length, right.length)
var i = 0
var diff = 0
// Stop at the first position where the bytes differ.
while (diff == 0 && i < len) {
  // Mask with 0xFF so each byte is compared as an unsigned value in 0..255.
  diff = (left(i) & 0xFF) - (right(i) & 0xFF)
  i += 1
}
// When one array is a prefix of the other, the shorter one sorts first.
// The previous tie-break (`a.hashBytes.length - len`) returned 0 when `a` was the shorter
// array, so compare(a, b) == 0 while compare(b, a) > 0.
if (diff != 0) diff else left.length - right.length
}
}

implicit class ModifierIdOps(val m: ModifierId) extends AnyVal {
@inline def toBytes: Array[Byte] = idToBytes(m)
Expand All @@ -19,4 +79,12 @@ package object util {
/** Extension syntax on `Array[Byte]`: `bytes.toModifierId`.
  *
  * @param b the byte array being extended
  */
implicit class ByteArrayOps(val b: Array[Byte]) extends AnyVal {

  /** Converts the wrapped bytes to a `ModifierId` by delegating to `bytesToId`. */
  @inline def toModifierId: ModifierId =
    bytesToId(b)
}

/** Extension syntax on `String`: `str.toModifierId`.
  *
  * @param s the string being extended
  */
implicit class StringOps(val s: String) extends AnyVal {

  /** Converts the wrapped string to a `ModifierId` by delegating to `stringToId`. */
  @inline def toModifierId: ModifierId =
    stringToId(s)
}

/** Companion of `ModifierId` with a factory that accepts a string. */
object ModifierId {

  /** Builds a `ModifierId` from a string by delegating to `stringToId`.
    *
    * @param s string form of the id
    */
  def apply(s: String): ModifierId =
    stringToId(s)
}
}
20 changes: 20 additions & 0 deletions src/test/scala/scorex/ModifierIdSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,24 @@ class ModifierIdSpec extends AnyFlatSpec with Matchers {
bytes.toModifierId.toBytes shouldEqual bytes
}

"ModifierId" should "equals or not equal if and only if the corresponding Base16 strings are equal" in {
  // `sameText` repeats `reference`; `lastByteDiffers` changes only the final byte (FF -> 7F).
  val reference = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF"
  val sameText = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF"
  val lastByteDiffers = "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F9007F"

  // Each side builds its own instance, so equality is checked between distinct objects.
  ModifierId(reference) shouldEqual ModifierId(reference)
  ModifierId(reference) shouldEqual ModifierId(sameText)
  ModifierId(sameText) should not equal ModifierId(lastByteDiffers)
}

"ModifierId" should "provide the same ordering as Base16 strings" in {
  val samples = Array(
    "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF",
    "0101020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF",
    "FF01020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F900FF",
    "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F9007F",
    "0001020304050607080910111213141516171819F0F1F2F3F4F5F6F7F8F901FF"
  )

  // Check every ordered pair, including each sample against itself.
  for {
    left <- samples
    right <- samples
  } {
    val byId = modifierOrdering.compare(ModifierId(left), ModifierId(right))
    val byString = left.compare(right)
    // Only the sign matters; the magnitudes of the two comparisons may differ.
    math.signum(byId) shouldEqual math.signum(byString)
  }
}

}