diff --git a/gradle.properties b/gradle.properties index 0ddf0d6..d11fb73 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -massHashVersion = 1.3.2.1 +massHashVersion = 2.0.0.0 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 56c1252..83b44b9 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,7 +1,7 @@ [versions] guava_version = "33.5.0-jre" -junit_version = "6.0.1" -log4j_version = "2.25.2" +junit_version = "6.0.2" +log4j_version = "2.25.3" [libraries] guava = { module = "com.google.guava:guava", version.ref = "guava_version" } diff --git a/src/main/java/com/wildermods/masshash/Blob.java b/src/main/java/com/wildermods/masshash/Blob.java index bd832cf..ad92fbd 100644 --- a/src/main/java/com/wildermods/masshash/Blob.java +++ b/src/main/java/com/wildermods/masshash/Blob.java @@ -2,167 +2,105 @@ import java.io.IOException; import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; +import java.io.UncheckedIOException; +import java.security.MessageDigest; +import java.util.Objects; +import java.util.function.Supplier; import com.wildermods.masshash.exception.IntegrityException; import com.wildermods.masshash.utils.ByteUtil; /** - * Represents a data blob with associated hash. The Blob can store the data as - * a byte array and its hash, and includes methods for verifying the integrity - * of the data by checking its hash. + * A lightweight implementation of {@link IBlob} that represents data which can be read + * as a stream. Does not necessarily store the full byte array in memory. + *
+ * This is particularly useful for large files or streams (e.g., files on disk, network streams), + * where reading the entire content into memory is undesirable. The hash of the data is always stored + * and can be verified without retaining the raw bytes. + *
*/ -public record Blob(byte[] data, String hash) implements IBlob { +public final class Blob implements IBlob { - /** - * Constructs a Blob from the given data and computes its hash. - * - * @param data The byte array representing the data. - */ - public Blob(byte[] data) { - this(data, ByteUtil.hash(data)); - } - - /** - * Constructs a Blob from the given data and hash. - * - * @param data The byte array representing the data. - * @param hash The hash of the data. - * - * @throws IntegrityException if the provided hash does not match the data's hash. - */ - public Blob(byte[] data, Hash hash) throws IntegrityException { - this(data, hash.hash()); - verify(); - } - - /** - * Constructs a Blob from the contents of a file. - * - * @param path The path to the file. - * @throws IOException if an I/O error occurs while reading the file. - */ - public Blob(Path path) throws IOException { - this(Files.readAllBytes(path)); - } - - /** - * Constructs a Blob from the contents of a file and verifies its hash. - * - * @param path The path to the file. - * @param hash The expected hash of the file contents. - * - * @throws IOException if an I/O error occurs while reading the file. - * @throws IntegrityException if the file's hash does not match the provided hash. - */ - public Blob(Path path, String hash) throws IOException, IntegrityException { - this(Files.readAllBytes(path), hash); - verify(); - } - - /** - * Constructs a Blob from the contents of a file and verifies its hash. - * - * @param path The path to the file. - * @param hash The expected hash of the file contents. - * - * @throws IOException if an I/O error occurs while reading the file. - * @throws IntegrityException if the file's hash does not match the provided hash. - */ - public Blob(Path path, Hash hash) throws IOException, IntegrityException { - this(path, hash.hash()); - } - - /** - * Constructs a Blob from the data read from an InputStream. - * - * @param stream The InputStream from which data is read. 
- * @throws IOException if an I/O error occurs while reading from the stream. - */ - public Blob(InputStream stream) throws IOException { - this(stream.readAllBytes()); - } - - /** - * Constructs a Blob from the data read from an InputStream and verifies its hash. - * - * @param stream The InputStream from which data is read. - * @param hash The expected hash of the data. - * - * @throws IOException if an I/O error occurs while reading from the stream. - * @throws IntegrityException if the data's hash does not match the provided hash. - */ - public Blob(InputStream stream, String hash) throws IOException, IntegrityException { - this(stream.readAllBytes(), hash); - verify(); - } - - /** - * Constructs a Blob from the data read from an InputStream and verifies its hash. - * - * @param stream The InputStream from which data is read. - * @param hash The expected hash of the data. - * - * @throws IOException if an I/O error occurs while reading from the stream. - * @throws IntegrityException if the data's hash does not match the provided hash. - */ - public Blob(InputStream stream, Hash hash) throws IOException, IntegrityException { - this(stream, hash.hash()); - } + private final Supplier+ * This can occur if the file is deleted, the network stream fails, or any other I/O error + * prevents the stream from being accessed. + *
+ * + * @return {@code true} if the data stream cannot be opened, {@code false} otherwise + */ @Override - public int hashCode() { - return hash.hashCode(); + public boolean isTransient() { + try (InputStream stream = streamSupplier.get()){ + return false; + } + catch(Exception e) { + return true; + } } - + /** - * Compares this object with another Hash object for equality. All {@link Blob} objects are also instances of {@link Hash}. + * Returns a fresh {@link InputStream} for reading the blob's data. *- * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares - * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, - * the method returns {@code false}. + * Each call returns a new stream. The caller is responsible for closing it. *
- * - * @param o the object to compare with this Hash object. - * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + * + * @return a fresh {@link InputStream} for reading the blob's contents + * @throws IOException if the stream cannot be opened */ @Override - public boolean equals(Object o) { - if(o instanceof Hash) { - return hash().equals(((Hash) o).hash()); - } - return false; + public InputStream dataStream() throws IOException { + try { + return streamSupplier.get(); + } + catch(Exception e) { + throw new IOException(e); + } } /** - * Verifies that the data stored in this object matches the provided hash. + * Verifies that the data matches the provided hash. ** This method computes the hash of the current data and compares it to the expected hash. If the hashes do not match, * an {@link IntegrityException} is thrown. This method ensures the integrity of the data. @@ -172,10 +110,39 @@ public boolean equals(Object o) { */ @Override public void verify() throws IntegrityException { - String dataHash = ByteUtil.hash(data()); - if(!dataHash.equals(hash)) { - throw new IntegrityException("Expected hash " + hash + " but got " + dataHash); + try (InputStream stream = dataStream()){ + String actualHash = ByteUtil.hash(stream); + if(!actualHash.equals(hash)) { + throw new IntegrityException("Expected hash " + hash + " but got " + actualHash); + } + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public int hashCode() { + return hash.hashCode(); + } + + /** + * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. + *
+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *
+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); } + return false; } /** @@ -189,6 +156,7 @@ public void verify() throws IntegrityException { */ @Override public String toString() { - return hash(); + return hash(); } + } diff --git a/src/main/java/com/wildermods/masshash/BlobFactory.java b/src/main/java/com/wildermods/masshash/BlobFactory.java new file mode 100644 index 0000000..f2fc46a --- /dev/null +++ b/src/main/java/com/wildermods/masshash/BlobFactory.java @@ -0,0 +1,127 @@ +package com.wildermods.masshash; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.Provider; +import java.util.function.Supplier; + +import com.wildermods.masshash.utils.ByteUtil; + +public class BlobFactory { + + public final Provider provider; + public final Supplier+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *
+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); + } + return false; + } + + /** + * Returns a string representation of this Blob, which is its hash value. + *+ * This method overrides the default {@link Object#toString()} method to provide a more meaningful + * string representation of the Blob. + *
+ * + * @return the hash of the Blob as a string. + */ + @Override + public String toString() { + return hash(); + } + + } + } + /** * Creates a new {@link Hash} instance from the given string hash value. * @@ -41,6 +111,16 @@ public default boolean hashEquals(String hash) { * @return a new {@link Hash} instance. */ public static Hash of(String hash) { - return new Blob((byte[])null, hash); + return new Internal.Impl(hash, ByteUtil.DEFAULT_DIGEST); + } + + /** + * Creates a new {@link Hash} instance from the given string and digest values. + * + * @param hash the string hash value. + * @return a new {@link Hash} instance. + */ + public static Hash of(Supplier+ * Deprecated because reading the entire data into memory may be expensive for large streams. + * Prefer {@link #dataStream()} instead. + *
+ * + * @return the byte array of the blob + * @throws UncheckedIOException if reading the stream fails + */ + @Override + @Deprecated(forRemoval = false) + public byte[] data(); + } diff --git a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java index c430c8d..067c488 100644 --- a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java +++ b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java @@ -1,11 +1,36 @@ package com.wildermods.masshash.utils; +import java.io.IOException; +import java.io.InputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.security.Provider; import java.util.Objects; +import java.util.function.Function; +import java.util.function.Supplier; +/** + * Utility methods for hashing byte data and streams using cryptographic hash functions. + *+ * This class provides convenience overloads for hashing byte arrays and {@link InputStream}s, + * supporting explicit algorithms and security providers. All hashes are returned as lowercase + * hexadecimal strings. + *
+ */ public class ByteUtil { + public static final Supplier@@ -20,15 +45,176 @@ public class ByteUtil { * @throws NullPointerException if the provided byte array is null. */ public static String hash(byte[] bytes) { - Objects.requireNonNull(bytes, "Input byte array cannot be null."); - try { - MessageDigest digest = MessageDigest.getInstance("SHA-1"); - return bytesToHex(digest.digest(bytes)); - } catch (NoSuchAlgorithmException e) { - throw new AssertionError("SHA-1 algorithm is unavailable.", e); + return hash(bytes, DEFAULT_DIGEST); + } + + /** + * Hashes a given byte array using the specified algorithm. + * This method takes the provided byte array, hashes it using the provided algorithm, + * and returns the resulting hash as a hexadecimal string. + * + * @param bytes the byte array to hash + * @param algorithm the name of the hash algorithm + * + * @return a hexadecimal string representing the hash + * + * @throws NoSuchAlgorithmException if the algorithm is not available + * @throws NullPointerException if {@code bytes} or {@code algorithm} is null + */ + public static String hash(byte[] bytes, String algorithm) throws NoSuchAlgorithmException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + return hash(bytes, consume.apply(MessageDigest.getInstance(algorithm))); + } + + /** + * Hashes a given byte array using the specified algorithm and security provider. + * This method takes the provided byte array, hashes it using the provided algorithm, + * and returns the resulting hash as a hexadecimal string. 
+ * + * @param bytes the byte array to hash + * @param algorithm the name of the hash algorithm + * @param provider the security provider to use + * + * @return a hexadecimal string representing the hash + * + * @throws NoSuchAlgorithmException if the algorithm is not available from the provider + * @throws NullPointerException if any argument is null + */ + public static String hash(byte[] bytes, String algorithm, Provider provider) throws NoSuchAlgorithmException { + Objects.requireNonNull(algorithm, "Algorithm cannot be null"); + Objects.requireNonNull(provider, "Provider cannot be null"); + return hash(bytes, consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + /** + * Hashes a given byte array using a {@link MessageDigest} supplied by the caller. + *
+ * The provided {@code Supplier} is invoked exactly once to obtain a fresh + * {@link MessageDigest} instance. The digest is then used to hash the entire + * byte array in a single operation. + *
+ * + * @param bytes the byte array to hash + * @param digest a callable that supplies a {@link MessageDigest} instance + * + * @return a hexadecimal string representing the hash of the byte array + * + * @throws NullPointerException if {@code bytes}, {@code digest}, or the returned + * {@link MessageDigest} is {@code null} + * @throws RuntimeException if the callable throws any other checked exception. The + * thrown checked exception is the cause. + */ + public static String hash(byte[] bytes, Supplier+ * The stream is read sequentially in fixed-size chunks and is not buffered internally + * beyond the chunk size. This method does not close the stream. + *
+ * + * @param stream the input stream to hash + * + * @return a hexadecimal string representing the SHA-1 hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NullPointerException if {@code stream} is null + */ + public static String hash(InputStream stream) throws IOException { + return hash(stream, DEFAULT_DIGEST); + } + + /** + * Hashes the contents of an {@link InputStream} using the specified algorithm. + *+ * The stream is consumed by this operation and is not closed. + *
+ * + * @param stream the input stream to hash + * @param algorithm the name of the hash algorithm + * + * @return a hexadecimal string representing the hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the algorithm is not available + * @throws NullPointerException if {@code stream} or {@code algorithm} is null + */ + public static String hash(InputStream stream, String algorithm) throws NoSuchAlgorithmException, IOException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + return hash(stream, consume.apply(MessageDigest.getInstance(algorithm))); + } + + /** + * Hashes the contents of an {@link InputStream} using the specified algorithm + * and security provider. + * + *+ * The stream is consumed by this operation and is not closed. + *
+ * + * @param stream the input stream to hash + * @param algorithm the name of the hash algorithm + * @param provider the security provider to use + * + * @return a hexadecimal string representing the hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the algorithm is not available from the provider + * @throws NullPointerException if any argument is null + */ + public static String hash(InputStream stream, String algorithm, Provider provider) throws NoSuchAlgorithmException, IOException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + Objects.requireNonNull(provider, "provider cannot be null"); + return hash(stream, consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + /** + * Hashes the contents of an {@link InputStream} using a {@link MessageDigest} + * supplied by the caller. + *+ * The provided {@code Callable} is invoked exactly once to obtain a fresh + * {@link MessageDigest} instance. The stream is read sequentially in 1 MiB sized + * chunks and fed into the digest. + *
+ *+ * The stream is consumed by this operation and is not closed. + *
+ * + * @param stream the input stream to hash + * @param digest a callable that supplies a {@link MessageDigest} instance + * + * @return a hexadecimal string representing the hash of the stream contents + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the callable throws this exception + * @throws NullPointerException if {@code stream}, {@code digest}, or the returned + * {@link MessageDigest} is {@code null} + * @throws RuntimeException if the callable throws any other checked exception. The + * thrown checked exception is the cause. + */ + public static String hash(InputStream stream, Supplier@@ -44,11 +230,12 @@ public static String hash(byte[] bytes) { * @throws NullPointerException if the provided byte array is null. */ private static String bytesToHex(byte[] bytes) { - Objects.requireNonNull(bytes, "Input byte array cannot be null."); - StringBuilder hex = new StringBuilder(); - for(byte b : bytes) { - hex.append(String.format("%02x", b)); + char[] out = new char[bytes.length * 2]; + for (int i = 0, j = 0; i < bytes.length; i++) { + int v = bytes[i] & 0xFF; + out[j++] = HEX[v >>> 4]; + out[j++] = HEX[v & 0x0F]; } - return hex.toString(); + return new String(out); } } \ No newline at end of file diff --git a/src/test/java/com/wildermods/masshash/BlobTests.java b/src/test/java/com/wildermods/masshash/BlobTests.java index 0f92860..3ad671b 100644 --- a/src/test/java/com/wildermods/masshash/BlobTests.java +++ b/src/test/java/com/wildermods/masshash/BlobTests.java @@ -3,21 +3,47 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrowsExactly; +import java.util.function.Supplier; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.UncheckedIOException; + import org.junit.jupiter.api.Test; import com.wildermods.masshash.exception.IntegrityException; +import com.wildermods.masshash.utils.ByteUtil; public 
class BlobTests { - private static final Blob testBlob = new Blob("test".getBytes()); - private static final Blob testBlob2 = new Blob("test".getBytes()); - private static final String testHash = "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"; + private static final BlobFactory factory = new BlobFactory(); + + private static final Blob testBlob = factory.blob("test".getBytes()); + private static final Blob testBlob2 = factory.blob("test".getBytes()); + private static final Blob lightBlob = factory.blob(() -> { + return new ByteArrayInputStream("test".getBytes()); + }); + + @Test + public void testNullConstructors() { + assertThrowsExactly(NullPointerException.class, () -> factory.blob((byte[])null)); + assertThrowsExactly(NullPointerException.class, () -> factory.blob(new byte[0], (String)null)); + assertThrowsExactly(NullPointerException.class, () -> factory.blob((Supplier)null, "")); + assertThrowsExactly(NullPointerException.class, () -> factory.blob(() -> { + try { + return lightBlob.dataStream(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, (String)null)); + } + @Test public void testBlob() { assertEquals(testBlob.hash(), testHash); assertEquals(testBlob.toString(), testHash); + assertEquals(lightBlob.toString(), testHash); } @Test @@ -25,35 +51,28 @@ public void testBlobEquality() { assertEquals(testBlob, testBlob); assertEquals(testBlob, testBlob2); assertEquals(testBlob2, testBlob); - } - - @Test - public void testDroppedBlobEquality() { - Hash testBlob2 = testBlob.dropData(); - - assertEquals(testBlob, testBlob); - assertEquals(testBlob, testBlob2); - assertEquals(testBlob2, testBlob); - assertEquals(testBlob2, testBlob2); - } - - @Test - public void testDropData() { - Blob dropped = (Blob) testBlob.dropData(); - assertThrowsExactly(UnsupportedOperationException.class, () -> {dropped.data();}); - assertThrowsExactly(UnsupportedOperationException.class, () -> {dropped.dropData();}); + assertEquals(lightBlob, testBlob); + 
assertEquals(testBlob, lightBlob); } @Test public void testVerification() throws IntegrityException { testBlob.verify(); - Blob dropped = (Blob) testBlob.dropData(); - Blob corrupt = new Blob(testBlob.data(), new Blob("corrupt".getBytes()).hash()); + IBlob corrupt = factory.blob(testBlob.data(), factory.blob("corrupt".getBytes()).hash()); + IBlob corrupt2 = new Blob( + () -> { + try { + return lightBlob.dataStream(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, ByteUtil.hash("corrupt".getBytes()) + ); - assertThrowsExactly(UnsupportedOperationException.class, () -> dropped.verify()); assertThrowsExactly(IntegrityException.class, () -> corrupt.verify()); - assertThrowsExactly(IntegrityException.class, () -> new Blob("test".getBytes(), new Blob("corrupt".getBytes()))); + assertThrowsExactly(IntegrityException.class, () -> factory.blob("test".getBytes(), factory.blob("corrupt".getBytes()).hash()).verify()); + assertThrowsExactly(IntegrityException.class, () -> corrupt2.verify()); } } diff --git a/src/test/java/com/wildermods/masshash/HasherTests.java b/src/test/java/com/wildermods/masshash/HasherTests.java index c9b28bf..68116ad 100644 --- a/src/test/java/com/wildermods/masshash/HasherTests.java +++ b/src/test/java/com/wildermods/masshash/HasherTests.java @@ -42,7 +42,7 @@ void verifyTest() throws IntegrityException, IOException { System.out.println("Verification test:"); hasher = new Hasher(sources, (f,b) -> { try { - b.verify(); + ((IBlob)b).verify(); } catch (IntegrityException e) { throw new RuntimeException(e); }