diff --git a/gradle.properties b/gradle.properties index 0ddf0d6..d11fb73 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -massHashVersion = 1.3.2.1 +massHashVersion = 2.0.0.0 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 56c1252..83b44b9 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,7 +1,7 @@ [versions] guava_version = "33.5.0-jre" -junit_version = "6.0.1" -log4j_version = "2.25.2" +junit_version = "6.0.2" +log4j_version = "2.25.3" [libraries] guava = { module = "com.google.guava:guava", version.ref = "guava_version" } diff --git a/src/main/java/com/wildermods/masshash/Blob.java b/src/main/java/com/wildermods/masshash/Blob.java index bd832cf..ad92fbd 100644 --- a/src/main/java/com/wildermods/masshash/Blob.java +++ b/src/main/java/com/wildermods/masshash/Blob.java @@ -2,167 +2,105 @@ import java.io.IOException; import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; +import java.io.UncheckedIOException; +import java.security.MessageDigest; +import java.util.Objects; +import java.util.function.Supplier; import com.wildermods.masshash.exception.IntegrityException; import com.wildermods.masshash.utils.ByteUtil; /** - * Represents a data blob with associated hash. The Blob can store the data as - * a byte array and its hash, and includes methods for verifying the integrity - * of the data by checking its hash. + * A lightweight implementation of {@link IBlob} that represents data which can be read + * as a stream. Does not necessarily store the full byte array in memory. + *

+ * This is particularly useful for large files or streams (e.g., files on disk, network streams), + * where reading the entire content into memory is undesirable. The hash of the data is always stored + * and can be verified without retaining the raw bytes. + *

*/ -public record Blob(byte[] data, String hash) implements IBlob { +public final class Blob implements IBlob { - /** - * Constructs a Blob from the given data and computes its hash. - * - * @param data The byte array representing the data. - */ - public Blob(byte[] data) { - this(data, ByteUtil.hash(data)); - } - - /** - * Constructs a Blob from the given data and hash. - * - * @param data The byte array representing the data. - * @param hash The hash of the data. - * - * @throws IntegrityException if the provided hash does not match the data's hash. - */ - public Blob(byte[] data, Hash hash) throws IntegrityException { - this(data, hash.hash()); - verify(); - } - - /** - * Constructs a Blob from the contents of a file. - * - * @param path The path to the file. - * @throws IOException if an I/O error occurs while reading the file. - */ - public Blob(Path path) throws IOException { - this(Files.readAllBytes(path)); - } - - /** - * Constructs a Blob from the contents of a file and verifies its hash. - * - * @param path The path to the file. - * @param hash The expected hash of the file contents. - * - * @throws IOException if an I/O error occurs while reading the file. - * @throws IntegrityException if the file's hash does not match the provided hash. - */ - public Blob(Path path, String hash) throws IOException, IntegrityException { - this(Files.readAllBytes(path), hash); - verify(); - } - - /** - * Constructs a Blob from the contents of a file and verifies its hash. - * - * @param path The path to the file. - * @param hash The expected hash of the file contents. - * - * @throws IOException if an I/O error occurs while reading the file. - * @throws IntegrityException if the file's hash does not match the provided hash. - */ - public Blob(Path path, Hash hash) throws IOException, IntegrityException { - this(path, hash.hash()); - } - - /** - * Constructs a Blob from the data read from an InputStream. - * - * @param stream The InputStream from which data is read. 
- * @throws IOException if an I/O error occurs while reading from the stream. - */ - public Blob(InputStream stream) throws IOException { - this(stream.readAllBytes()); - } - - /** - * Constructs a Blob from the data read from an InputStream and verifies its hash. - * - * @param stream The InputStream from which data is read. - * @param hash The expected hash of the data. - * - * @throws IOException if an I/O error occurs while reading from the stream. - * @throws IntegrityException if the data's hash does not match the provided hash. - */ - public Blob(InputStream stream, String hash) throws IOException, IntegrityException { - this(stream.readAllBytes(), hash); - verify(); - } - - /** - * Constructs a Blob from the data read from an InputStream and verifies its hash. - * - * @param stream The InputStream from which data is read. - * @param hash The expected hash of the data. - * - * @throws IOException if an I/O error occurs while reading from the stream. - * @throws IntegrityException if the data's hash does not match the provided hash. - */ - public Blob(InputStream stream, Hash hash) throws IOException, IntegrityException { - this(stream, hash.hash()); - } + private final Supplier digest; + private final Supplier streamSupplier; + private final String hash; - /** - * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. - * The original blob still holds the data for as long as you keep it referenced. - * - * @return A new {@link Hash} object that represents this blob, but with no associated data. 
- */ - public Hash dropData() { - if(isTransient()) { - throw new UnsupportedOperationException("Data already dropped!"); - } - return new Blob((byte[])null, hash); + Blob (Supplier digest, Supplier streamSupplier, String hash) { + Objects.requireNonNull(digest); + Objects.requireNonNull(streamSupplier); + Objects.requireNonNull(streamSupplier.get()); + Objects.requireNonNull(hash); + this.digest = digest; + this.streamSupplier = streamSupplier; + this.hash = hash; + } + + Blob(Supplier streamSupplier, String hash) { + this(ByteUtil.DEFAULT_DIGEST, streamSupplier, hash); + } + + public Supplier digest() { + return digest; + } + + public String hash() { + return hash; } /** - * Returns the data associated with this Blob - * - * @return a byte array that contains the data stored in this blob + * {@inheritDoc} */ @Override + @Deprecated public byte[] data() { - if(data == null) { - throw new UnsupportedOperationException("Null data! Was the data dropped?"); + try (InputStream stream = dataStream()){ + return stream.readAllBytes(); + } + catch(IOException e) { + throw new UncheckedIOException(e); } - return data; } + /** + * Checks if this blob is transient, meaning the underlying stream cannot be opened. + *

+ * This can occur if the file is deleted, the network stream fails, or any other I/O error + * prevents the stream from being accessed. + *

+ * + * @return {@code true} if the data stream cannot be opened, {@code false} otherwise + */ @Override - public int hashCode() { - return hash.hashCode(); + public boolean isTransient() { + try (InputStream stream = streamSupplier.get()){ + return false; + } + catch(Exception e) { + return true; + } } - + /** - * Compares this object with another Hash object for equality. All {@link Blob} objects are also instances of {@link Hash}. + * Returns a fresh {@link InputStream} for reading the blob's data. *

- * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares - * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, - * the method returns {@code false}. + * Each call returns a new stream. The caller is responsible for closing it. *

- * - * @param o the object to compare with this Hash object. - * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + * + * @return a fresh {@link InputStream} for reading the blob's contents + * @throws IOException if the stream cannot be opened */ @Override - public boolean equals(Object o) { - if(o instanceof Hash) { - return hash().equals(((Hash) o).hash()); - } - return false; + public InputStream dataStream() throws IOException { + try { + return streamSupplier.get(); + } + catch(Exception e) { + throw new IOException(e); + } } /** - * Verifies that the data stored in this object matches the provided hash. + * Verifies that the data matches the provided hash. *

* This method computes the hash of the current data and compares it to the expected hash. If the hashes do not match, * an {@link IntegrityException} is thrown. This method ensures the integrity of the data. @@ -172,10 +110,39 @@ public boolean equals(Object o) { */ @Override public void verify() throws IntegrityException { - String dataHash = ByteUtil.hash(data()); - if(!dataHash.equals(hash)) { - throw new IntegrityException("Expected hash " + hash + " but got " + dataHash); + try (InputStream stream = dataStream()){ + String actualHash = ByteUtil.hash(stream); + if(!actualHash.equals(hash)) { + throw new IntegrityException("Expected hash " + hash + " but got " + actualHash); + } + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public int hashCode() { + return hash.hashCode(); + } + + /** + * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. + *

+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *

+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); } + return false; } /** @@ -189,6 +156,7 @@ public void verify() throws IntegrityException { */ @Override public String toString() { - return hash(); + return hash(); } + } diff --git a/src/main/java/com/wildermods/masshash/BlobFactory.java b/src/main/java/com/wildermods/masshash/BlobFactory.java new file mode 100644 index 0000000..f2fc46a --- /dev/null +++ b/src/main/java/com/wildermods/masshash/BlobFactory.java @@ -0,0 +1,127 @@ +package com.wildermods.masshash; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.Provider; +import java.util.function.Supplier; + +import com.wildermods.masshash.utils.ByteUtil; + +public class BlobFactory { + + public final Provider provider; + public final Supplier digest; + + private BlobFactory (Provider provider, Supplier digest) { + this.provider = provider; + this.digest = digest; + } + + public BlobFactory() { + this(null, ByteUtil.DEFAULT_DIGEST); + } + + public BlobFactory(String algorithm) throws NoSuchAlgorithmException { + this(null, ByteUtil.consume.apply(MessageDigest.getInstance(algorithm))); + } + + public BlobFactory(Supplier digest) { + this(null, digest); + } + + public BlobFactory(Provider provider) throws NoSuchAlgorithmException { + this(provider, "SHA-1"); + } + + public BlobFactory(Provider provider, String algorithm) throws NoSuchAlgorithmException { + this(provider, ByteUtil.consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + public String algorithm() { + 
return digest.get().getAlgorithm(); + } + + public Blob blob(Supplier stream) { + try { + return new Blob(digest, stream, ByteUtil.hash(stream.get(), digest)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public Blob blob(Supplier stream, Hash hash) { + return new Blob(digest, stream, hash.hash()); + } + + public Blob blob(Supplier stream, String hash) { + final Blob blob = new Blob(digest, stream, hash); + return blob; + } + + public Blob blob(Path path) throws IOException { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + return blob(streamSupplier); + } + + public Blob blob(Path path, Hash hash) { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + return blob(streamSupplier, hash); + } + + public Blob blob(Path path, String hash) throws IOException { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + return blob(streamSupplier, hash); + } + + public Blob blob(byte[] data) { + Supplier streamSupplier = () -> { + return new ByteArrayInputStream(data); + }; + + return blob(streamSupplier); + } + + public Blob blob(byte[] data, Hash hash) { + Supplier streamSupplier = () -> { + return new ByteArrayInputStream(data); + }; + + return blob(streamSupplier, hash); + } + + public Blob blob(byte[] data, String hash) { + Supplier streamSupplier = () -> { + return new ByteArrayInputStream(data); + }; + + return blob(streamSupplier, hash); + } + +} diff --git a/src/main/java/com/wildermods/masshash/Data.java b/src/main/java/com/wildermods/masshash/Data.java index 2a7b974..dcbdad3 100644 --- a/src/main/java/com/wildermods/masshash/Data.java +++ b/src/main/java/com/wildermods/masshash/Data.java @@ -1,5 +1,8 @@ package 
com.wildermods.masshash; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; /** @@ -15,6 +18,21 @@ public interface Data { */ public byte[] data(); + /** + * Returns an {@link InputStream} for reading the data. + * + * Default implementation wraps {@link data()} in a {@link ByteArrayInputStream} + * Classes that can stream data without holding it in memory should override this. + * + * @return an {@link InputStream} for the data + */ + public default InputStream dataStream() throws IOException { + if(isTransient()) { + throw new IllegalStateException("No data to stream!"); + } + return new ByteArrayInputStream(data()); + } + /** * Checks if the data is transient, meaning the data is null or otherwise unavailable. * diff --git a/src/main/java/com/wildermods/masshash/Hash.java b/src/main/java/com/wildermods/masshash/Hash.java index 6bb4893..7ceccb5 100644 --- a/src/main/java/com/wildermods/masshash/Hash.java +++ b/src/main/java/com/wildermods/masshash/Hash.java @@ -1,5 +1,10 @@ package com.wildermods.masshash; +import java.security.MessageDigest; +import java.util.function.Supplier; + +import com.wildermods.masshash.utils.ByteUtil; + /** * A functional interface representing a hash. It provides methods for obtaining * the hash value as a string and for comparing it to other hashes. @@ -14,6 +19,21 @@ public interface Hash { */ public String hash(); + /** + * @return The hashing algorithm used. + */ + public default String algorithm() { + return digest().get().getAlgorithm(); + } + + /** + * The messageDigest that is used to obtain the + * algorithm + */ + public default Supplier digest() { + return ByteUtil.DEFAULT_DIGEST; + } + /** * Compares this hash to another hash and returns true if they are equal. 
* @@ -34,6 +54,56 @@ public default boolean hashEquals(String hash) { return hash().equals(hash); } + public static class Internal { + private static record Impl(String hash, Supplier digest) implements Hash { + + private Impl(String hash, Supplier digest) { + this.hash = hash; + final MessageDigest d = digest.get(); + this.digest = () -> d; + } + + @Override + public int hashCode() { + return hash.hashCode(); + } + + /** + * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. + *

+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *

+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); + } + return false; + } + + /** + * Returns a string representation of this Blob, which is its hash value. + *

+ * This method overrides the default {@link Object#toString()} method to provide a more meaningful + * string representation of this {@link Hash}. + *

+ * + * @return the hash of the Blob as a string. + */ + @Override + public String toString() { + return hash(); + } + + } + } + /** * Creates a new {@link Hash} instance from the given string hash value. * @@ -41,6 +111,16 @@ public default boolean hashEquals(String hash) { * @return a new {@link Hash} instance. */ public static Hash of(String hash) { - return new Blob((byte[])null, hash); + return new Internal.Impl(hash, ByteUtil.DEFAULT_DIGEST); + } + + /** + * Creates a new {@link Hash} instance from the given string and digest values. + * + * @param hash the string hash value. + * @return a new {@link Hash} instance. + */ + public static Hash of(Supplier digest, String hash) { + return new Internal.Impl(hash, digest); } } \ No newline at end of file diff --git a/src/main/java/com/wildermods/masshash/Hasher.java b/src/main/java/com/wildermods/masshash/Hasher.java index 5913f0f..3bb64de 100644 --- a/src/main/java/com/wildermods/masshash/Hasher.java +++ b/src/main/java/com/wildermods/masshash/Hasher.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.security.MessageDigest; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; @@ -45,7 +46,7 @@ * Subclasses can access the resulting hash-to-path mappings via {@link #results()}. *

*/ -public abstract class Hasher { +public abstract class Hasher> { /** * A multimap that stores computed hashes and their associated file paths. @@ -61,6 +62,7 @@ public abstract class Hasher { */ protected SetMultimap blobs; protected Logger logger = LogManager.getLogger(); + protected BlobFactory blobFactory = new BlobFactory(); /** * Protected no-argument constructor for subclass serialization. @@ -93,7 +95,7 @@ public Hasher(final Stream files) throws IOException { * before being added to the result map. The updated reference value will be associated with the computed hash. * @throws IOException if an I/O error occurs during hashing */ - public Hasher(final Stream files, final BiConsumer, Blob> forEachBlob) throws IOException { + public Hasher(final Stream files, final BiConsumer, IBlob> forEachBlob) throws IOException { this(files, (p) -> true, forEachBlob); } @@ -112,7 +114,7 @@ public Hasher(final Stream files, final BiConsumer, Blob> * @throws IOException if an I/O error occurs during hashing * @throws IllegalArgumentException if no files match the predicate */ - public Hasher(final Stream files, final Predicate predicate, final BiConsumer, Blob> forEachBlob) throws IOException { + public Hasher(final Stream files, final Predicate predicate, final BiConsumer, IBlob> forEachBlob) throws IOException { this(files, Runtime.getRuntime().availableProcessors(), predicate, forEachBlob); } @@ -147,7 +149,7 @@ public Hasher(final Stream files, final Predicate predicate, final B * @throws IOException if an error occurs while reading files or during thread execution * @throws IllegalArgumentException if no files matched the provided predicate */ - public Hasher(final Stream files, int threads, final Predicate predicate, final BiConsumer,Blob> forEachBlob) throws IOException { + public Hasher(final Stream files, int threads, final Predicate predicate, final BiConsumer,IBlob> forEachBlob) throws IOException { final int processors = 
Runtime.getRuntime().availableProcessors(); Objects.requireNonNull(files); Objects.requireNonNull(predicate); @@ -209,14 +211,18 @@ public Hasher(final Stream files, int threads, final Predicate predi List sublist = allFiles.subList(i, Math.min(i + chunkSize, allFiles.size())); futures.add(pool.submit(() -> { + //One reusable digest per thread + MessageDigest digest = blobFactory.digest.get(); + BlobFactory factory = new BlobFactory(() -> digest); //Each thread uses a local map to avoid synchronization Map> local = new HashMap<>(); for (Path file : sublist) { Reference newFile = new Reference<>(file); + //Read and hash the file into a Blob, then discard the Blob’s data to conserve memory - Hash blob = new Blob(file); - forEachBlob.accept(newFile, (Blob) blob); - ((Blob) blob).dropData(); + digest.reset(); + IBlob blob = factory.blob(file); + forEachBlob.accept(newFile, (IBlob) blob); //Group files by their content hash. Files with the same hash will share the same key local.computeIfAbsent(blob, k -> new HashSet<>()).add(newFile.get()); diff --git a/src/main/java/com/wildermods/masshash/IBlob.java b/src/main/java/com/wildermods/masshash/IBlob.java index 7fa11ef..1540b55 100644 --- a/src/main/java/com/wildermods/masshash/IBlob.java +++ b/src/main/java/com/wildermods/masshash/IBlob.java @@ -1,5 +1,7 @@ package com.wildermods.masshash; +import java.io.UncheckedIOException; + import com.wildermods.masshash.exception.IntegrityException; /** @@ -17,4 +19,19 @@ public interface IBlob extends Data, Hash { * indicating data corruption or alteration. */ public void verify() throws IntegrityException; + + /** + * Returns the full byte array of the blob data. + *

+ * Deprecated because reading the entire data into memory may be expensive for large streams. + * Prefer {@link #dataStream()} instead. + *

+ * + * @return the byte array of the blob + * @throws UncheckedIOException if reading the stream fails + */ + @Override + @Deprecated(forRemoval = false) + public byte[] data(); + } diff --git a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java index c430c8d..067c488 100644 --- a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java +++ b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java @@ -1,11 +1,36 @@ package com.wildermods.masshash.utils; +import java.io.IOException; +import java.io.InputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.security.Provider; import java.util.Objects; +import java.util.function.Function; +import java.util.function.Supplier; +/** + * Utility methods for hashing byte data and streams using cryptographic hash functions. + *

+ * This class provides convenience overloads for hashing byte arrays and {@link InputStream}s, + * supporting explicit algorithms and security providers. All hashes are returned as lowercase + * hexadecimal strings. + *

+ */ public class ByteUtil { + public static final Supplier DEFAULT_DIGEST = () -> { + try { + return MessageDigest.getInstance("SHA-1"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + }; + + public static final Function> consume = (p) -> { + return () -> p; + }; + /** * Hashes a given byte array using the SHA-1 algorithm. *

@@ -20,15 +45,176 @@ public class ByteUtil { * @throws NullPointerException if the provided byte array is null. */ public static String hash(byte[] bytes) { - Objects.requireNonNull(bytes, "Input byte array cannot be null."); - try { - MessageDigest digest = MessageDigest.getInstance("SHA-1"); - return bytesToHex(digest.digest(bytes)); - } catch (NoSuchAlgorithmException e) { - throw new AssertionError("SHA-1 algorithm is unavailable.", e); + return hash(bytes, DEFAULT_DIGEST); + } + + /** + * Hashes a given byte array using the specified algorithm. + * This method takes the provided byte array, hashes it using the provided algorithm, + * and returns the resulting hash as a hexadecimal string. + * + * @param bytes the byte array to hash + * @param algorithm the name of the hash algorithm + * + * @return a hexadecimal string representing the hash + * + * @throws NoSuchAlgorithmException if the algorithm is not available + * @throws NullPointerException if {@code bytes} or {@code algorithm} is null + */ + public static String hash(byte[] bytes, String algorithm) throws NoSuchAlgorithmException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + return hash(bytes, consume.apply(MessageDigest.getInstance(algorithm))); + } + + /** + * Hashes a given byte array using the specified algorithm and security provider. + * This method takes the provided byte array, hashes it using the provided algorithm, + * and returns the resulting hash as a hexadecimal string. 
+ * + * @param bytes the byte array to hash + * @param algorithm the name of the hash algorithm + * @param provider the security provider to use + * + * @return a hexadecimal string representing the hash + * + * @throws NoSuchAlgorithmException if the algorithm is not available from the provider + * @throws NullPointerException if any argument is null + */ + public static String hash(byte[] bytes, String algorithm, Provider provider) throws NoSuchAlgorithmException { + Objects.requireNonNull(algorithm, "Algorithm cannot be null"); + Objects.requireNonNull(provider, "Provider cannot be null"); + return hash(bytes, consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + /** + * Hashes a given byte array using a {@link MessageDigest} supplied by the caller. + *

+ * The provided {@code Supplier} is invoked exactly once to obtain a + * {@link MessageDigest} instance. The digest is then used to hash the entire + * byte array in a single operation. + *

+ * + * @param bytes the byte array to hash + * @param digest a callable that supplies a {@link MessageDigest} instance + * + * @return a hexadecimal string representing the hash of the byte array + * + * @throws NullPointerException if {@code bytes}, {@code digest}, or the returned + * {@link MessageDigest} is {@code null} + * @throws RuntimeException if the callable throws any other checked exception. The + * thrown checked exception is the cause. + */ + public static String hash(byte[] bytes, Supplier digest) { + Objects.requireNonNull(bytes, "bytes cannot be null"); + Objects.requireNonNull(digest, "digest supplier cannot be null"); + + MessageDigest d; + Objects.requireNonNull(d = digest.get(), "caller provided null MessageDigest"); + return bytesToHex(d.digest(bytes)); + } + + /** + * Hashes the contents of an {@link InputStream} using the SHA-1 algorithm. + *

+ * The stream is read sequentially in fixed-size chunks and is not buffered internally + * beyond the chunk size. This method does not close the stream. + *

+ * + * @param stream the input stream to hash + * + * @return a hexadecimal string representing the SHA-1 hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NullPointerException if {@code stream} is null + */ + public static String hash(InputStream stream) throws IOException { + return hash(stream, DEFAULT_DIGEST); + } + + /** + * Hashes the contents of an {@link InputStream} using the specified algorithm. + *

+ * The stream is consumed by this operation and is not closed. + *

+ * + * @param stream the input stream to hash + * @param algorithm the name of the hash algorithm + * + * @return a hexadecimal string representing the hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the algorithm is not available + * @throws NullPointerException if {@code stream} or {@code algorithm} is null + */ + public static String hash(InputStream stream, String algorithm) throws NoSuchAlgorithmException, IOException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + return hash(stream, consume.apply(MessageDigest.getInstance(algorithm))); + } + + /** + * Hashes the contents of an {@link InputStream} using the specified algorithm + * and security provider. + * + *

+ * The stream is consumed by this operation and is not closed. + *

+ * + * @param stream the input stream to hash + * @param algorithm the name of the hash algorithm + * @param provider the security provider to use + * + * @return a hexadecimal string representing the hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the algorithm is not available from the provider + * @throws NullPointerException if any argument is null + */ + public static String hash(InputStream stream, String algorithm, Provider provider) throws NoSuchAlgorithmException, IOException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + Objects.requireNonNull(provider, "provider cannot be null"); + return hash(stream, consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + /** + * Hashes the contents of an {@link InputStream} using a {@link MessageDigest} + * supplied by the caller. + *

+ * The provided {@code Supplier} is invoked exactly once to obtain a + * {@link MessageDigest} instance. The stream is read sequentially in chunks of up to + * 1 MiB and fed into the digest. + *

+ *

+ * The stream is consumed by this operation and is not closed. + *

+ * + * @param stream the input stream to hash + * @param digest a callable that supplies a {@link MessageDigest} instance + * + * @return a hexadecimal string representing the hash of the stream contents + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the callable throws this exception + * @throws NullPointerException if {@code stream}, {@code digest}, or the returned + * {@link MessageDigest} is {@code null} + * @throws RuntimeException if the callable throws any other checked exception. The + * thrown checked exception is the cause. + */ + public static String hash(InputStream stream, Supplier digest) throws IOException { + MessageDigest d; + Objects.requireNonNull(stream, "InputStream cannot be null"); + Objects.requireNonNull(digest, "MessageDigest cannot be null"); + Objects.requireNonNull(d = digest.get(), "caller provided null MessageDigest"); + + byte[] buffer = new byte[1048576]; // 1 MiB buffer + int bytesRead; + while ((bytesRead = stream.read(buffer)) != -1) { + d.update(buffer, 0, bytesRead); } + return bytesToHex(d.digest()); } + private static final char[] HEX = "0123456789abcdef".toCharArray(); + /** * Converts a byte array into a hexadecimal string representation. *

@@ -44,11 +230,12 @@ public static String hash(byte[] bytes) { * @throws NullPointerException if the provided byte array is null. */ private static String bytesToHex(byte[] bytes) { - Objects.requireNonNull(bytes, "Input byte array cannot be null."); - StringBuilder hex = new StringBuilder(); - for(byte b : bytes) { - hex.append(String.format("%02x", b)); + char[] out = new char[bytes.length * 2]; + for (int i = 0, j = 0; i < bytes.length; i++) { + int v = bytes[i] & 0xFF; + out[j++] = HEX[v >>> 4]; + out[j++] = HEX[v & 0x0F]; } - return hex.toString(); + return new String(out); } } \ No newline at end of file diff --git a/src/test/java/com/wildermods/masshash/BlobTests.java b/src/test/java/com/wildermods/masshash/BlobTests.java index 0f92860..3ad671b 100644 --- a/src/test/java/com/wildermods/masshash/BlobTests.java +++ b/src/test/java/com/wildermods/masshash/BlobTests.java @@ -3,21 +3,47 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrowsExactly; +import java.util.function.Supplier; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.UncheckedIOException; + import org.junit.jupiter.api.Test; import com.wildermods.masshash.exception.IntegrityException; +import com.wildermods.masshash.utils.ByteUtil; public class BlobTests { - private static final Blob testBlob = new Blob("test".getBytes()); - private static final Blob testBlob2 = new Blob("test".getBytes()); - private static final String testHash = "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"; + private static final BlobFactory factory = new BlobFactory(); + + private static final Blob testBlob = factory.blob("test".getBytes()); + private static final Blob testBlob2 = factory.blob("test".getBytes()); + private static final Blob lightBlob = factory.blob(() -> { + return new ByteArrayInputStream("test".getBytes()); + }); + + @Test + public void testNullConstructors() { + assertThrowsExactly(NullPointerException.class, 
() -> factory.blob((byte[])null)); + assertThrowsExactly(NullPointerException.class, () -> factory.blob(new byte[0], (String)null)); + assertThrowsExactly(NullPointerException.class, () -> factory.blob((Supplier)null, "")); + assertThrowsExactly(NullPointerException.class, () -> factory.blob(() -> { + try { + return lightBlob.dataStream(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, (String)null)); + } + @Test public void testBlob() { assertEquals(testBlob.hash(), testHash); assertEquals(testBlob.toString(), testHash); + assertEquals(lightBlob.toString(), testHash); } @Test @@ -25,35 +51,28 @@ public void testBlobEquality() { assertEquals(testBlob, testBlob); assertEquals(testBlob, testBlob2); assertEquals(testBlob2, testBlob); - } - - @Test - public void testDroppedBlobEquality() { - Hash testBlob2 = testBlob.dropData(); - - assertEquals(testBlob, testBlob); - assertEquals(testBlob, testBlob2); - assertEquals(testBlob2, testBlob); - assertEquals(testBlob2, testBlob2); - } - - @Test - public void testDropData() { - Blob dropped = (Blob) testBlob.dropData(); - assertThrowsExactly(UnsupportedOperationException.class, () -> {dropped.data();}); - assertThrowsExactly(UnsupportedOperationException.class, () -> {dropped.dropData();}); + assertEquals(lightBlob, testBlob); + assertEquals(testBlob, lightBlob); } @Test public void testVerification() throws IntegrityException { testBlob.verify(); - Blob dropped = (Blob) testBlob.dropData(); - Blob corrupt = new Blob(testBlob.data(), new Blob("corrupt".getBytes()).hash()); + IBlob corrupt = factory.blob(testBlob.data(), factory.blob("corrupt".getBytes()).hash()); + IBlob corrupt2 = new Blob( + () -> { + try { + return lightBlob.dataStream(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, ByteUtil.hash("corrupt".getBytes()) + ); - assertThrowsExactly(UnsupportedOperationException.class, () -> dropped.verify()); assertThrowsExactly(IntegrityException.class, () -> 
corrupt.verify()); - assertThrowsExactly(IntegrityException.class, () -> new Blob("test".getBytes(), new Blob("corrupt".getBytes()))); + assertThrowsExactly(IntegrityException.class, () -> factory.blob("test".getBytes(), factory.blob("corrupt".getBytes()).hash()).verify()); + assertThrowsExactly(IntegrityException.class, () -> corrupt2.verify()); } } diff --git a/src/test/java/com/wildermods/masshash/HasherTests.java b/src/test/java/com/wildermods/masshash/HasherTests.java index c9b28bf..68116ad 100644 --- a/src/test/java/com/wildermods/masshash/HasherTests.java +++ b/src/test/java/com/wildermods/masshash/HasherTests.java @@ -42,7 +42,7 @@ void verifyTest() throws IntegrityException, IOException { System.out.println("Verification test:"); hasher = new Hasher(sources, (f,b) -> { try { - b.verify(); + ((IBlob)b).verify(); } catch (IntegrityException e) { throw new RuntimeException(e); }