From 0c1d4ce004530c2c70ba2eeabe69104c3f27b07c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:17:52 +0000 Subject: [PATCH 01/10] Bump org.apache.logging.log4j:log4j-core from 2.25.2 to 2.25.3 Bumps org.apache.logging.log4j:log4j-core from 2.25.2 to 2.25.3. --- updated-dependencies: - dependency-name: org.apache.logging.log4j:log4j-core dependency-version: 2.25.3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 56c1252..ef696f5 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,7 +1,7 @@ [versions] guava_version = "33.5.0-jre" junit_version = "6.0.1" -log4j_version = "2.25.2" +log4j_version = "2.25.3" [libraries] guava = { module = "com.google.guava:guava", version.ref = "guava_version" } From 9425dd65fc349a00a26104e66e6e86cdd01d77f8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Jan 2026 09:28:05 +0000 Subject: [PATCH 02/10] Bump junit_version from 6.0.1 to 6.0.2 Bumps `junit_version` from 6.0.1 to 6.0.2. 
Updates `org.junit.jupiter:junit-jupiter` from 6.0.1 to 6.0.2 - [Release notes](https://github.com/junit-team/junit-framework/releases) - [Commits](https://github.com/junit-team/junit-framework/compare/r6.0.1...r6.0.2) Updates `org.junit.platform:junit-platform-launcher` from 6.0.1 to 6.0.2 - [Release notes](https://github.com/junit-team/junit-framework/releases) - [Commits](https://github.com/junit-team/junit-framework/compare/r6.0.1...r6.0.2) --- updated-dependencies: - dependency-name: org.junit.jupiter:junit-jupiter dependency-version: 6.0.2 dependency-type: direct:production update-type: version-update:semver-patch - dependency-name: org.junit.platform:junit-platform-launcher dependency-version: 6.0.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 56c1252..18c4804 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,6 +1,6 @@ [versions] guava_version = "33.5.0-jre" -junit_version = "6.0.1" +junit_version = "6.0.2" log4j_version = "2.25.2" [libraries] From 2a55c156af2671dafcb2d5ecefde08e6c903102e Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Fri, 16 Jan 2026 23:43:35 -0500 Subject: [PATCH 03/10] Added lightweight blob type that uses streams instead of reading the entire file into memory --- .../java/com/wildermods/masshash/Blob.java | 26 +-- .../java/com/wildermods/masshash/Data.java | 18 ++ .../java/com/wildermods/masshash/Hash.java | 2 +- .../java/com/wildermods/masshash/Hasher.java | 14 +- .../java/com/wildermods/masshash/IBlob.java | 9 + .../com/wildermods/masshash/LightBlob.java | 217 ++++++++++++++++++ .../wildermods/masshash/utils/ByteUtil.java | 26 +++ 7 files changed, 286 insertions(+), 26 deletions(-) create mode 100644 src/main/java/com/wildermods/masshash/LightBlob.java diff --git 
a/src/main/java/com/wildermods/masshash/Blob.java b/src/main/java/com/wildermods/masshash/Blob.java index bd832cf..a33e39c 100644 --- a/src/main/java/com/wildermods/masshash/Blob.java +++ b/src/main/java/com/wildermods/masshash/Blob.java @@ -4,6 +4,7 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Objects; import com.wildermods.masshash.exception.IntegrityException; import com.wildermods.masshash.utils.ByteUtil; @@ -15,6 +16,11 @@ */ public record Blob(byte[] data, String hash) implements IBlob { + public Blob { + Objects.requireNonNull(data); + Objects.requireNonNull(hash); + } + /** * Constructs a Blob from the given data and computes its hash. * @@ -118,23 +124,7 @@ public Blob(InputStream stream, Hash hash) throws IOException, IntegrityExceptio * @return A new {@link Hash} object that represents this blob, but with no associated data. */ public Hash dropData() { - if(isTransient()) { - throw new UnsupportedOperationException("Data already dropped!"); - } - return new Blob((byte[])null, hash); - } - - /** - * Returns the data associated with this Blob - * - * @return a byte array that contains the data stored in this blob - */ - @Override - public byte[] data() { - if(data == null) { - throw new UnsupportedOperationException("Null data! Was the data dropped?"); - } - return data; + return () -> hash; } @Override @@ -143,7 +133,7 @@ public int hashCode() { } /** - * Compares this object with another Hash object for equality. All {@link Blob} objects are also instances of {@link Hash}. + * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. *

* Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, diff --git a/src/main/java/com/wildermods/masshash/Data.java b/src/main/java/com/wildermods/masshash/Data.java index 2a7b974..dcbdad3 100644 --- a/src/main/java/com/wildermods/masshash/Data.java +++ b/src/main/java/com/wildermods/masshash/Data.java @@ -1,5 +1,8 @@ package com.wildermods.masshash; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; /** @@ -15,6 +18,21 @@ public interface Data { */ public byte[] data(); + /** + * Returns an {@link InputStream} for reading the data. + * + * Default implementation wraps {@link data()} in a {@link ByteArrayInputStream} + * Classes that can stream data without holding it in memory should override this. + * + * @return an {@link InputStream} for the data + */ + public default InputStream dataStream() throws IOException { + if(isTransient()) { + throw new IllegalStateException("No data to stream!"); + } + return new ByteArrayInputStream(data()); + } + /** * Checks if the data is transient, meaning the data is null or otherwise unavailable. * diff --git a/src/main/java/com/wildermods/masshash/Hash.java b/src/main/java/com/wildermods/masshash/Hash.java index 6bb4893..688ed59 100644 --- a/src/main/java/com/wildermods/masshash/Hash.java +++ b/src/main/java/com/wildermods/masshash/Hash.java @@ -41,6 +41,6 @@ public default boolean hashEquals(String hash) { * @return a new {@link Hash} instance. 
*/ public static Hash of(String hash) { - return new Blob((byte[])null, hash); + return () -> hash; } } \ No newline at end of file diff --git a/src/main/java/com/wildermods/masshash/Hasher.java b/src/main/java/com/wildermods/masshash/Hasher.java index 5913f0f..19be387 100644 --- a/src/main/java/com/wildermods/masshash/Hasher.java +++ b/src/main/java/com/wildermods/masshash/Hasher.java @@ -93,7 +93,7 @@ public Hasher(final Stream files) throws IOException { * before being added to the result map. The updated reference value will be associated with the computed hash. * @throws IOException if an I/O error occurs during hashing */ - public Hasher(final Stream files, final BiConsumer, Blob> forEachBlob) throws IOException { + public Hasher(final Stream files, final BiConsumer, IBlob> forEachBlob) throws IOException { this(files, (p) -> true, forEachBlob); } @@ -112,7 +112,7 @@ public Hasher(final Stream files, final BiConsumer, Blob> * @throws IOException if an I/O error occurs during hashing * @throws IllegalArgumentException if no files match the predicate */ - public Hasher(final Stream files, final Predicate predicate, final BiConsumer, Blob> forEachBlob) throws IOException { + public Hasher(final Stream files, final Predicate predicate, final BiConsumer, IBlob> forEachBlob) throws IOException { this(files, Runtime.getRuntime().availableProcessors(), predicate, forEachBlob); } @@ -147,7 +147,7 @@ public Hasher(final Stream files, final Predicate predicate, final B * @throws IOException if an error occurs while reading files or during thread execution * @throws IllegalArgumentException if no files matched the provided predicate */ - public Hasher(final Stream files, int threads, final Predicate predicate, final BiConsumer,Blob> forEachBlob) throws IOException { + public Hasher(final Stream files, int threads, final Predicate predicate, final BiConsumer,IBlob> forEachBlob) throws IOException { final int processors = Runtime.getRuntime().availableProcessors(); 
Objects.requireNonNull(files); Objects.requireNonNull(predicate); @@ -214,12 +214,12 @@ public Hasher(final Stream files, int threads, final Predicate predi for (Path file : sublist) { Reference newFile = new Reference<>(file); //Read and hash the file into a Blob, then discard the Blob’s data to conserve memory - Hash blob = new Blob(file); - forEachBlob.accept(newFile, (Blob) blob); - ((Blob) blob).dropData(); + IBlob blob = LightBlob.from(file); + forEachBlob.accept(newFile, (IBlob) blob); + Hash hash = blob.dropData(); //Group files by their content hash. Files with the same hash will share the same key - local.computeIfAbsent(blob, k -> new HashSet<>()).add(newFile.get()); + local.computeIfAbsent(hash, k -> new HashSet<>()).add(newFile.get()); } return local; })); diff --git a/src/main/java/com/wildermods/masshash/IBlob.java b/src/main/java/com/wildermods/masshash/IBlob.java index 7fa11ef..6f9c859 100644 --- a/src/main/java/com/wildermods/masshash/IBlob.java +++ b/src/main/java/com/wildermods/masshash/IBlob.java @@ -17,4 +17,13 @@ public interface IBlob extends Data, Hash { * indicating data corruption or alteration. */ public void verify() throws IntegrityException; + + /** + * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. + * The original blob still holds the data for as long as you keep it referenced. + * + * @return A new {@link Hash} object that represents this blob, but with no associated data. 
+ */ + public Hash dropData(); + } diff --git a/src/main/java/com/wildermods/masshash/LightBlob.java b/src/main/java/com/wildermods/masshash/LightBlob.java new file mode 100644 index 0000000..e762976 --- /dev/null +++ b/src/main/java/com/wildermods/masshash/LightBlob.java @@ -0,0 +1,217 @@ +package com.wildermods.masshash; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Objects; +import java.util.function.Supplier; + +import com.wildermods.masshash.exception.IntegrityException; +import com.wildermods.masshash.utils.ByteUtil; + +/** + * A lightweight implementation of {@link IBlob} that represents data which can be read + * as a stream. Does not necessarily store the full byte array in memory. + *

+ * This is particularly useful for large files or streams (e.g., files on disk, network streams), + * where reading the entire content into memory is undesirable. The hash of the data is always stored + * and can be verified without retaining the raw bytes. + *

+ */ +public record LightBlob(Supplier streamSupplier, String hash) implements IBlob { + + /** + * Canonical constructor. Ensures neither the {@code streamSupplier} nor {@code hash} are null. + * + * @param streamSupplier a {@link Supplier} that provides a fresh {@link InputStream} to read the data + * @param hash the SHA-1 hash of the data + * @throws NullPointerException if either {@code streamSupplier} or {@code hash} is null + */ + public LightBlob { + Objects.requireNonNull(streamSupplier); + Objects.requireNonNull(hash); + } + + /** + * Creates a {@link LightBlob} from a file at the specified path, computing the hash from its contents. + * + * @param path the file path + * @return a new {@link LightBlob} representing the file + * @throws IOException if reading the file fails + */ + public static LightBlob from(Path path) throws IOException { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + try (InputStream stream = streamSupplier.get()){ + return new LightBlob(streamSupplier, ByteUtil.hash(stream)); + } catch (UncheckedIOException e) { + throw new IOException(e); + } + } + + /** + * Creates a {@link LightBlob} from a file at the specified path and verifies it matches the expected hash. 
+ * + * @param path the file path + * @param expectedHash the expected hash of the file contents + * @return a new {@link LightBlob} representing the file + * @throws IOException if reading the file fails + * @throws IntegrityException if the file's hash does not match {@code expectedHash} + */ + public static LightBlob from(Path path, String expectedHash) throws IOException, IntegrityException { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + try { + LightBlob blob = new LightBlob(streamSupplier, expectedHash); + blob.verify(); + return blob; + } catch (UncheckedIOException e) { + throw new IOException(e); + } + } + + /** + * Returns the full byte array of the blob data. + *

+ * Deprecated because reading the entire data into memory may be expensive for large streams. + * Prefer {@link #dataStream()} instead. + *

+ * + * @return the byte array of the blob + * @throws UncheckedIOException if reading the stream fails + */ + @Override + @Deprecated + public byte[] data() { + try (InputStream stream = dataStream()){ + return stream.readAllBytes(); + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Checks if this blob is transient, meaning the underlying stream cannot be opened. + *

+ * This can occur if the file is deleted, the network stream fails, or any other I/O error + * prevents the stream from being accessed. + *

+ * + * @return {@code true} if the data stream cannot be opened, {@code false} otherwise + */ + @Override + public boolean isTransient() { + try (InputStream stream = streamSupplier.get()){ + return false; + } + catch(Exception e) { + return true; + } + } + + /** + * Returns a fresh {@link InputStream} for reading the blob's data. + *

+ * Each call returns a new stream. The caller is responsible for closing it. + *

+ * + * @return a fresh {@link InputStream} for reading the blob's contents + * @throws IOException if the stream cannot be opened + */ + @Override + public InputStream dataStream() throws IOException { + try { + return streamSupplier.get(); + } + catch(Exception e) { + throw new IOException(e); + } + } + + /** + * Verifies that the data matches the provided hash. + *

+ * This method computes the hash of the current data and compares it to the expected hash. If the hashes do not match, + * an {@link IntegrityException} is thrown. This method ensures the integrity of the data. + *

+ * + * @throws IntegrityException if the computed hash of the data does not match the expected hash. + */ + @Override + public void verify() throws IntegrityException { + try (InputStream stream = dataStream()){ + String actualHash = ByteUtil.hash(stream); + if(!actualHash.equals(hash)) { + throw new IntegrityException("Expected hash " + hash + " but got " + actualHash); + } + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. + * The original blob still holds the data for as long as you keep it referenced. + * + * @return A new {@link Hash} object that represents this blob, but with no associated data. + */ + @Override + public Hash dropData() { + return () -> hash; + } + + @Override + public int hashCode() { + return hash.hashCode(); + } + + /** + * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. + *

+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *

+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); + } + return false; + } + + /** + * Returns a string representation of this Blob, which is its hash value. + *

+ * This method overrides the default {@link Object#toString()} method to provide a more meaningful + * string representation of the Blob. + *

+ * + * @return the hash of the Blob as a string. + */ + @Override + public String toString() { + return hash(); + } + +} diff --git a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java index c430c8d..11d765f 100644 --- a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java +++ b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java @@ -1,5 +1,7 @@ package com.wildermods.masshash.utils; +import java.io.IOException; +import java.io.InputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Objects; @@ -29,6 +31,30 @@ public static String hash(byte[] bytes) { } } + /** + * Hashes the contents of an InputStream using SHA-1 without loading all bytes into memory. + * The stream is read sequentially in 1 MiB chunks. + * + * @param stream the InputStream to hash + * @return the hexadecimal SHA-1 hash + * @throws IOException if an I/O error occurs reading the stream + * @throws NullPointerException if the stream is null + */ + public static String hash(InputStream stream) throws IOException { + Objects.requireNonNull(stream, "InputStream cannot be null."); + try { + MessageDigest digest = MessageDigest.getInstance("SHA-1"); + byte[] buffer = new byte[1048576]; // 1 MiB buffer + int bytesRead; + while ((bytesRead = stream.read(buffer)) != -1) { + digest.update(buffer, 0, bytesRead); + } + return bytesToHex(digest.digest()); + } catch (NoSuchAlgorithmException e) { + throw new AssertionError("SHA-1 algorithm is unavailable.", e); + } + } + /** * Converts a byte array into a hexadecimal string representation. *

From a1e45b1c608b22cc910ae6f50188808c9f1298bb Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Sat, 17 Jan 2026 00:11:23 -0500 Subject: [PATCH 04/10] Fix Tests --- .../java/com/wildermods/masshash/Blob.java | 2 +- .../java/com/wildermods/masshash/Hash.java | 47 ++++++++++++++++++- .../com/wildermods/masshash/LightBlob.java | 2 +- .../com/wildermods/masshash/BlobTests.java | 11 +---- 4 files changed, 49 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/wildermods/masshash/Blob.java b/src/main/java/com/wildermods/masshash/Blob.java index a33e39c..07bfa89 100644 --- a/src/main/java/com/wildermods/masshash/Blob.java +++ b/src/main/java/com/wildermods/masshash/Blob.java @@ -124,7 +124,7 @@ public Blob(InputStream stream, Hash hash) throws IOException, IntegrityExceptio * @return A new {@link Hash} object that represents this blob, but with no associated data. */ public Hash dropData() { - return () -> hash; + return Hash.of(hash()); } @Override diff --git a/src/main/java/com/wildermods/masshash/Hash.java b/src/main/java/com/wildermods/masshash/Hash.java index 688ed59..866372d 100644 --- a/src/main/java/com/wildermods/masshash/Hash.java +++ b/src/main/java/com/wildermods/masshash/Hash.java @@ -41,6 +41,51 @@ public default boolean hashEquals(String hash) { * @return a new {@link Hash} instance. */ public static Hash of(String hash) { - return () -> hash; + return new Hash() { + + @Override + public String hash() { + return hash; + } + + @Override + public int hashCode() { + return hash.hashCode(); + } + + /** + * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. + *

+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *

+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); + } + return false; + } + + /** + * Returns a string representation of this Blob, which is its hash value. + *

+ * This method overrides the default {@link Object#toString()} method to provide a more meaningful + * string representation of the Blob. + *

+ * + * @return the hash of the Blob as a string. + */ + @Override + public String toString() { + return hash(); + } + + }; } } \ No newline at end of file diff --git a/src/main/java/com/wildermods/masshash/LightBlob.java b/src/main/java/com/wildermods/masshash/LightBlob.java index e762976..461419e 100644 --- a/src/main/java/com/wildermods/masshash/LightBlob.java +++ b/src/main/java/com/wildermods/masshash/LightBlob.java @@ -173,7 +173,7 @@ public void verify() throws IntegrityException { */ @Override public Hash dropData() { - return () -> hash; + return Hash.of(hash()); } @Override diff --git a/src/test/java/com/wildermods/masshash/BlobTests.java b/src/test/java/com/wildermods/masshash/BlobTests.java index 0f92860..59d1831 100644 --- a/src/test/java/com/wildermods/masshash/BlobTests.java +++ b/src/test/java/com/wildermods/masshash/BlobTests.java @@ -37,21 +37,12 @@ public void testDroppedBlobEquality() { assertEquals(testBlob2, testBlob2); } - @Test - public void testDropData() { - Blob dropped = (Blob) testBlob.dropData(); - assertThrowsExactly(UnsupportedOperationException.class, () -> {dropped.data();}); - assertThrowsExactly(UnsupportedOperationException.class, () -> {dropped.dropData();}); - } - @Test public void testVerification() throws IntegrityException { testBlob.verify(); - Blob dropped = (Blob) testBlob.dropData(); - Blob corrupt = new Blob(testBlob.data(), new Blob("corrupt".getBytes()).hash()); + IBlob corrupt = new Blob(testBlob.data(), new Blob("corrupt".getBytes()).hash()); - assertThrowsExactly(UnsupportedOperationException.class, () -> dropped.verify()); assertThrowsExactly(IntegrityException.class, () -> corrupt.verify()); assertThrowsExactly(IntegrityException.class, () -> new Blob("test".getBytes(), new Blob("corrupt".getBytes()))); } From 3fdde1c3b6c68259308050161464ddb6dded296e Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Sat, 17 Jan 2026 00:30:08 -0500 Subject: [PATCH 05/10] Add tests for constructors and lightblob functions --- 
.../com/wildermods/masshash/BlobTests.java | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/test/java/com/wildermods/masshash/BlobTests.java b/src/test/java/com/wildermods/masshash/BlobTests.java index 59d1831..b560e33 100644 --- a/src/test/java/com/wildermods/masshash/BlobTests.java +++ b/src/test/java/com/wildermods/masshash/BlobTests.java @@ -3,21 +3,41 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrowsExactly; +import java.util.function.Supplier; +import java.io.ByteArrayInputStream; +import java.io.InputStream; + import org.junit.jupiter.api.Test; import com.wildermods.masshash.exception.IntegrityException; +import com.wildermods.masshash.utils.ByteUtil; public class BlobTests { + private static final String testHash = "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"; + private static final Blob testBlob = new Blob("test".getBytes()); private static final Blob testBlob2 = new Blob("test".getBytes()); + private static final LightBlob lightBlob = new LightBlob( + (Supplier)() -> { + return new ByteArrayInputStream( + "test".getBytes() + );}, testHash + ); - private static final String testHash = "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"; + @Test + public void testNullConstructors() { + assertThrowsExactly(NullPointerException.class, () -> new Blob((byte[])null)); + assertThrowsExactly(NullPointerException.class, () -> new Blob(new byte[0], (String)null)); + assertThrowsExactly(NullPointerException.class, () -> new LightBlob(null, "")); + assertThrowsExactly(NullPointerException.class, () -> new LightBlob(lightBlob.streamSupplier(), null)); + } @Test public void testBlob() { assertEquals(testBlob.hash(), testHash); assertEquals(testBlob.toString(), testHash); + assertEquals(lightBlob.toString(), testHash); } @Test @@ -25,6 +45,8 @@ public void testBlobEquality() { assertEquals(testBlob, testBlob); assertEquals(testBlob, testBlob2); assertEquals(testBlob2, 
testBlob); + assertEquals(lightBlob, testBlob); + assertEquals(testBlob, lightBlob); } @Test @@ -35,6 +57,12 @@ public void testDroppedBlobEquality() { assertEquals(testBlob, testBlob2); assertEquals(testBlob2, testBlob); assertEquals(testBlob2, testBlob2); + + Hash lightBlob2 = lightBlob.dropData(); + assertEquals(testBlob, testBlob); + assertEquals(testBlob, lightBlob2); + assertEquals(lightBlob2, testBlob); + assertEquals(lightBlob2, lightBlob2); } @Test @@ -42,9 +70,13 @@ public void testVerification() throws IntegrityException { testBlob.verify(); IBlob corrupt = new Blob(testBlob.data(), new Blob("corrupt".getBytes()).hash()); + IBlob corrupt2 = new LightBlob( + lightBlob.streamSupplier(), ByteUtil.hash("corrupt".getBytes()) + ); assertThrowsExactly(IntegrityException.class, () -> corrupt.verify()); assertThrowsExactly(IntegrityException.class, () -> new Blob("test".getBytes(), new Blob("corrupt".getBytes()))); + assertThrowsExactly(IntegrityException.class, () -> corrupt2.verify()); } } From 8d93e6239cef8a7278f8cb2ae5ba6b6de716c404 Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Wed, 21 Jan 2026 01:44:44 -0500 Subject: [PATCH 06/10] Rewrite, blobs now are constructed via a blob factory. All blobs hold a stream to data instead of having the data directly in memory. 
--- .../java/com/wildermods/masshash/Blob.java | 245 +++++++++--------- .../com/wildermods/masshash/BlobFactory.java | 99 +++++++ .../java/com/wildermods/masshash/Hash.java | 61 ++++- .../java/com/wildermods/masshash/Hasher.java | 12 +- .../java/com/wildermods/masshash/IBlob.java | 16 ++ .../com/wildermods/masshash/LightBlob.java | 217 ---------------- .../wildermods/masshash/utils/ByteUtil.java | 219 +++++++++++++--- .../com/wildermods/masshash/BlobTests.java | 46 ++-- .../com/wildermods/masshash/HasherTests.java | 2 +- 9 files changed, 510 insertions(+), 407 deletions(-) create mode 100644 src/main/java/com/wildermods/masshash/BlobFactory.java delete mode 100644 src/main/java/com/wildermods/masshash/LightBlob.java diff --git a/src/main/java/com/wildermods/masshash/Blob.java b/src/main/java/com/wildermods/masshash/Blob.java index 07bfa89..5996391 100644 --- a/src/main/java/com/wildermods/masshash/Blob.java +++ b/src/main/java/com/wildermods/masshash/Blob.java @@ -2,127 +2,132 @@ import java.io.IOException; import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; +import java.io.UncheckedIOException; +import java.security.MessageDigest; import java.util.Objects; +import java.util.function.Supplier; import com.wildermods.masshash.exception.IntegrityException; import com.wildermods.masshash.utils.ByteUtil; /** - * Represents a data blob with associated hash. The Blob can store the data as - * a byte array and its hash, and includes methods for verifying the integrity - * of the data by checking its hash. + * A lightweight implementation of {@link IBlob} that represents data which can be read + * as a stream. Does not necessarily store the full byte array in memory. + *

+ * This is particularly useful for large files or streams (e.g., files on disk, network streams), + * where reading the entire content into memory is undesirable. The hash of the data is always stored + * and can be verified without retaining the raw bytes. + *

*/ -public record Blob(byte[] data, String hash) implements IBlob { +public final class Blob implements IBlob { - public Blob { - Objects.requireNonNull(data); + private final Supplier digest; + private final Supplier streamSupplier; + private final String hash; + + Blob (Supplier digest, Supplier streamSupplier, String hash) { + Objects.requireNonNull(digest); + Objects.requireNonNull(streamSupplier); + Objects.requireNonNull(streamSupplier.get()); Objects.requireNonNull(hash); + this.digest = digest; + this.streamSupplier = streamSupplier; + this.hash = hash; } - /** - * Constructs a Blob from the given data and computes its hash. - * - * @param data The byte array representing the data. - */ - public Blob(byte[] data) { - this(data, ByteUtil.hash(data)); - } - - /** - * Constructs a Blob from the given data and hash. - * - * @param data The byte array representing the data. - * @param hash The hash of the data. - * - * @throws IntegrityException if the provided hash does not match the data's hash. - */ - public Blob(byte[] data, Hash hash) throws IntegrityException { - this(data, hash.hash()); - verify(); - } - - /** - * Constructs a Blob from the contents of a file. - * - * @param path The path to the file. - * @throws IOException if an I/O error occurs while reading the file. - */ - public Blob(Path path) throws IOException { - this(Files.readAllBytes(path)); - } - - /** - * Constructs a Blob from the contents of a file and verifies its hash. - * - * @param path The path to the file. - * @param hash The expected hash of the file contents. - * - * @throws IOException if an I/O error occurs while reading the file. - * @throws IntegrityException if the file's hash does not match the provided hash. - */ - public Blob(Path path, String hash) throws IOException, IntegrityException { - this(Files.readAllBytes(path), hash); - verify(); - } - - /** - * Constructs a Blob from the contents of a file and verifies its hash. - * - * @param path The path to the file. 
- * @param hash The expected hash of the file contents. - * - * @throws IOException if an I/O error occurs while reading the file. - * @throws IntegrityException if the file's hash does not match the provided hash. - */ - public Blob(Path path, Hash hash) throws IOException, IntegrityException { - this(path, hash.hash()); - } + Blob(Supplier streamSupplier, String hash) { + this(ByteUtil.DEFAULT_DIGEST, streamSupplier, hash); + } + + public Supplier digest() { + return digest; + } + + public String hash() { + return hash; + } + + /** + * {@inheritDoc} + */ + @Override + @Deprecated + public byte[] data() { + try (InputStream stream = dataStream()){ + return stream.readAllBytes(); + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Checks if this blob is transient, meaning the underlying stream cannot be opened. + *

+ * This can occur if the file is deleted, the network stream fails, or any other I/O error + * prevents the stream from being accessed. + *

+ * + * @return {@code true} if the data stream cannot be opened, {@code false} otherwise + */ + @Override + public boolean isTransient() { + try (InputStream stream = streamSupplier.get()){ + return false; + } + catch(Exception e) { + return true; + } + } - /** - * Constructs a Blob from the data read from an InputStream. - * - * @param stream The InputStream from which data is read. - * @throws IOException if an I/O error occurs while reading from the stream. - */ - public Blob(InputStream stream) throws IOException { - this(stream.readAllBytes()); - } + /** + * Returns a fresh {@link InputStream} for reading the blob's data. + *

+ * Each call returns a new stream. The caller is responsible for closing it. + *

+ * + * @return a fresh {@link InputStream} for reading the blob's contents + * @throws IOException if the stream cannot be opened + */ + @Override + public InputStream dataStream() throws IOException { + try { + return streamSupplier.get(); + } + catch(Exception e) { + throw new IOException(e); + } + } - /** - * Constructs a Blob from the data read from an InputStream and verifies its hash. - * - * @param stream The InputStream from which data is read. - * @param hash The expected hash of the data. - * - * @throws IOException if an I/O error occurs while reading from the stream. - * @throws IntegrityException if the data's hash does not match the provided hash. - */ - public Blob(InputStream stream, String hash) throws IOException, IntegrityException { - this(stream.readAllBytes(), hash); - verify(); - } + /** + * Verifies that the data matches the provided hash. + *

+ * This method computes the hash of the current data and compares it to the expected hash. If the hashes do not match, + * an {@link IntegrityException} is thrown. This method ensures the integrity of the data. + *

+ * + * @throws IntegrityException if the computed hash of the data does not match the expected hash. + */ + @Override + public void verify() throws IntegrityException { + try (InputStream stream = dataStream()){ + String actualHash = ByteUtil.hash(stream); + if(!actualHash.equals(hash)) { + throw new IntegrityException("Expected hash " + hash + " but got " + actualHash); + } + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } - /** - * Constructs a Blob from the data read from an InputStream and verifies its hash. - * - * @param stream The InputStream from which data is read. - * @param hash The expected hash of the data. - * - * @throws IOException if an I/O error occurs while reading from the stream. - * @throws IntegrityException if the data's hash does not match the provided hash. - */ - public Blob(InputStream stream, Hash hash) throws IOException, IntegrityException { - this(stream, hash.hash()); - } - - /** - * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. - * The original blob still holds the data for as long as you keep it referenced. - * - * @return A new {@link Hash} object that represents this blob, but with no associated data. - */ + /** + * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. + * The original blob still holds the data for as long as you keep it referenced. + * + * @return A new {@link Hash} object that represents this blob, but with no associated data. + */ + @Override public Hash dropData() { return Hash.of(hash()); } @@ -145,27 +150,10 @@ public int hashCode() { */ @Override public boolean equals(Object o) { - if(o instanceof Hash) { - return hash().equals(((Hash) o).hash()); - } - return false; - } - - /** - * Verifies that the data stored in this object matches the provided hash. - *

- * This method computes the hash of the current data and compares it to the expected hash. If the hashes do not match, - * an {@link IntegrityException} is thrown. This method ensures the integrity of the data. - *

- * - * @throws IntegrityException if the computed hash of the data does not match the expected hash. - */ - @Override - public void verify() throws IntegrityException { - String dataHash = ByteUtil.hash(data()); - if(!dataHash.equals(hash)) { - throw new IntegrityException("Expected hash " + hash + " but got " + dataHash); + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); } + return false; } /** @@ -179,6 +167,7 @@ public void verify() throws IntegrityException { */ @Override public String toString() { - return hash(); + return hash(); } + } diff --git a/src/main/java/com/wildermods/masshash/BlobFactory.java b/src/main/java/com/wildermods/masshash/BlobFactory.java new file mode 100644 index 0000000..53b78f9 --- /dev/null +++ b/src/main/java/com/wildermods/masshash/BlobFactory.java @@ -0,0 +1,99 @@ +package com.wildermods.masshash; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.Provider; +import java.util.function.Supplier; + +import com.wildermods.masshash.utils.ByteUtil; + +public class BlobFactory { + + public final Provider provider; + public final Supplier digest; + + private BlobFactory (Provider provider, Supplier digest) { + this.provider = provider; + this.digest = digest; + } + + public BlobFactory() { + this(null, ByteUtil.DEFAULT_DIGEST); + } + + public BlobFactory(Supplier digest) { + this(null, digest); + } + + public BlobFactory(Provider provider) throws NoSuchAlgorithmException { + this(provider, "SHA-1"); + } + + public BlobFactory(Provider provider, String algorithm) throws NoSuchAlgorithmException { + this(provider, ByteUtil.consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + public String algorithm() { + return digest.get().getAlgorithm(); + } + + public Blob 
blob(Supplier stream) { + try { + return new Blob(digest, stream, ByteUtil.hash(stream.get(), digest)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public Blob blob(Supplier stream, String hash) { + final Blob blob = new Blob(digest, stream, hash); + return blob; + } + + public Blob blob(Path path) throws IOException { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + return blob(streamSupplier); + } + + public Blob blob(Path path, String expectedHash) throws IOException { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + return blob(streamSupplier, expectedHash); + } + + public Blob blob(byte[] data) { + Supplier streamSupplier = () -> { + return new ByteArrayInputStream(data); + }; + + return blob(streamSupplier); + } + + public Blob blob(byte[] data, String hash) { + Supplier streamSupplier = () -> { + return new ByteArrayInputStream(data); + }; + + return blob(streamSupplier, hash); + } + +} diff --git a/src/main/java/com/wildermods/masshash/Hash.java b/src/main/java/com/wildermods/masshash/Hash.java index 866372d..7ceccb5 100644 --- a/src/main/java/com/wildermods/masshash/Hash.java +++ b/src/main/java/com/wildermods/masshash/Hash.java @@ -1,5 +1,10 @@ package com.wildermods.masshash; +import java.security.MessageDigest; +import java.util.function.Supplier; + +import com.wildermods.masshash.utils.ByteUtil; + /** * A functional interface representing a hash. It provides methods for obtaining * the hash value as a string and for comparing it to other hashes. @@ -14,6 +19,21 @@ public interface Hash { */ public String hash(); + /** + * @return The hashing algorithm used. 
+ */ + public default String algorithm() { + return digest().get().getAlgorithm(); + } + + /** + * The messageDigest that is used to obtain the + * algorithm + */ + public default Supplier digest() { + return ByteUtil.DEFAULT_DIGEST; + } + /** * Compares this hash to another hash and returns true if they are equal. * @@ -34,18 +54,13 @@ public default boolean hashEquals(String hash) { return hash().equals(hash); } - /** - * Creates a new {@link Hash} instance from the given string hash value. - * - * @param hash the string hash value. - * @return a new {@link Hash} instance. - */ - public static Hash of(String hash) { - return new Hash() { - - @Override - public String hash() { - return hash; + public static class Internal { + private static record Impl(String hash, Supplier digest) implements Hash { + + private Impl(String hash, Supplier digest) { + this.hash = hash; + final MessageDigest d = digest.get(); + this.digest = () -> d; } @Override @@ -86,6 +101,26 @@ public String toString() { return hash(); } - }; + } + } + + /** + * Creates a new {@link Hash} instance from the given string hash value. + * + * @param hash the string hash value. + * @return a new {@link Hash} instance. + */ + public static Hash of(String hash) { + return new Internal.Impl(hash, ByteUtil.DEFAULT_DIGEST); + } + + /** + * Creates a new {@link Hash} instance from the given string and digest values. + * + * @param hash the string hash value. + * @return a new {@link Hash} instance. 
+ */ + public static Hash of(Supplier digest, String hash) { + return new Internal.Impl(hash, digest); } } \ No newline at end of file diff --git a/src/main/java/com/wildermods/masshash/Hasher.java b/src/main/java/com/wildermods/masshash/Hasher.java index 19be387..2ac7b3f 100644 --- a/src/main/java/com/wildermods/masshash/Hasher.java +++ b/src/main/java/com/wildermods/masshash/Hasher.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.security.MessageDigest; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; @@ -45,7 +46,7 @@ * Subclasses can access the resulting hash-to-path mappings via {@link #results()}. *

*/ -public abstract class Hasher { +public abstract class Hasher> { /** * A multimap that stores computed hashes and their associated file paths. @@ -61,6 +62,7 @@ public abstract class Hasher { */ protected SetMultimap blobs; protected Logger logger = LogManager.getLogger(); + protected BlobFactory blobFactory = new BlobFactory(); /** * Protected no-argument constructor for subclass serialization. @@ -209,14 +211,20 @@ public Hasher(final Stream files, int threads, final Predicate predi List sublist = allFiles.subList(i, Math.min(i + chunkSize, allFiles.size())); futures.add(pool.submit(() -> { + //One reusable digest per thread + MessageDigest digest = blobFactory.digest.get(); + BlobFactory factory = new BlobFactory(() -> digest); //Each thread uses a local map to avoid synchronization Map> local = new HashMap<>(); for (Path file : sublist) { Reference newFile = new Reference<>(file); + //Read and hash the file into a Blob, then discard the Blob’s data to conserve memory - IBlob blob = LightBlob.from(file); + digest.reset(); + IBlob blob = factory.blob(file); forEachBlob.accept(newFile, (IBlob) blob); Hash hash = blob.dropData(); + blob = null; //garbage collect this asap //Group files by their content hash. Files with the same hash will share the same key local.computeIfAbsent(hash, k -> new HashSet<>()).add(newFile.get()); diff --git a/src/main/java/com/wildermods/masshash/IBlob.java b/src/main/java/com/wildermods/masshash/IBlob.java index 6f9c859..89b688f 100644 --- a/src/main/java/com/wildermods/masshash/IBlob.java +++ b/src/main/java/com/wildermods/masshash/IBlob.java @@ -1,5 +1,7 @@ package com.wildermods.masshash; +import java.io.UncheckedIOException; + import com.wildermods.masshash.exception.IntegrityException; /** @@ -26,4 +28,18 @@ public interface IBlob extends Data, Hash { */ public Hash dropData(); + /** + * Returns the full byte array of the blob data. + *

+ * Deprecated because reading the entire data into memory may be expensive for large streams. + * Prefer {@link #dataStream()} instead. + *

+ * + * @return the byte array of the blob + * @throws UncheckedIOException if reading the stream fails + */ + @Override + @Deprecated(forRemoval = false) + public byte[] data(); + } diff --git a/src/main/java/com/wildermods/masshash/LightBlob.java b/src/main/java/com/wildermods/masshash/LightBlob.java deleted file mode 100644 index 461419e..0000000 --- a/src/main/java/com/wildermods/masshash/LightBlob.java +++ /dev/null @@ -1,217 +0,0 @@ -package com.wildermods.masshash; - -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Objects; -import java.util.function.Supplier; - -import com.wildermods.masshash.exception.IntegrityException; -import com.wildermods.masshash.utils.ByteUtil; - -/** - * A lightweight implementation of {@link IBlob} that represents data which can be read - * as a stream. Does not necessarily store the full byte array in memory. - *

- * This is particularly useful for large files or streams (e.g., files on disk, network streams), - * where reading the entire content into memory is undesirable. The hash of the data is always stored - * and can be verified without retaining the raw bytes. - *

- */ -public record LightBlob(Supplier streamSupplier, String hash) implements IBlob { - - /** - * Canonical constructor. Ensures neither the {@code streamSupplier} nor {@code hash} are null. - * - * @param streamSupplier a {@link Supplier} that provides a fresh {@link InputStream} to read the data - * @param hash the SHA-1 hash of the data - * @throws NullPointerException if either {@code streamSupplier} or {@code hash} is null - */ - public LightBlob { - Objects.requireNonNull(streamSupplier); - Objects.requireNonNull(hash); - } - - /** - * Creates a {@link LightBlob} from a file at the specified path, computing the hash from its contents. - * - * @param path the file path - * @return a new {@link LightBlob} representing the file - * @throws IOException if reading the file fails - */ - public static LightBlob from(Path path) throws IOException { - Supplier streamSupplier = () -> { - try { - return Files.newInputStream(path); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }; - - try (InputStream stream = streamSupplier.get()){ - return new LightBlob(streamSupplier, ByteUtil.hash(stream)); - } catch (UncheckedIOException e) { - throw new IOException(e); - } - } - - /** - * Creates a {@link LightBlob} from a file at the specified path and verifies it matches the expected hash. 
- * - * @param path the file path - * @param expectedHash the expected hash of the file contents - * @return a new {@link LightBlob} representing the file - * @throws IOException if reading the file fails - * @throws IntegrityException if the file's hash does not match {@code expectedHash} - */ - public static LightBlob from(Path path, String expectedHash) throws IOException, IntegrityException { - Supplier streamSupplier = () -> { - try { - return Files.newInputStream(path); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }; - - try { - LightBlob blob = new LightBlob(streamSupplier, expectedHash); - blob.verify(); - return blob; - } catch (UncheckedIOException e) { - throw new IOException(e); - } - } - - /** - * Returns the full byte array of the blob data. - *

- * Deprecated because reading the entire data into memory may be expensive for large streams. - * Prefer {@link #dataStream()} instead. - *

- * - * @return the byte array of the blob - * @throws UncheckedIOException if reading the stream fails - */ - @Override - @Deprecated - public byte[] data() { - try (InputStream stream = dataStream()){ - return stream.readAllBytes(); - } - catch(IOException e) { - throw new UncheckedIOException(e); - } - } - - /** - * Checks if this blob is transient, meaning the underlying stream cannot be opened. - *

- * This can occur if the file is deleted, the network stream fails, or any other I/O error - * prevents the stream from being accessed. - *

- * - * @return {@code true} if the data stream cannot be opened, {@code false} otherwise - */ - @Override - public boolean isTransient() { - try (InputStream stream = streamSupplier.get()){ - return false; - } - catch(Exception e) { - return true; - } - } - - /** - * Returns a fresh {@link InputStream} for reading the blob's data. - *

- * Each call returns a new stream. The caller is responsible for closing it. - *

- * - * @return a fresh {@link InputStream} for reading the blob's contents - * @throws IOException if the stream cannot be opened - */ - @Override - public InputStream dataStream() throws IOException { - try { - return streamSupplier.get(); - } - catch(Exception e) { - throw new IOException(e); - } - } - - /** - * Verifies that the data matches the provided hash. - *

- * This method computes the hash of the current data and compares it to the expected hash. If the hashes do not match, - * an {@link IntegrityException} is thrown. This method ensures the integrity of the data. - *

- * - * @throws IntegrityException if the computed hash of the data does not match the expected hash. - */ - @Override - public void verify() throws IntegrityException { - try (InputStream stream = dataStream()){ - String actualHash = ByteUtil.hash(stream); - if(!actualHash.equals(hash)) { - throw new IntegrityException("Expected hash " + hash + " but got " + actualHash); - } - } - catch(IOException e) { - throw new UncheckedIOException(e); - } - } - - /** - * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. - * The original blob still holds the data for as long as you keep it referenced. - * - * @return A new {@link Hash} object that represents this blob, but with no associated data. - */ - @Override - public Hash dropData() { - return Hash.of(hash()); - } - - @Override - public int hashCode() { - return hash.hashCode(); - } - - /** - * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. - *

- * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares - * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, - * the method returns {@code false}. - *

- * - * @param o the object to compare with this Hash object. - * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. - */ - @Override - public boolean equals(Object o) { - if(o instanceof Hash) { - return hash().equals(((Hash) o).hash()); - } - return false; - } - - /** - * Returns a string representation of this Blob, which is its hash value. - *

- * This method overrides the default {@link Object#toString()} method to provide a more meaningful - * string representation of the Blob. - *

- * - * @return the hash of the Blob as a string. - */ - @Override - public String toString() { - return hash(); - } - -} diff --git a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java index 11d765f..067c488 100644 --- a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java +++ b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java @@ -4,10 +4,33 @@ import java.io.InputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.security.Provider; import java.util.Objects; +import java.util.function.Function; +import java.util.function.Supplier; +/** + * Utility methods for hashing byte data and streams using cryptographic hash functions. + *

+ * This class provides convenience overloads for hashing byte arrays and {@link InputStream}s, + * supporting explicit algorithms and security providers. All hashes are returned as lowercase + * hexadecimal strings. + *

+ */ public class ByteUtil { + public static final Supplier DEFAULT_DIGEST = () -> { + try { + return MessageDigest.getInstance("SHA-1"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + }; + + public static final Function> consume = (p) -> { + return () -> p; + }; + /** * Hashes a given byte array using the SHA-1 algorithm. *

@@ -22,39 +45,176 @@ public class ByteUtil { * @throws NullPointerException if the provided byte array is null. */ public static String hash(byte[] bytes) { - Objects.requireNonNull(bytes, "Input byte array cannot be null."); - try { - MessageDigest digest = MessageDigest.getInstance("SHA-1"); - return bytesToHex(digest.digest(bytes)); - } catch (NoSuchAlgorithmException e) { - throw new AssertionError("SHA-1 algorithm is unavailable.", e); - } + return hash(bytes, DEFAULT_DIGEST); + } + + /** + * Hashes a given byte array using the specified algorithm. + * This method takes the provided byte array, hashes it using the provided algorithm, + * and returns the resulting hash as a hexadecimal string. + * + * @param bytes the byte array to hash + * @param algorithm the name of the hash algorithm + * + * @return a hexadecimal string representing the hash + * + * @throws NoSuchAlgorithmException if the algorithm is not available + * @throws NullPointerException if {@code bytes} or {@code algorithm} is null + */ + public static String hash(byte[] bytes, String algorithm) throws NoSuchAlgorithmException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + return hash(bytes, consume.apply(MessageDigest.getInstance(algorithm))); } /** - * Hashes the contents of an InputStream using SHA-1 without loading all bytes into memory. - * The stream is read sequentially in 1 MiB chunks. + * Hashes a given byte array using the specified algorithm and security provider. + * This method takes the provided byte array, hashes it using the provided algorithm, + * and returns the resulting hash as a hexadecimal string. 
+ * + * @param bytes the byte array to hash + * @param algorithm the name of the hash algorithm + * @param provider the security provider to use + * + * @return a hexadecimal string representing the hash + * + * @throws NoSuchAlgorithmException if the algorithm is not available from the provider + * @throws NullPointerException if any argument is null + */ + public static String hash(byte[] bytes, String algorithm, Provider provider) throws NoSuchAlgorithmException { + Objects.requireNonNull(algorithm, "Algorithm cannot be null"); + Objects.requireNonNull(provider, "Provider cannot be null"); + return hash(bytes, consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + /** + * Hashes a given byte array using a {@link MessageDigest} supplied by the caller. + *

+ * The provided {@code Supplier} is invoked exactly once to obtain a fresh + * {@link MessageDigest} instance. The digest is then used to hash the entire + * byte array in a single operation. + *

+ * + * @param bytes the byte array to hash + * @param digest a supplier that provides a {@link MessageDigest} instance + * + * @return a hexadecimal string representing the hash of the byte array + * + * @throws NullPointerException if {@code bytes}, {@code digest}, or the returned + * {@link MessageDigest} is {@code null} + * @throws RuntimeException if the supplier itself throws; the exception propagates + * unchanged to the caller. + */ + public static String hash(byte[] bytes, Supplier digest) { + Objects.requireNonNull(bytes, "bytes cannot be null"); + Objects.requireNonNull(digest, "digest supplier cannot be null"); + + MessageDigest d; + Objects.requireNonNull(d = digest.get(), "caller provided null MessageDigest"); + return bytesToHex(d.digest(bytes)); + } + + /** + * Hashes the contents of an {@link InputStream} using the SHA-1 algorithm. + *

+ * The stream is read sequentially in fixed-size chunks and is not buffered internally + * beyond the chunk size. This method does not close the stream. + *

* - * @param stream the InputStream to hash - * @return the hexadecimal SHA-1 hash - * @throws IOException if an I/O error occurs reading the stream - * @throws NullPointerException if the stream is null + * @param stream the input stream to hash + * + * @return a hexadecimal string representing the SHA-1 hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NullPointerException if {@code stream} is null */ public static String hash(InputStream stream) throws IOException { - Objects.requireNonNull(stream, "InputStream cannot be null."); - try { - MessageDigest digest = MessageDigest.getInstance("SHA-1"); - byte[] buffer = new byte[1048576]; // 1 MiB buffer - int bytesRead; - while ((bytesRead = stream.read(buffer)) != -1) { - digest.update(buffer, 0, bytesRead); - } - return bytesToHex(digest.digest()); - } catch (NoSuchAlgorithmException e) { - throw new AssertionError("SHA-1 algorithm is unavailable.", e); + return hash(stream, DEFAULT_DIGEST); + } + + /** + * Hashes the contents of an {@link InputStream} using the specified algorithm. + *

+ * The stream is consumed by this operation and is not closed. + *

+ * + * @param stream the input stream to hash + * @param algorithm the name of the hash algorithm + * + * @return a hexadecimal string representing the hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the algorithm is not available + * @throws NullPointerException if {@code stream} or {@code algorithm} is null + */ + public static String hash(InputStream stream, String algorithm) throws NoSuchAlgorithmException, IOException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + return hash(stream, consume.apply(MessageDigest.getInstance(algorithm))); + } + + /** + * Hashes the contents of an {@link InputStream} using the specified algorithm + * and security provider. + * + *

+ * The stream is consumed by this operation and is not closed. + *

+ * + * @param stream the input stream to hash + * @param algorithm the name of the hash algorithm + * @param provider the security provider to use + * + * @return a hexadecimal string representing the hash + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NoSuchAlgorithmException if the algorithm is not available from the provider + * @throws NullPointerException if any argument is null + */ + public static String hash(InputStream stream, String algorithm, Provider provider) throws NoSuchAlgorithmException, IOException { + Objects.requireNonNull(algorithm, "algorithm cannot be null"); + Objects.requireNonNull(provider, "provider cannot be null"); + return hash(stream, consume.apply(MessageDigest.getInstance(algorithm, provider))); + } + + /** + * Hashes the contents of an {@link InputStream} using a {@link MessageDigest} + * supplied by the caller. + *

+ * The provided {@code Supplier} is invoked exactly once to obtain a fresh + * {@link MessageDigest} instance. The stream is read sequentially in 1 MiB sized + * chunks and fed into the digest. + *

+ *

+ * The stream is consumed by this operation and is not closed. + *

+ * + * @param stream the input stream to hash + * @param digest a supplier that provides a {@link MessageDigest} instance + * + * @return a hexadecimal string representing the hash of the stream contents + * + * @throws IOException if an I/O error occurs while reading the stream + * @throws NullPointerException if {@code stream}, {@code digest}, or the returned + * {@link MessageDigest} is {@code null} + * @throws RuntimeException if the supplier itself throws, for example when the + * requested algorithm is unavailable; the exception propagates unchanged to + * the caller. + */ + public static String hash(InputStream stream, Supplier digest) throws IOException { + MessageDigest d; + Objects.requireNonNull(stream, "InputStream cannot be null"); + Objects.requireNonNull(digest, "MessageDigest cannot be null"); + Objects.requireNonNull(d = digest.get(), "caller provided null MessageDigest"); + + byte[] buffer = new byte[1048576]; // 1 MiB buffer + int bytesRead; + while ((bytesRead = stream.read(buffer)) != -1) { + d.update(buffer, 0, bytesRead); } + return bytesToHex(d.digest()); } + private static final char[] HEX = "0123456789abcdef".toCharArray(); + /** * Converts a byte array into a hexadecimal string representation. *

@@ -70,11 +230,12 @@ public static String hash(InputStream stream) throws IOException { * @throws NullPointerException if the provided byte array is null. */ private static String bytesToHex(byte[] bytes) { - Objects.requireNonNull(bytes, "Input byte array cannot be null."); - StringBuilder hex = new StringBuilder(); - for(byte b : bytes) { - hex.append(String.format("%02x", b)); + char[] out = new char[bytes.length * 2]; + for (int i = 0, j = 0; i < bytes.length; i++) { + int v = bytes[i] & 0xFF; + out[j++] = HEX[v >>> 4]; + out[j++] = HEX[v & 0x0F]; } - return hex.toString(); + return new String(out); } } \ No newline at end of file diff --git a/src/test/java/com/wildermods/masshash/BlobTests.java b/src/test/java/com/wildermods/masshash/BlobTests.java index b560e33..8da2078 100644 --- a/src/test/java/com/wildermods/masshash/BlobTests.java +++ b/src/test/java/com/wildermods/masshash/BlobTests.java @@ -5,7 +5,8 @@ import java.util.function.Supplier; import java.io.ByteArrayInputStream; -import java.io.InputStream; +import java.io.IOException; +import java.io.UncheckedIOException; import org.junit.jupiter.api.Test; @@ -16,21 +17,26 @@ public class BlobTests { private static final String testHash = "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"; - private static final Blob testBlob = new Blob("test".getBytes()); - private static final Blob testBlob2 = new Blob("test".getBytes()); - private static final LightBlob lightBlob = new LightBlob( - (Supplier)() -> { - return new ByteArrayInputStream( - "test".getBytes() - );}, testHash - ); + private static final BlobFactory factory = new BlobFactory(); + + private static final Blob testBlob = factory.blob("test".getBytes()); + private static final Blob testBlob2 = factory.blob("test".getBytes()); + private static final Blob lightBlob = factory.blob(() -> { + return new ByteArrayInputStream("test".getBytes()); + }); @Test public void testNullConstructors() { - assertThrowsExactly(NullPointerException.class, () -> new 
Blob((byte[])null)); - assertThrowsExactly(NullPointerException.class, () -> new Blob(new byte[0], (String)null)); - assertThrowsExactly(NullPointerException.class, () -> new LightBlob(null, "")); - assertThrowsExactly(NullPointerException.class, () -> new LightBlob(lightBlob.streamSupplier(), null)); + assertThrowsExactly(NullPointerException.class, () -> factory.blob((byte[])null)); + assertThrowsExactly(NullPointerException.class, () -> factory.blob(new byte[0], (String)null)); + assertThrowsExactly(NullPointerException.class, () -> factory.blob((Supplier)null, "")); + assertThrowsExactly(NullPointerException.class, () -> factory.blob(() -> { + try { + return lightBlob.dataStream(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, null)); } @Test @@ -69,13 +75,19 @@ public void testDroppedBlobEquality() { public void testVerification() throws IntegrityException { testBlob.verify(); - IBlob corrupt = new Blob(testBlob.data(), new Blob("corrupt".getBytes()).hash()); - IBlob corrupt2 = new LightBlob( - lightBlob.streamSupplier(), ByteUtil.hash("corrupt".getBytes()) + IBlob corrupt = factory.blob(testBlob.data(), factory.blob("corrupt".getBytes()).hash()); + IBlob corrupt2 = new Blob( + () -> { + try { + return lightBlob.dataStream(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, ByteUtil.hash("corrupt".getBytes()) ); assertThrowsExactly(IntegrityException.class, () -> corrupt.verify()); - assertThrowsExactly(IntegrityException.class, () -> new Blob("test".getBytes(), new Blob("corrupt".getBytes()))); + assertThrowsExactly(IntegrityException.class, () -> factory.blob("test".getBytes(), factory.blob("corrupt".getBytes()).hash()).verify()); assertThrowsExactly(IntegrityException.class, () -> corrupt2.verify()); } diff --git a/src/test/java/com/wildermods/masshash/HasherTests.java b/src/test/java/com/wildermods/masshash/HasherTests.java index c9b28bf..68116ad 100644 --- 
a/src/test/java/com/wildermods/masshash/HasherTests.java +++ b/src/test/java/com/wildermods/masshash/HasherTests.java @@ -42,7 +42,7 @@ void verifyTest() throws IntegrityException, IOException { System.out.println("Verification test:"); hasher = new Hasher(sources, (f,b) -> { try { - b.verify(); + ((IBlob)b).verify(); } catch (IntegrityException e) { throw new RuntimeException(e); } From 89403a882c16ff09a724ac7989cfa79908dd2251 Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Wed, 21 Jan 2026 02:19:29 -0500 Subject: [PATCH 07/10] Add missing method --- src/main/java/com/wildermods/masshash/BlobFactory.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/com/wildermods/masshash/BlobFactory.java b/src/main/java/com/wildermods/masshash/BlobFactory.java index 53b78f9..59c149f 100644 --- a/src/main/java/com/wildermods/masshash/BlobFactory.java +++ b/src/main/java/com/wildermods/masshash/BlobFactory.java @@ -27,6 +27,10 @@ public BlobFactory() { this(null, ByteUtil.DEFAULT_DIGEST); } + public BlobFactory(String algorithm) throws NoSuchAlgorithmException { + this(null, ByteUtil.consume.apply(MessageDigest.getInstance(algorithm))); + } + public BlobFactory(Supplier digest) { this(null, digest); } From c0584bf87d40ecd5fe3d62b4790724213a60c0de Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Wed, 21 Jan 2026 02:39:40 -0500 Subject: [PATCH 08/10] add hash convenience methods --- .../com/wildermods/masshash/BlobFactory.java | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/wildermods/masshash/BlobFactory.java b/src/main/java/com/wildermods/masshash/BlobFactory.java index 59c149f..f2fc46a 100644 --- a/src/main/java/com/wildermods/masshash/BlobFactory.java +++ b/src/main/java/com/wildermods/masshash/BlobFactory.java @@ -55,6 +55,10 @@ public Blob blob(Supplier stream) { } } + public Blob blob(Supplier stream, Hash hash) { + return new Blob(digest, stream, hash.hash()); + } + public Blob 
blob(Supplier stream, String hash) { final Blob blob = new Blob(digest, stream, hash); return blob; @@ -72,7 +76,7 @@ public Blob blob(Path path) throws IOException { return blob(streamSupplier); } - public Blob blob(Path path, String expectedHash) throws IOException { + public Blob blob(Path path, Hash hash) { Supplier streamSupplier = () -> { try { return Files.newInputStream(path); @@ -81,7 +85,19 @@ public Blob blob(Path path, String expectedHash) throws IOException { } }; - return blob(streamSupplier, expectedHash); + return blob(streamSupplier, hash); + } + + public Blob blob(Path path, String hash) throws IOException { + Supplier streamSupplier = () -> { + try { + return Files.newInputStream(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + + return blob(streamSupplier, hash); } public Blob blob(byte[] data) { @@ -92,6 +108,14 @@ public Blob blob(byte[] data) { return blob(streamSupplier); } + public Blob blob(byte[] data, Hash hash) { + Supplier streamSupplier = () -> { + return new ByteArrayInputStream(data); + }; + + return blob(streamSupplier, hash); + } + public Blob blob(byte[] data, String hash) { Supplier streamSupplier = () -> { return new ByteArrayInputStream(data); From dbabadac2527210e2bd7ae1180b44212abd522ad Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Wed, 21 Jan 2026 02:48:49 -0500 Subject: [PATCH 09/10] remove dropData --- .../java/com/wildermods/masshash/Blob.java | 11 ----------- .../java/com/wildermods/masshash/Hasher.java | 4 +--- .../java/com/wildermods/masshash/IBlob.java | 8 -------- .../com/wildermods/masshash/BlobTests.java | 18 +----------------- 4 files changed, 2 insertions(+), 39 deletions(-) diff --git a/src/main/java/com/wildermods/masshash/Blob.java b/src/main/java/com/wildermods/masshash/Blob.java index 5996391..ad92fbd 100644 --- a/src/main/java/com/wildermods/masshash/Blob.java +++ b/src/main/java/com/wildermods/masshash/Blob.java @@ -120,17 +120,6 @@ public void verify() throws 
IntegrityException { throw new UncheckedIOException(e); } } - - /** - * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. - * The original blob still holds the data for as long as you keep it referenced. - * - * @return A new {@link Hash} object that represents this blob, but with no associated data. - */ - @Override - public Hash dropData() { - return Hash.of(hash()); - } @Override public int hashCode() { diff --git a/src/main/java/com/wildermods/masshash/Hasher.java b/src/main/java/com/wildermods/masshash/Hasher.java index 2ac7b3f..3bb64de 100644 --- a/src/main/java/com/wildermods/masshash/Hasher.java +++ b/src/main/java/com/wildermods/masshash/Hasher.java @@ -223,11 +223,9 @@ public Hasher(final Stream files, int threads, final Predicate predi digest.reset(); IBlob blob = factory.blob(file); forEachBlob.accept(newFile, (IBlob) blob); - Hash hash = blob.dropData(); - blob = null; //garbage collect this asap //Group files by their content hash. Files with the same hash will share the same key - local.computeIfAbsent(hash, k -> new HashSet<>()).add(newFile.get()); + local.computeIfAbsent(blob, k -> new HashSet<>()).add(newFile.get()); } return local; })); diff --git a/src/main/java/com/wildermods/masshash/IBlob.java b/src/main/java/com/wildermods/masshash/IBlob.java index 89b688f..1540b55 100644 --- a/src/main/java/com/wildermods/masshash/IBlob.java +++ b/src/main/java/com/wildermods/masshash/IBlob.java @@ -20,14 +20,6 @@ public interface IBlob extends Data, Hash { */ public void verify() throws IntegrityException; - /** - * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. - * The original blob still holds the data for as long as you keep it referenced. - * - * @return A new {@link Hash} object that represents this blob, but with no associated data. - */ - public Hash dropData(); - /** * Returns the full byte array of the blob data. *

diff --git a/src/test/java/com/wildermods/masshash/BlobTests.java b/src/test/java/com/wildermods/masshash/BlobTests.java index 8da2078..3ad671b 100644 --- a/src/test/java/com/wildermods/masshash/BlobTests.java +++ b/src/test/java/com/wildermods/masshash/BlobTests.java @@ -36,7 +36,7 @@ public void testNullConstructors() { } catch (IOException e) { throw new UncheckedIOException(e); } - }, null)); + }, (String)null)); } @Test @@ -55,22 +55,6 @@ public void testBlobEquality() { assertEquals(testBlob, lightBlob); } - @Test - public void testDroppedBlobEquality() { - Hash testBlob2 = testBlob.dropData(); - - assertEquals(testBlob, testBlob); - assertEquals(testBlob, testBlob2); - assertEquals(testBlob2, testBlob); - assertEquals(testBlob2, testBlob2); - - Hash lightBlob2 = lightBlob.dropData(); - assertEquals(testBlob, testBlob); - assertEquals(testBlob, lightBlob2); - assertEquals(lightBlob2, testBlob); - assertEquals(lightBlob2, lightBlob2); - } - @Test public void testVerification() throws IntegrityException { testBlob.verify(); From c43d7bd7af15609ba003d0de0e6ec166f15347a2 Mon Sep 17 00:00:00 2001 From: Gamebuster19901 Date: Wed, 21 Jan 2026 03:14:02 -0500 Subject: [PATCH 10/10] Version 2.0.0.0 --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index 0ddf0d6..d11fb73 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -massHashVersion = 1.3.2.1 +massHashVersion = 2.0.0.0