From fc1212b82328e7ca9b59af94d158b26625a6098b Mon Sep 17 00:00:00 2001 From: Adam Cornille Date: Fri, 5 Apr 2013 16:08:25 -0700 Subject: [PATCH 1/4] compress may return less than target bytes Instead of throwing an error or zero-padding, "compress" now returns the input bytes if there are less than or equal to "target" number of them. I think this is logical since the goal of compress is to reduce the complexity of the digest before making it human consumable. In this case the complexity is already low enough to proceed. --- humanhash.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/humanhash.py b/humanhash.py index b9380df..4e8bf84 100644 --- a/humanhash.py +++ b/humanhash.py @@ -99,18 +99,15 @@ def compress(bytes, target): >>> HumanHasher.compress(bytes, 4) [205, 128, 156, 96] - Attempting to compress a smaller number of bytes to a larger number is - an error: + If there are less than the target number bytes, the input bytes will be returned - >>> HumanHasher.compress(bytes, 15) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - ValueError: Fewer input bytes than requested output + >>> HumanHasher.compress(bytes, 15) + [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151] """ length = len(bytes) - if target > length: - raise ValueError("Fewer input bytes than requested output") + if target >= length: + return bytes # Split `bytes` into `target` segments. seg_size = length // target From 26b369255c71d925710a7e73964de46ad4595151 Mon Sep 17 00:00:00 2001 From: Adam Cornille Date: Fri, 5 Apr 2013 16:29:06 -0700 Subject: [PATCH 2/4] Revised compression method Excess bytes are now distributed amongst the compressed bytes, instead of being dumped into the final bit as they were before. 
--- humanhash.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/humanhash.py b/humanhash.py index 4e8bf84..f8d3004 100644 --- a/humanhash.py +++ b/humanhash.py @@ -7,6 +7,7 @@ import operator import uuid as uuidlib +import math DEFAULT_WORDLIST = ( @@ -110,16 +111,15 @@ def compress(bytes, target): return bytes # Split `bytes` into `target` segments. - seg_size = length // target - segments = [bytes[i * seg_size:(i + 1) * seg_size] - for i in xrange(target)] - # Catch any left-over bytes in the last segment. - segments[-1].extend(bytes[target * seg_size:]) - + seg_size = float(length) / float(target) + segments = [0] * target + seg_num = 0 # Use a simple XOR checksum-like function for compression. - checksum = lambda bytes: reduce(operator.xor, bytes, 0) - checksums = map(checksum, segments) - return checksums + for i, byte in enumerate(bytes): + seg_num = min(int(math.floor(i / seg_size)), target-1) + segments[seg_num] = operator.xor(segments[seg_num], byte) + + return segments def uuid(self, **params): @@ -136,4 +136,4 @@ def uuid(self, **params): DEFAULT_HASHER = HumanHasher() uuid = DEFAULT_HASHER.uuid -humanize = DEFAULT_HASHER.humanize +humanize = DEFAULT_HASHER.humanize \ No newline at end of file From 6ad5c712237e18f8870e782ad5576fd07dfdf3f0 Mon Sep 17 00:00:00 2001 From: lafncow Date: Tue, 9 May 2017 10:57:40 -0700 Subject: [PATCH 3/4] Compression method comments --- humanhash.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/humanhash.py b/humanhash.py index f8d3004..e57b36a 100644 --- a/humanhash.py +++ b/humanhash.py @@ -107,16 +107,24 @@ def compress(bytes, target): """ length = len(bytes) + # If there are less than the target number bytes, the input bytes will be returned if target >= length: return bytes - # Split `bytes` into `target` segments. 
+ # Split `bytes` evenly into `target` segments + # Each segment will be composed of `seg_size` bytes, rounded down for some segments seg_size = float(length) / float(target) + # Initialize `target` number of segments segments = [0] * target seg_num = 0 - # Use a simple XOR checksum-like function for compression. + + # Use a simple XOR checksum-like function for compression for i, byte in enumerate(bytes): + # Divide the byte index by the segment size to determine which segment to place it in + # Floor to create a valid segment index + # Min to ensure the index is within `target` seg_num = min(int(math.floor(i / seg_size)), target-1) + # Apply XOR to the existing segment and the byte segments[seg_num] = operator.xor(segments[seg_num], byte) return segments From de4fe4325b8e80732dfbd2099f74e82f2073d906 Mon Sep 17 00:00:00 2001 From: lafncow Date: Wed, 10 May 2017 12:53:00 -0700 Subject: [PATCH 4/4] Updated tests Set new correct test outputs for revised compression method. Edited comments to be within line limit. --- humanhash.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/humanhash.py b/humanhash.py index 972f6e4..4f088a4 100644 --- a/humanhash.py +++ b/humanhash.py @@ -104,7 +104,7 @@ def humanize_list(self, hexdigest, words=4): >>> digest = '60ad8d0d871b6095808297' >>> HumanHasher().humanize_list(digest) - ['sodium', 'magnesium', 'nineteen', 'hydrogen'] + ['equal', 'monkey', 'lake', 'beryllium'] """ # Gets a list of byte values between 0-255. 
bytes_ = map(lambda x: int(x, 16), @@ -123,11 +123,11 @@ def humanize(self, hexdigest, words=4, separator='-'): >>> digest = '60ad8d0d871b6095808297' >>> HumanHasher().humanize(digest) - 'sodium-magnesium-nineteen-hydrogen' + 'equal-monkey-lake-beryllium' >>> HumanHasher().humanize(digest, words=6) - 'hydrogen-pasta-mississippi-august-may-lithium' + 'sodium-magnesium-nineteen-william-alanine-nebraska' >>> HumanHasher().humanize(digest, separator='*') - 'sodium*magnesium*nineteen*hydrogen' + 'equal*monkey*lake*beryllium' """ # Map the compressed byte values through the word list. return separator.join(self.humanize_list(hexdigest, words)) @@ -140,9 +140,9 @@ def compress(bytes_, target): >>> bytes_ = [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151] >>> list(HumanHasher.compress(bytes_, 4)) - [205, 128, 156, 96] + [64, 145, 117, 21] - If there are less than the target number bytes, the input bytes will be returned + If there are less than the target number bytes, return input bytes >>> list(HumanHasher.compress(bytes_, 15)) # doctest: +ELLIPSIS [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151] @@ -151,12 +151,12 @@ def compress(bytes_, target): bytes_list = list(bytes_) length = len(bytes_list) - # If there are less than the target number bytes, the input bytes will be returned + # If there are less than the target number bytes, return input bytes if target >= length: return bytes_ # Split `bytes` evenly into `target` segments - # Each segment will be composed of `seg_size` bytes, rounded down for some segments + # Each segment hashes `seg_size` bytes, rounded down for some seg_size = float(length) / float(target) # Initialize `target` number of segments segments = [0] * target @@ -164,7 +164,7 @@ def compress(bytes_, target): # Use a simple XOR checksum-like function for compression for i, byte in enumerate(bytes_list): - # Divide the byte index by the segment size to determine which segment to place it in + # Divide the byte index by the segment size to assign 
its segment # Floor to create a valid segment index # Min to ensure the index is within `target` seg_num = min(int(math.floor(i / seg_size)), target-1)