From fc1212b82328e7ca9b59af94d158b26625a6098b Mon Sep 17 00:00:00 2001
From: Adam Cornille <diggity@adamcornille.com>
Date: Fri, 5 Apr 2013 16:08:25 -0700
Subject: [PATCH 1/3] compress may return less than target bytes

Instead of raising an error or zero-padding, "compress" now returns the
input bytes unchanged if the number of them is less than or equal to
"target". I think this is logical, since the goal of compress is to
reduce the complexity of the digest before making it human-consumable.
In this case the complexity is already low enough to proceed.
---
 humanhash.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/humanhash.py b/humanhash.py
index b9380df..4e8bf84 100644
--- a/humanhash.py
+++ b/humanhash.py
@@ -99,18 +99,15 @@ def compress(bytes, target):
             >>> HumanHasher.compress(bytes, 4)
             [205, 128, 156, 96]
 
-        Attempting to compress a smaller number of bytes to a larger number is
-        an error:
+        If there are no more than the target number of bytes, the input bytes are returned unchanged:
 
-            >>> HumanHasher.compress(bytes, 15)  # doctest: +ELLIPSIS
-            Traceback (most recent call last):
-            ...
-            ValueError: Fewer input bytes than requested output
+            >>> HumanHasher.compress(bytes, 15)
+            [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151]
         """
 
         length = len(bytes)
-        if target > length:
-            raise ValueError("Fewer input bytes than requested output")
+        if target >= length:
+            return bytes
 
         # Split `bytes` into `target` segments.
         seg_size = length // target

From 26b369255c71d925710a7e73964de46ad4595151 Mon Sep 17 00:00:00 2001
From: Adam Cornille <diggity@adamcornille.com>
Date: Fri, 5 Apr 2013 16:29:06 -0700
Subject: [PATCH 2/3] Revised compression method

Excess bytes are now distributed among the compressed segments, instead
of all being folded into the final segment as they were before.
---
 humanhash.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/humanhash.py b/humanhash.py
index 4e8bf84..f8d3004 100644
--- a/humanhash.py
+++ b/humanhash.py
@@ -7,6 +7,7 @@
 
 import operator
 import uuid as uuidlib
+import math
 
 
 DEFAULT_WORDLIST = (
@@ -110,16 +111,15 @@ def compress(bytes, target):
             return bytes
 
         # Split `bytes` into `target` segments.
-        seg_size = length // target
-        segments = [bytes[i * seg_size:(i + 1) * seg_size]
-                    for i in xrange(target)]
-        # Catch any left-over bytes in the last segment.
-        segments[-1].extend(bytes[target * seg_size:])
-
+        seg_size = float(length) / float(target)
+        segments = [0] * target
+        seg_num = 0
         # Use a simple XOR checksum-like function for compression.
-        checksum = lambda bytes: reduce(operator.xor, bytes, 0)
-        checksums = map(checksum, segments)
-        return checksums
+        for i, byte in enumerate(bytes):
+            seg_num = min(int(math.floor(i / seg_size)), target-1)
+            segments[seg_num] = operator.xor(segments[seg_num], byte)
+
+        return segments
 
     def uuid(self, **params):
 
@@ -136,4 +136,4 @@ def uuid(self, **params):
 
 DEFAULT_HASHER = HumanHasher()
 uuid = DEFAULT_HASHER.uuid
-humanize = DEFAULT_HASHER.humanize
+humanize = DEFAULT_HASHER.humanize
\ No newline at end of file

From 6ad5c712237e18f8870e782ad5576fd07dfdf3f0 Mon Sep 17 00:00:00 2001
From: lafncow <lafncow@gmail.com>
Date: Tue, 9 May 2017 10:57:40 -0700
Subject: [PATCH 3/3] Compression method comments

---
 humanhash.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/humanhash.py b/humanhash.py
index f8d3004..e57b36a 100644
--- a/humanhash.py
+++ b/humanhash.py
@@ -107,16 +107,24 @@ def compress(bytes, target):
         """
 
         length = len(bytes)
+        # If there are no more than `target` bytes, return the input bytes unchanged
         if target >= length:
             return bytes
 
-        # Split `bytes` into `target` segments.
+        # Split `bytes` evenly into `target` segments
+        # Each segment will be composed of `seg_size` bytes, rounded down for some segments
         seg_size = float(length) / float(target)
+        # Initialize `target` number of segments
         segments = [0] * target
         seg_num = 0
-        # Use a simple XOR checksum-like function for compression.
+
+        # Use a simple XOR checksum-like function for compression
         for i, byte in enumerate(bytes):
+            # Divide the byte index by the segment size to determine which segment to place it in
+            # Floor to create a valid segment index
+            # Min to ensure the index is within `target`
             seg_num = min(int(math.floor(i / seg_size)), target-1)
+            # Apply XOR to the existing segment and the byte
             segments[seg_num] = operator.xor(segments[seg_num], byte)
 
         return segments