From fc1212b82328e7ca9b59af94d158b26625a6098b Mon Sep 17 00:00:00 2001 From: Adam Cornille Date: Fri, 5 Apr 2013 16:08:25 -0700 Subject: [PATCH 1/3] compress may return less than target bytes Instead of throwing an error or zero-padding, "compress" now returns the input bytes unchanged if there are "target" or fewer of them. I think this is logical since the goal of compress is to reduce the complexity of the digest before making it human-consumable. In this case the complexity is already low enough to proceed. --- humanhash.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/humanhash.py b/humanhash.py index b9380df..4e8bf84 100644 --- a/humanhash.py +++ b/humanhash.py @@ -99,18 +99,15 @@ def compress(bytes, target): >>> HumanHasher.compress(bytes, 4) [205, 128, 156, 96] - Attempting to compress a smaller number of bytes to a larger number is - an error: + If there are less than the target number bytes, the input bytes will be returned - >>> HumanHasher.compress(bytes, 15) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - ValueError: Fewer input bytes than requested output + >>> HumanHasher.compress(bytes, 15) + [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151] """ length = len(bytes) - if target > length: - raise ValueError("Fewer input bytes than requested output") + if target >= length: + return bytes # Split `bytes` into `target` segments. seg_size = length // target From 26b369255c71d925710a7e73964de46ad4595151 Mon Sep 17 00:00:00 2001 From: Adam Cornille Date: Fri, 5 Apr 2013 16:29:06 -0700 Subject: [PATCH 2/3] Revised compression method Excess bytes are now distributed amongst the compressed bytes, instead of all being dumped into the final segment as they were before. 
--- humanhash.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/humanhash.py b/humanhash.py index 4e8bf84..f8d3004 100644 --- a/humanhash.py +++ b/humanhash.py @@ -7,6 +7,7 @@ import operator import uuid as uuidlib +import math DEFAULT_WORDLIST = ( @@ -110,16 +111,15 @@ def compress(bytes, target): return bytes # Split `bytes` into `target` segments. - seg_size = length // target - segments = [bytes[i * seg_size:(i + 1) * seg_size] - for i in xrange(target)] - # Catch any left-over bytes in the last segment. - segments[-1].extend(bytes[target * seg_size:]) - + seg_size = float(length) / float(target) + segments = [0] * target + seg_num = 0 # Use a simple XOR checksum-like function for compression. - checksum = lambda bytes: reduce(operator.xor, bytes, 0) - checksums = map(checksum, segments) - return checksums + for i, byte in enumerate(bytes): + seg_num = min(int(math.floor(i / seg_size)), target-1) + segments[seg_num] = operator.xor(segments[seg_num], byte) + + return segments def uuid(self, **params): @@ -136,4 +136,4 @@ def uuid(self, **params): DEFAULT_HASHER = HumanHasher() uuid = DEFAULT_HASHER.uuid -humanize = DEFAULT_HASHER.humanize +humanize = DEFAULT_HASHER.humanize \ No newline at end of file From 6ad5c712237e18f8870e782ad5576fd07dfdf3f0 Mon Sep 17 00:00:00 2001 From: lafncow Date: Tue, 9 May 2017 10:57:40 -0700 Subject: [PATCH 3/3] Compression method comments --- humanhash.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/humanhash.py b/humanhash.py index f8d3004..e57b36a 100644 --- a/humanhash.py +++ b/humanhash.py @@ -107,16 +107,24 @@ def compress(bytes, target): """ length = len(bytes) + # If there are less than the target number bytes, the input bytes will be returned if target >= length: return bytes - # Split `bytes` into `target` segments. 
+ # Split `bytes` evenly into `target` segments + # Each segment will be composed of `seg_size` bytes, rounded down for some segments seg_size = float(length) / float(target) + # Initialize `target` number of segments segments = [0] * target seg_num = 0 - # Use a simple XOR checksum-like function for compression. + + # Use a simple XOR checksum-like function for compression for i, byte in enumerate(bytes): + # Divide the byte index by the segment size to determine which segment to place it in + # Floor to create a valid segment index + # Min to ensure the index is within `target` seg_num = min(int(math.floor(i / seg_size)), target-1) + # Apply XOR to the existing segment and the byte segments[seg_num] = operator.xor(segments[seg_num], byte) return segments