From fc1212b82328e7ca9b59af94d158b26625a6098b Mon Sep 17 00:00:00 2001 From: Adam Cornille Date: Fri, 5 Apr 2013 16:08:25 -0700 Subject: [PATCH 1/4] compress may return less than target bytes Instead of throwing an error or zero-padding, "compress" now returns the input bytes if there are less than or equal to "target" number of them. I think this is logical since the goal of compress is to reduce the complexity of the digest before making it human consumable. In this case the complexity is already low enough to proceed. --- humanhash.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/humanhash.py b/humanhash.py index b9380df..4e8bf84 100644 --- a/humanhash.py +++ b/humanhash.py @@ -99,18 +99,15 @@ def compress(bytes, target): >>> HumanHasher.compress(bytes, 4) [205, 128, 156, 96] - Attempting to compress a smaller number of bytes to a larger number is - an error: + If there are less than the target number bytes, the input bytes will be returned - >>> HumanHasher.compress(bytes, 15) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - ValueError: Fewer input bytes than requested output + >>> HumanHasher.compress(bytes, 15) + [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151] """ length = len(bytes) - if target > length: - raise ValueError("Fewer input bytes than requested output") + if target >= length: + return bytes # Split `bytes` into `target` segments. seg_size = length // target From 26b369255c71d925710a7e73964de46ad4595151 Mon Sep 17 00:00:00 2001 From: Adam Cornille Date: Fri, 5 Apr 2013 16:29:06 -0700 Subject: [PATCH 2/4] Revised compression method Excess bytes are now distributed amongst the compressed bytes, instead of being dumped into the final bit as they were before. 
--- humanhash.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/humanhash.py b/humanhash.py index 4e8bf84..f8d3004 100644 --- a/humanhash.py +++ b/humanhash.py @@ -7,6 +7,7 @@ import operator import uuid as uuidlib +import math DEFAULT_WORDLIST = ( @@ -110,16 +111,15 @@ def compress(bytes, target): return bytes # Split `bytes` into `target` segments. - seg_size = length // target - segments = [bytes[i * seg_size:(i + 1) * seg_size] - for i in xrange(target)] - # Catch any left-over bytes in the last segment. - segments[-1].extend(bytes[target * seg_size:]) - + seg_size = float(length) / float(target) + segments = [0] * target + seg_num = 0 # Use a simple XOR checksum-like function for compression. - checksum = lambda bytes: reduce(operator.xor, bytes, 0) - checksums = map(checksum, segments) - return checksums + for i, byte in enumerate(bytes): + seg_num = min(int(math.floor(i / seg_size)), target-1) + segments[seg_num] = operator.xor(segments[seg_num], byte) + + return segments def uuid(self, **params): @@ -136,4 +136,4 @@ def uuid(self, **params): DEFAULT_HASHER = HumanHasher() uuid = DEFAULT_HASHER.uuid -humanize = DEFAULT_HASHER.humanize +humanize = DEFAULT_HASHER.humanize \ No newline at end of file From 6ad5c712237e18f8870e782ad5576fd07dfdf3f0 Mon Sep 17 00:00:00 2001 From: lafncow Date: Tue, 9 May 2017 10:57:40 -0700 Subject: [PATCH 3/4] Compression method comments --- humanhash.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/humanhash.py b/humanhash.py index f8d3004..e57b36a 100644 --- a/humanhash.py +++ b/humanhash.py @@ -107,16 +107,24 @@ def compress(bytes, target): """ length = len(bytes) + # If there are less than the target number bytes, the input bytes will be returned if target >= length: return bytes - # Split `bytes` into `target` segments. 
+ # Split `bytes` evenly into `target` segments + # Each segment will be composed of `seg_size` bytes, rounded down for some segments seg_size = float(length) / float(target) + # Initialize `target` number of segments segments = [0] * target seg_num = 0 - # Use a simple XOR checksum-like function for compression. + + # Use a simple XOR checksum-like function for compression for i, byte in enumerate(bytes): + # Divide the byte index by the segment size to determine which segment to place it in + # Floor to create a valid segment index + # Min to ensure the index is within `target` seg_num = min(int(math.floor(i / seg_size)), target-1) + # Apply XOR to the existing segment and the byte segments[seg_num] = operator.xor(segments[seg_num], byte) return segments From de4fe4325b8e80732dfbd2099f74e82f2073d906 Mon Sep 17 00:00:00 2001 From: lafncow Date: Wed, 10 May 2017 12:53:00 -0700 Subject: [PATCH 4/4] Updated tests Set new correct test outputs for revised compression method. Edited comments to be within line limit. --- humanhash.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/humanhash.py b/humanhash.py index 972f6e4..4f088a4 100644 --- a/humanhash.py +++ b/humanhash.py @@ -104,7 +104,7 @@ def humanize_list(self, hexdigest, words=4): >>> digest = '60ad8d0d871b6095808297' >>> HumanHasher().humanize_list(digest) - ['sodium', 'magnesium', 'nineteen', 'hydrogen'] + ['equal', 'monkey', 'lake', 'beryllium'] """ # Gets a list of byte values between 0-255. 
bytes_ = map(lambda x: int(x, 16), @@ -123,11 +123,11 @@ def humanize(self, hexdigest, words=4, separator='-'): >>> digest = '60ad8d0d871b6095808297' >>> HumanHasher().humanize(digest) - 'sodium-magnesium-nineteen-hydrogen' + 'equal-monkey-lake-beryllium' >>> HumanHasher().humanize(digest, words=6) - 'hydrogen-pasta-mississippi-august-may-lithium' + 'sodium-magnesium-nineteen-william-alanine-nebraska' >>> HumanHasher().humanize(digest, separator='*') - 'sodium*magnesium*nineteen*hydrogen' + 'equal*monkey*lake*beryllium' """ # Map the compressed byte values through the word list. return separator.join(self.humanize_list(hexdigest, words)) @@ -140,9 +140,9 @@ def compress(bytes_, target): >>> bytes_ = [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151] >>> list(HumanHasher.compress(bytes_, 4)) - [205, 128, 156, 96] + [64, 145, 117, 21] - If there are less than the target number bytes, the input bytes will be returned + If there are less than the target number bytes, return input bytes >>> list(HumanHasher.compress(bytes_, 15)) # doctest: +ELLIPSIS [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151] @@ -151,12 +151,12 @@ def compress(bytes_, target): bytes_list = list(bytes_) length = len(bytes_list) - # If there are less than the target number bytes, the input bytes will be returned + # If there are less than the target number bytes, return input bytes if target >= length: return bytes_ # Split `bytes` evenly into `target` segments - # Each segment will be composed of `seg_size` bytes, rounded down for some segments + # Each segment hashes `seg_size` bytes, rounded down for some seg_size = float(length) / float(target) # Initialize `target` number of segments segments = [0] * target @@ -164,7 +164,7 @@ def compress(bytes_, target): # Use a simple XOR checksum-like function for compression for i, byte in enumerate(bytes_list): - # Divide the byte index by the segment size to determine which segment to place it in + # Divide the byte index by the segment size to assign 
its segment # Floor to create a valid segment index # Min to ensure the index is within `target` seg_num = min(int(math.floor(i / seg_size)), target-1)