From ea155265ee5ffa604523e753f9516cfb52459478 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Mon, 23 Feb 2015 18:48:02 +0000 Subject: [PATCH 1/7] trivial: Run code through autopep8 Resolve PEP8 warnings. Signed-off-by: Stephen Finucane --- jwzthreading.py | 64 +++++++++++++++++++++++++++--------------------- setup.py | 24 +++++++++--------- test/test_jwz.py | 13 +++++----- 3 files changed, 54 insertions(+), 47 deletions(-) diff --git a/jwzthreading.py b/jwzthreading.py index de19638..cc98b50 100644 --- a/jwzthreading.py +++ b/jwzthreading.py @@ -20,7 +20,6 @@ Copyright (c) 2003-2010, A.M. Kuchling. This code is under a BSD-style license; see the LICENSE file for details. - """ import re @@ -28,6 +27,7 @@ __all__ = ['Message', 'make_message', 'thread'] + class Container: """Contains a tree of messages. @@ -43,35 +43,28 @@ class Container: .parent : Container Parent container; may be None. """ - - #__slots__ = ['message', 'parent', 'children', 'id'] - def __init__ (self): + def __init__(self): self.message = self.parent = None self.children = [] - def __repr__ (self): + def __repr__(self): return '<%s %x: %r>' % (self.__class__.__name__, id(self), self.message) - def is_dummy (self): + def is_dummy(self): return self.message is None - def add_child (self, child): - ##print "Assigning: " - ##print_container(child, 0, True) - ##print "as children of: " - ##print_container(self, 0, True) - + def add_child(self, child): if child.parent: child.parent.remove_child(child) self.children.append(child) child.parent = self - def remove_child (self, child): + def remove_child(self, child): self.children.remove(child) child.parent = None - def has_descendant (self, ctr): + def has_descendant(self, ctr): """(Container): bool Returns true if 'ctr' is a descendant of this Container. @@ -92,9 +85,10 @@ def has_descendant (self, ctr): stack.append(child) return False + def uniq(alist): set = {} - return [set.setdefault(e,e) for e in alist if e not in set.keys()] + return [set.setdefault(e, e) for e in alist if e not in set.keys()] msgid_pat = re.compile('<([^>]+)>') restrip_pat = re.compile("""( @@ -102,8 +96,10 @@ def uniq(alist): \s*)+ """, re.I | re.VERBOSE) -def make_message (msg): + +def make_message(msg): """(msg:rfc822.Message) : Message + Create a Message object for threading purposes from an RFC822 message. """ @@ -131,6 +127,7 @@ def make_message (msg): return new + class Message (object): """Represents a message to be threaded. @@ -143,26 +140,24 @@ class Message (object): List of message IDs from the In-Reply-To and References headers. .message : any Can contain information for the caller's use (e.g. an RFC-822 message object). - """ - __slots__ = ['message', 'message_id', 'references', 'subject'] - def __init__(self, msg=None): self.message = msg self.message_id = None self.references = [] self.subject = None - def __repr__ (self): + def __repr__(self): return '<%s: %r>' % (self.__class__.__name__, self.message_id) -def prune_container (container): + +def prune_container(container): """(container:Container) : [Container] + Recursively prune a tree of containers, as described in step 4 of the algorithm. Returns a list of the children that should replace this container. """ - # Prune children, assembling a new list of children new_children = [] @@ -186,11 +181,11 @@ def prune_container (container): ## print_container(ctr, 0, True) if (container.message is None and - len(container.children) == 0): + len(container.children) == 0): # 4.A: nuke empty containers return [] elif (container.message is None and - (len(container.children)==1 or + (len(container.children) == 1 or container.parent is not None)): # 4.B: promote children L = container.children[:] @@ -203,7 +198,7 @@ def prune_container (container): return [container] -def thread (msglist): +def thread(msglist): """([Message]) : {string:Container} The main threading function. This takes a list of Message @@ -212,7 +207,6 @@ def thread (msglist): list of subtrees, so callers can then sort children by date or poster or whatever. """ - id_table = {} for msg in msglist: # 1A @@ -267,9 +261,15 @@ def thread (msglist): for container in root_set: assert container.parent == None +<<<<<<< 1a77baff4b05ece3d4c54e0cd806b9f8e755f8df ##print 'before' ##for ctr in root_set: ## print_container(ctr, 0, True) +======= + # print 'before' + # for ctr in root_set: + # print_container(ctr) +>>>>>>> trivial: Run code through autopep8 new_root_set = [] for container in root_set: @@ -278,9 +278,15 @@ def thread (msglist): root_set = new_root_set +<<<<<<< 1a77baff4b05ece3d4c54e0cd806b9f8e755f8df ##print '\n\nafter' ##for ctr in root_set: ## print_container(ctr, 0, True) +======= + # print '\n\nafter' + # for ctr in root_set: + # print_container(ctr) +>>>>>>> trivial: Run code through autopep8 # 5. Group root set by subject subject_table = {} @@ -340,7 +346,8 @@ def thread (msglist): def print_container(ctr, depth=0, debug=0): import sys - sys.stdout.write(depth*' ') + + sys.stdout.write(depth * ' ') if debug: # Printing the repr() is more useful for debugging sys.stdout.write(repr(ctr) + ' ' + repr(ctr.message and ctr.message.subject)) @@ -349,7 +356,8 @@ def print_container(ctr, depth=0, debug=0): sys.stdout.write('\n') for c in ctr.children: - print_container(c, depth+1, debug) + print_container(c, depth + 1, debug) + print_container(c, depth + 1) def main(): diff --git a/setup.py b/setup.py index 81e972c..1ea3880 100644 --- a/setup.py +++ b/setup.py @@ -1,17 +1,17 @@ -from distutils import core +from distutils import core kw = { - 'name' : 'jwzthreading', - 'version' : '0.92', - 'description' : 'Algorithm for threading mail messages.', + 'name': 'jwzthreading', + 'version': '0.92', + 'description': 'Algorithm for threading mail messages.', 'long_description' : '''Contains an implementation of an algorithm for threading mail messages, as described at http://www.jwz.org/doc/threading.html.''', - 'author' : "A.M. Kuchling", - 'author_email' : "amk@amk.ca", - 'url' : "http://www.amk.ca/python/code/jwz.html", - 'py_modules' : ['jwzthreading'], - 'classifiers' : [ + 'author': "A.M. Kuchling", + 'author_email': "amk@amk.ca", + 'url': "http://www.amk.ca/python/code/jwz.html", + 'py_modules': ['jwzthreading'], + 'classifiers': [ 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', @@ -19,9 +19,7 @@ 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries', 'Topic :: Communications :: Email', - ] + ] } - -core.setup(**kw) - +core.setup(**kw) diff --git a/test/test_jwz.py b/test/test_jwz.py index e9464d8..e25471e 100644 --- a/test/test_jwz.py +++ b/test/test_jwz.py @@ -16,13 +16,14 @@ tested_modules = ["jwzthreading"] -def make_message (S): + +def make_message(S): return message_from_string(S) class JWZTest (unittest.TestCase): - def test_container (self): + def test_container(self): c = jwzthreading.Container() repr(c) @@ -70,11 +71,11 @@ def test_deep_container(self): # Test a search that fails self.assertFalse(L[0].has_descendant(jwzthreading.Container())) - + def test_uniq(self): - self.assertEquals(jwzthreading.uniq((1,2,3,1,2,3)), [1,2,3]) + self.assertEquals(jwzthreading.uniq((1, 2, 3, 1, 2, 3)), [1, 2, 3]) - def test_rfc822_make_message (self): + def test_rfc822_make_message(self): if rfc822 is None: return from StringIO import StringIO @@ -90,7 +91,7 @@ def test_rfc822_make_message (self): m = rfc822.Message(f) self.assertRaises(ValueError, jwzthreading.make_message, m) - def test_email_make_message (self): + def test_email_make_message(self): msg_templ = """Subject: %(subject)s Message-ID: %(msg_id)s From 4c620c978a7cfa72a5d8b5f287b807f459d1b8cd Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Mon, 23 Feb 2015 19:06:13 +0000 Subject: [PATCH 2/7] Update docstrings per Google style guide Per PEP257, it is not recommended that signatures are given in docstrings as they may be obtained via introspection. Instead, use the docstring format suggested by Google Python Style Guide. Signed-off-by: Stephen Finucane --- jwzthreading.py | 146 +++++++++++++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 57 deletions(-) diff --git a/jwzthreading.py b/jwzthreading.py index cc98b50..022b8c4 100644 --- a/jwzthreading.py +++ b/jwzthreading.py @@ -28,20 +28,28 @@ __all__ = ['Message', 'make_message', 'thread'] -class Container: - """Contains a tree of messages. +# +# constants +# + +MSGID_RE = re.compile(r'<([^>]+)>') +SUBJECT_RE = re.compile( + r'((Re(\[\d+\])?:) | (\[ [^]]+ \])\s*)+', re.I | re.VERBOSE) + - Instance attributes: - .message : Message - Message corresponding to this tree node. This can be None, - if a Message-Id is referenced but no message with the ID is - included. +# +# models +# - .children : [Container] - Possibly-empty list of child containers. +class Container(object): + """Contains a tree of messages. - .parent : Container - Parent container; may be None. + Attributes: + message (Message): Message corresponding to this tree node. + This can be None, if a Message-Id is referenced but no + message with the ID is included. + children ([Container]): Possibly-empty list of child containers + parent (Container): Parent container, if any """ def __init__(self): self.message = self.parent = None @@ -52,22 +60,37 @@ def __repr__(self): self.message) def is_dummy(self): + """Check if Container has a message.""" return self.message is None def add_child(self, child): + """Add a child to `self`. + + Arguments: + child (Container): Child to add. + """ if child.parent: child.parent.remove_child(child) self.children.append(child) child.parent = self def remove_child(self, child): + """Remove a child from `self`. + + Arguments: + child (Container): Child to remove. + """ self.children.remove(child) child.parent = None def has_descendant(self, ctr): - """(Container): bool + """Check if `ctr` is a descendant of this. + + Arguments: + ctr (Container): possible descendant container. - Returns true if 'ctr' is a descendant of this Container. + Returns: + True if `ctr` is a descendant of `self`, else False. """ # To avoid recursing indefinitely, we'll do a depth-first search; # 'seen' tracks the containers we've already seen, and 'stack' @@ -86,26 +109,46 @@ def has_descendant(self, ctr): return False +class Message(object): + """Represents a message to be threaded. + + Attributes: + subject (str): Subject line of the message. + message_id (str): Message ID as retrieved from the Message-ID + header. + references ([str]): List of message IDs from the In-Reply-To + and References headers. + message (any): Can contain information for the caller's use + (e.g. an RFC-822 message object). + """ + def __init__(self, msg=None): + self.message = msg + self.message_id = None + self.references = [] + self.subject = None + + def __repr__(self): + return '<%s: %r>' % (self.__class__.__name__, self.message_id) + + +# +# functions +# + def uniq(alist): set = {} return [set.setdefault(e, e) for e in alist if e not in set.keys()] -msgid_pat = re.compile('<([^>]+)>') -restrip_pat = re.compile("""( - (Re(\[\d+\])?:) | (\[ [^]]+ \]) -\s*)+ -""", re.I | re.VERBOSE) - def make_message(msg): - """(msg:rfc822.Message) : Message + """Create new Message object. Create a Message object for threading purposes from an RFC822 message. """ new = Message(msg) - m = msgid_pat.search(msg.get("Message-ID", "")) + m = MSGID_RE.search(msg.get("Message-ID", "")) if m is None: raise ValueError("Message does not contain a Message-ID: header") @@ -113,13 +156,13 @@ def make_message(msg): # Get list of unique message IDs from the References: header refs = msg.get("References", "") - new.references = msgid_pat.findall(refs) + new.references = MSGID_RE.findall(refs) new.references = uniq(new.references) new.subject = msg.get('Subject', "No subject") # Get In-Reply-To: header and add it to references in_reply_to = msg.get("In-Reply-To", "") - m = msgid_pat.search(in_reply_to) + m = MSGID_RE.search(in_reply_to) if m: msg_id = m.group(1) if msg_id not in new.references: @@ -128,35 +171,18 @@ def make_message(msg): return new -class Message (object): - """Represents a message to be threaded. - - Instance attributes: - .subject : str - Subject line of the message. - .message_id : str - Message ID as retrieved from the Message-ID header. - .references : [str] - List of message IDs from the In-Reply-To and References headers. - .message : any - Can contain information for the caller's use (e.g. an RFC-822 message object). - """ - def __init__(self, msg=None): - self.message = msg - self.message_id = None - self.references = [] - self.subject = None - - def __repr__(self): - return '<%s: %r>' % (self.__class__.__name__, self.message_id) - - def prune_container(container): - """(container:Container) : [Container] + """Prune a tree of containers. - Recursively prune a tree of containers, as described in step 4 - of the algorithm. Returns a list of the children that should replace + Recursively prune a tree of containers, as described in step 4 of + the algorithm. Returns a list of the children that should replace this container. + + Arguments: + container (Container): Container to prune + + Returns: + List of zero or more containers. """ # Prune children, assembling a new list of children new_children = [] @@ -199,13 +225,18 @@ def prune_container(container): def thread(msglist): - """([Message]) : {string:Container} + """Thread a list of mail items. + + Takes a list of Message objects, and returns a dictionary mapping + subjects to Containers. Containers are trees, with the `children` + attribute containing a list of subtrees, so callers can then sort + children by date or poster or whatever. + + Arguments: + messages ([Message]): List of Message itesms - The main threading function. This takes a list of Message - objects, and returns a dictionary mapping subjects to Containers. - Containers are trees, with the .children attribute containing a - list of subtrees, so callers can then sort children by date or - poster or whatever. + Returns: + dict of containers, with subject as the key """ id_table = {} for msg in msglist: @@ -297,7 +328,7 @@ def thread(msglist): c = container.children[0] subj = container.children[0].message.subject - subj = restrip_pat.sub('', subj) + subj = SUBJECT_RE.sub('', subj) if subj == "": continue @@ -317,7 +348,7 @@ def thread(msglist): else: subj = container.children[0].message.subject - subj = restrip_pat.sub('', subj) + subj = SUBJECT_RE.sub('', subj) ctr = subject_table.get(subj) if ctr is None or ctr is container: continue @@ -345,6 +376,7 @@ def thread(msglist): def print_container(ctr, depth=0, debug=0): + """Print summary of Thread to stdout.""" import sys sys.stdout.write(depth * ' ') From 5d1f0cae90b40c2c4c59cfcd82653f11c9703a63 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Mon, 23 Feb 2015 19:10:53 +0000 Subject: [PATCH 3/7] Move 'make_message' to '__init__' of 'Message' Rather than having a function to "make" Message, do the required actions in the '__init__' function. This includes minor changes to the unit tests per this change. Signed-off-by: Stephen Finucane --- jwzthreading.py | 61 ++++++++++++++++++++---------------------------- test/test_jwz.py | 6 ++--- 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/jwzthreading.py b/jwzthreading.py index 022b8c4..9c47a39 100644 --- a/jwzthreading.py +++ b/jwzthreading.py @@ -25,7 +25,7 @@ import re from collections import deque -__all__ = ['Message', 'make_message', 'thread'] +__all__ = ['Message', 'thread'] # @@ -121,11 +121,31 @@ class Message(object): message (any): Can contain information for the caller's use (e.g. an RFC-822 message object). """ + message = None + message_id = None + references = [] + subject = None + def __init__(self, msg=None): + if msg is None: + return + + msg_id = MSGID_RE.search(msg.get('Message-ID', '')) + if msg_id is None: + raise ValueError('Message does not contain a Message-ID: header') + self.message = msg - self.message_id = None - self.references = [] - self.subject = None + self.message_id = msg_id.group(1) + + self.references = uniq(MSGID_RE.findall(msg.get('References', ''))) + self.subject = msg.get('Subject', "No subject") + + # Get In-Reply-To: header and add it to references + msg_id = MSGID_RE.search(msg.get('In-Reply-To', '')) + if msg_id: + msg_id = msg_id.group(1) + if msg_id not in self.references: + self.references.append(msg_id) def __repr__(self): return '<%s: %r>' % (self.__class__.__name__, self.message_id) @@ -140,37 +160,6 @@ def uniq(alist): return [set.setdefault(e, e) for e in alist if e not in set.keys()] -def make_message(msg): - """Create new Message object. - - Create a Message object for threading purposes from an RFC822 - message. - """ - new = Message(msg) - - m = MSGID_RE.search(msg.get("Message-ID", "")) - if m is None: - raise ValueError("Message does not contain a Message-ID: header") - - new.message_id = m.group(1) - - # Get list of unique message IDs from the References: header - refs = msg.get("References", "") - new.references = MSGID_RE.findall(refs) - new.references = uniq(new.references) - new.subject = msg.get('Subject', "No subject") - - # Get In-Reply-To: header and add it to references - in_reply_to = msg.get("In-Reply-To", "") - m = MSGID_RE.search(in_reply_to) - if m: - msg_id = m.group(1) - if msg_id not in new.references: - new.references.append(msg_id) - - return new - - def prune_container(container): """Prune a tree of containers. @@ -403,7 +392,7 @@ def main(): msg = mbox.next() if msg is None: break - m = make_message(msg) + m = Message(msg) msglist.append(m) f.close() diff --git a/test/test_jwz.py b/test/test_jwz.py index e25471e..fe40f04 100644 --- a/test/test_jwz.py +++ b/test/test_jwz.py @@ -89,7 +89,7 @@ def test_rfc822_make_message(self): Body.""") m = rfc822.Message(f) - self.assertRaises(ValueError, jwzthreading.make_message, m) + self.assertRaises(ValueError, jwzthreading.Message, m) def test_email_make_message(self): msg_templ = """Subject: %(subject)s @@ -100,7 +100,7 @@ def test_email_make_message(self): m = message_from_string("""Subject: random Body.""") - self.assertRaises(ValueError, jwzthreading.make_message, m) + self.assertRaises(ValueError, jwzthreading.Message, m) def test_basic_message(self): msg = message_from_string("""Subject: random @@ -109,7 +109,7 @@ def test_basic_message(self): In-Reply-To: Body.""") - m = jwzthreading.make_message(msg) + m = jwzthreading.Message(msg) self.assertTrue(repr(m)) self.assertEquals(m.subject, 'random') self.assertEquals(sorted(m.references), From b27b3d9e0325e6d3d35d42f6186f269aa7d36a60 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Mon, 23 Feb 2015 19:25:58 +0000 Subject: [PATCH 4/7] trivial: PyLint cleanup Resolve PyLint issues: * Invalid variables names * Missing docstrings * Badly wrapped lines Signed-off-by: Stephen Finucane --- jwzthreading.py | 153 ++++++++++++++++++++---------------------------- 1 file changed, 63 insertions(+), 90 deletions(-) diff --git a/jwzthreading.py b/jwzthreading.py index 9c47a39..58bdd81 100644 --- a/jwzthreading.py +++ b/jwzthreading.py @@ -22,8 +22,9 @@ This code is under a BSD-style license; see the LICENSE file for details. """ -import re +from __future__ import print_function from collections import deque +import re __all__ = ['Message', 'thread'] @@ -92,12 +93,14 @@ def has_descendant(self, ctr): Returns: True if `ctr` is a descendant of `self`, else False. """ - # To avoid recursing indefinitely, we'll do a depth-first search; - # 'seen' tracks the containers we've already seen, and 'stack' - # is a deque containing containers that we need to look at. + # To avoid recursing indefinitely, we'll do a depth-first + # search; 'seen' tracks the containers we've already seen, + # and 'stack' is a deque containing containers that we need + # to look at. stack = deque() stack.append(self) seen = set() + while stack: node = stack.pop() if node is ctr: @@ -106,6 +109,7 @@ def has_descendant(self, ctr): for child in node.children: if child not in seen: stack.append(child) + return False @@ -156,8 +160,8 @@ def __repr__(self): # def uniq(alist): - set = {} - return [set.setdefault(e, e) for e in alist if e not in set.keys()] + result = {} + return [result.setdefault(e, e) for e in alist if e not in result] def prune_container(container): @@ -176,44 +180,30 @@ def prune_container(container): # Prune children, assembling a new list of children new_children = [] - ##old_children = container.children[:] - - for ctr in container.children[:]: - L = prune_container(ctr) - new_children.extend(L) + for ctr in container.children[:]: # copy the container.children list + pruned_child = prune_container(ctr) + new_children.extend(pruned_child) container.remove_child(ctr) - for c in new_children: - container.add_child(c) - - ##if new_children != old_children: - ## print 'Children for ' + repr(container) + 'Changed' - ## print 'OLD children:' - ## for ctr in old_children: - ## print_container(ctr, 0, True) - ## print 'NEW children:' - ## for ctr in container.children[:]: - ## print_container(ctr, 0, True) - - if (container.message is None and - len(container.children) == 0): - # 4.A: nuke empty containers + for child in new_children: + container.add_child(child) + + if container.message is None and not len(container.children): + # step 4 (a) - nuke empty containers return [] - elif (container.message is None and - (len(container.children) == 1 or - container.parent is not None)): - # 4.B: promote children - L = container.children[:] - ##print "Promoting "+repr(len(container.children))+" children" - for c in L: - container.remove_child(c) - return L + elif container.message is None and ( + len(container.children) == 1 or container.parent is not None): + # step 4 (b) - promote children + children = container.children[:] + for child in children: + container.remove_child(child) + return children else: # Leave this node in place return [container] -def thread(msglist): +def thread(messages): """Thread a list of mail items. Takes a list of Message objects, and returns a dictionary mapping @@ -227,9 +217,11 @@ def thread(msglist): Returns: dict of containers, with subject as the key """ + # step one id_table = {} - for msg in msglist: - # 1A + + for msg in messages: + # step one (a) this_container = id_table.get(msg.message_id, None) if this_container is not None: this_container.message = msg @@ -238,14 +230,13 @@ def thread(msglist): this_container.message = msg id_table[msg.message_id] = this_container - # 1B + # step one (b) prev = None for ref in msg.references: ## print "Processing reference for "+repr(msg.message_id)+": "+repr(ref) container = id_table.get(ref, None) if container is None: container = Container() - container.message_id = ref id_table[ref] = container if prev is not None: @@ -270,55 +261,38 @@ def thread(msglist): this_container.parent.remove_child(this_container) - # 2. Find root set + # step two - find root set root_set = [container for container in id_table.values() if container.parent is None] - # 3. Delete id_table + # step three - delete id_table del id_table - # 4. Prune empty containers + # step four - prune empty containers for container in root_set: assert container.parent == None -<<<<<<< 1a77baff4b05ece3d4c54e0cd806b9f8e755f8df - ##print 'before' - ##for ctr in root_set: - ## print_container(ctr, 0, True) -======= - # print 'before' - # for ctr in root_set: - # print_container(ctr) ->>>>>>> trivial: Run code through autopep8 - new_root_set = [] for container in root_set: - L = prune_container(container) - new_root_set.extend(L) + new_container = prune_container(container) + new_root_set.extend(new_container) root_set = new_root_set -<<<<<<< 1a77baff4b05ece3d4c54e0cd806b9f8e755f8df - ##print '\n\nafter' - ##for ctr in root_set: - ## print_container(ctr, 0, True) -======= # print '\n\nafter' # for ctr in root_set: # print_container(ctr) ->>>>>>> trivial: Run code through autopep8 - # 5. Group root set by subject + # step five - group root set by subject subject_table = {} for container in root_set: if container.message: subj = container.message.subject else: - c = container.children[0] subj = container.children[0].message.subject subj = SUBJECT_RE.sub('', subj) - if subj == "": + if subj == '': continue existing = subject_table.get(subj, None) @@ -330,7 +304,7 @@ def thread(msglist): len(existing.message.subject) > len(container.message.subject))): subject_table[subj] = container - # 5C + # step five (c) for container in root_set: if container.message: subj = container.message.subject @@ -339,11 +313,13 @@ def thread(msglist): subj = SUBJECT_RE.sub('', subj) ctr = subject_table.get(subj) + if ctr is None or ctr is container: continue + if ctr.is_dummy() and container.is_dummy(): - for c in ctr.children: - container.add_child(c) + for child in ctr.children: + container.add_child(child) elif ctr.is_dummy() or container.is_dummy(): if ctr.is_dummy(): ctr.add_child(container) @@ -366,43 +342,40 @@ def thread(msglist): def print_container(ctr, depth=0, debug=0): """Print summary of Thread to stdout.""" - import sys - - sys.stdout.write(depth * ' ') if debug: - # Printing the repr() is more useful for debugging - sys.stdout.write(repr(ctr) + ' ' + repr(ctr.message and ctr.message.subject)) + message = repr(ctr) + ' ' + repr(ctr.message and ctr.message.subject)) else: - sys.stdout.write(repr(ctr.message and ctr.message.subject)) + message = str(ctr.message and ctr.message.subject) + + print(''.join(['> ' * depth, message])) - sys.stdout.write('\n') - for c in ctr.children: - print_container(c, depth + 1, debug) - print_container(c, depth + 1) + for child in ctr.children: + print_container(child, depth + 1, debug) + print_container(child, depth + 1) def main(): import mailbox - print('Reading input file...') - f = open("mbox", 'rb') - mbox = mailbox.UnixMailbox(f) msglist = [] - while 1: - msg = mbox.next() - if msg is None: - break - m = Message(msg) - msglist.append(m) - f.close() + + print('Reading input file...') + with open('mbox', 'rb') as file_: + mbox = mailbox.UnixMailbox(file_) + while 1: + msg = mbox.next() + if msg is None: + break + parsed_msg = Message(msg) + msglist.append(parsed_msg) print('Threading...') subject_table = thread(msglist) # Output - L = subject_table.items() - L.sort() - for subj, container in L: + subjects = subject_table.items() + subjects.sort() + for _, container in subjects: print_container(container) if __name__ == "__main__": From ffaa74fcdf2b0a5d0ad1a8d5b6af064635b9aab8 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Wed, 25 Feb 2015 20:08:28 +0000 Subject: [PATCH 5/7] test/test_jwz: Remove rfc822 tests This package is deprecated and is not available in Python 3. Remove the tests. In addition, resolve some immediate PyLint warnings. Signed-off-by: Stephen Finucane --- test/test_jwz.py | 85 +++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 52 deletions(-) diff --git a/test/test_jwz.py b/test/test_jwz.py index fe40f04..cf18c1a 100644 --- a/test/test_jwz.py +++ b/test/test_jwz.py @@ -1,29 +1,19 @@ -#!/www/python/bin/python - """ Test script for jwzthreading. - """ +# pylint: disable=c0103,c0111,r0904 + import unittest -import jwzthreading +import textwrap from email import message_from_string -try: - import rfc822 -except ImportError: - rfc822 = None - -tested_modules = ["jwzthreading"] - - -def make_message(S): - return message_from_string(S) - +import jwzthreading -class JWZTest (unittest.TestCase): +class TestContainer(unittest.TestCase): def test_container(self): + """Test linking of containers.""" c = jwzthreading.Container() repr(c) @@ -57,10 +47,11 @@ def test_container(self): self.assertEquals(c3.parent, c2) def test_deep_container(self): - # Build a 50000-deep list of nested Containers. + """Build a 50000-deep list of nested Containers.""" parent = jwzthreading.Container() L = [parent] - for i in range(50000): + + for _ in xrange(50000): child = jwzthreading.Container() parent.add_child(child) L.append(child) @@ -72,52 +63,40 @@ def test_deep_container(self): # Test a search that fails self.assertFalse(L[0].has_descendant(jwzthreading.Container())) + +class TestUniq(unittest.TestCase): def test_uniq(self): self.assertEquals(jwzthreading.uniq((1, 2, 3, 1, 2, 3)), [1, 2, 3]) - def test_rfc822_make_message(self): - if rfc822 is None: - return - from StringIO import StringIO - - msg_templ = """Subject: %(subject)s -Message-ID: %(msg_id)s - -Message body -""" - f = StringIO("""Subject: random - -Body.""") - m = rfc822.Message(f) - self.assertRaises(ValueError, jwzthreading.Message, m) +class TestMessage(unittest.TestCase): def test_email_make_message(self): - msg_templ = """Subject: %(subject)s -Message-ID: %(msg_id)s + text = """\ + Subject: random -Message body -""" - m = message_from_string("""Subject: random - -Body.""") - self.assertRaises(ValueError, jwzthreading.Message, m) + Body.""" + msg = message_from_string(textwrap.dedent(text)) + self.assertRaises(ValueError, jwzthreading.Message, msg) def test_basic_message(self): - msg = message_from_string("""Subject: random -Message-ID: -References: -In-Reply-To: - -Body.""") + text = """\ + Subject: random + Message-ID: + References: + In-Reply-To: + + Body.""" + msg = message_from_string(textwrap.dedent(text)) m = jwzthreading.Message(msg) self.assertTrue(repr(m)) self.assertEquals(m.subject, 'random') - self.assertEquals(sorted(m.references), - ['ref1', 'ref2', 'reply']) + self.assertEquals(sorted(m.references), ['ref1', 'ref2', 'reply']) # Verify that repr() works repr(m) + +class TestPrune(unittest.TestCase): def test_prune_empty(self): c = jwzthreading.Container() self.assertEquals(jwzthreading.prune_container(c), []) @@ -129,14 +108,16 @@ def test_prune_promote(self): p.add_child(c1) self.assertEquals(jwzthreading.prune_container(p), [c1]) + +class TestThread(unittest.TestCase): def test_thread_single(self): - "Thread a single message" + """Thread a single message.""" m = jwzthreading.Message(None) m.subject = m.message_id = 'Single' self.assertEqual(jwzthreading.thread([m])['Single'].message, m) def test_thread_unrelated(self): - "Thread two unconnected messages" + """Thread two unconnected messages.""" m1 = jwzthreading.Message(None) m1.subject = m1.message_id = 'First' m2 = jwzthreading.Message(None) @@ -147,7 +128,7 @@ def test_thread_unrelated(self): self.assertEqual(d['Second'].message, m2) def test_thread_two(self): - "Thread two messages together." + """Thread two messages together.""" m1 = jwzthreading.Message(None) m1.subject = m1.message_id = 'First' m2 = jwzthreading.Message(None) From 46a9c34095126a593d01098352e2351dc9fd9813 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Wed, 25 Feb 2015 20:38:07 +0000 Subject: [PATCH 6/7] Enable configurable mbox file Allow user to provide mbox path to 'main' function as an argument to the executable. Signed-off-by: Stephen Finucane --- jwzthreading.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/jwzthreading.py b/jwzthreading.py index 58bdd81..87cf0c9 100644 --- a/jwzthreading.py +++ b/jwzthreading.py @@ -356,23 +356,23 @@ def print_container(ctr, depth=0, debug=0): def main(): import mailbox + import sys msglist = [] print('Reading input file...') - with open('mbox', 'rb') as file_: - mbox = mailbox.UnixMailbox(file_) - while 1: - msg = mbox.next() - if msg is None: - break - parsed_msg = Message(msg) - msglist.append(parsed_msg) + mbox = mailbox.mbox(sys.argv[1]) + for message in mbox: + try: + parsed_msg = Message(message) + except ValueError: + continue + msglist.append(parsed_msg) print('Threading...') subject_table = thread(msglist) - # Output + print('Output...') subjects = subject_table.items() subjects.sort() for _, container in subjects: From 4476058abd36fbed30356da8a9a1ecb2b1f05f0c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 28 Nov 2016 15:45:10 +0100 Subject: [PATCH 7/7] Fixing merge conflicts --- jwzthreading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jwzthreading.py b/jwzthreading.py index 87cf0c9..59fbfb8 100644 --- a/jwzthreading.py +++ b/jwzthreading.py @@ -343,7 +343,7 @@ def thread(messages): def print_container(ctr, depth=0, debug=0): """Print summary of Thread to stdout.""" if debug: - message = repr(ctr) + ' ' + repr(ctr.message and ctr.message.subject)) + message = repr(ctr) + ' ' + repr(ctr.message and ctr.message.subject) else: message = str(ctr.message and ctr.message.subject)