diff --git a/jwzthreading.py b/jwzthreading.py index ef05611..de19638 100644 --- a/jwzthreading.py +++ b/jwzthreading.py @@ -57,6 +57,11 @@ def is_dummy (self): return self.message is None def add_child (self, child): + ##print "Assigning: " + ##print_container(child, 0, True) + ##print "as children of: " + ##print_container(self, 0, True) + if child.parent: child.parent.remove_child(child) self.children.append(child) @@ -160,6 +165,9 @@ def prune_container (container): # Prune children, assembling a new list of children new_children = [] + + ##old_children = container.children[:] + for ctr in container.children[:]: L = prune_container(ctr) new_children.extend(L) @@ -168,6 +176,15 @@ def prune_container (container): for c in new_children: container.add_child(c) + ##if new_children != old_children: + ## print 'Children for ' + repr(container) + 'Changed' + ## print 'OLD children:' + ## for ctr in old_children: + ## print_container(ctr, 0, True) + ## print 'NEW children:' + ## for ctr in container.children[:]: + ## print_container(ctr, 0, True) + if (container.message is None and len(container.children) == 0): # 4.A: nuke empty containers @@ -177,6 +194,7 @@ def prune_container (container): container.parent is not None)): # 4.B: promote children L = container.children[:] + ##print "Promoting "+repr(len(container.children))+" children" for c in L: container.remove_child(c) return L @@ -209,24 +227,34 @@ def thread (msglist): # 1B prev = None for ref in msg.references: + ## print "Processing reference for "+repr(msg.message_id)+": "+repr(ref) container = id_table.get(ref, None) if container is None: container = Container() container.message_id = ref id_table[ref] = container - if (prev is not None): + if prev is not None: + #If they are already linked, don't change the existing links. + if container.parent!=None: + pass # Don't add link if it would create a loop - if container is this_container: - continue - if container.has_descendant(prev): - continue - prev.add_child(container) + elif container is this_container or container.has_descendant(prev) or prev.has_descendant(container): + pass + else: + prev.add_child(container) prev = container - + ## print "Finished processing reference for "+repr(msg.message_id)+", container now: " + ## print_container(container, 0, True) + #1C if prev is not None: + ##print "Setting parent of "+repr(this_container)+", to last reference: " + repr (prev) prev.add_child(this_container) + else: + if(this_container.parent): + this_container.parent.remove_child(this_container) + # 2. Find root set root_set = [container for container in id_table.values() @@ -241,7 +269,7 @@ def thread (msglist): ##print 'before' ##for ctr in root_set: - ## print_container(ctr) + ## print_container(ctr, 0, True) new_root_set = [] for container in root_set: @@ -252,7 +280,7 @@ def thread (msglist): ##print '\n\nafter' ##for ctr in root_set: - ## print_container(ctr) + ## print_container(ctr, 0, True) # 5. Group root set by subject subject_table = {} @@ -315,13 +343,13 @@ def print_container(ctr, depth=0, debug=0): sys.stdout.write(depth*' ') if debug: # Printing the repr() is more useful for debugging - sys.stdout.write(repr(ctr)) + sys.stdout.write(repr(ctr) + ' ' + repr(ctr.message and ctr.message.subject)) else: sys.stdout.write(repr(ctr.message and ctr.message.subject)) sys.stdout.write('\n') for c in ctr.children: - print_container(c, depth+1) + print_container(c, depth+1, debug) def main(): diff --git a/test/test_jwz.py b/test/test_jwz.py index f9ed878..e9464d8 100644 --- a/test/test_jwz.py +++ b/test/test_jwz.py @@ -169,6 +169,31 @@ def test_thread_two_reverse(self): self.assertEqual(len(d['First'].children), 1) self.assertEqual(d['First'].children[0].message, m2) + def test_thread_lying_message(self): + "Thread three messages together, with other messages lying in their references." + dummy_parent_m = jwzthreading.Message(None) + dummy_parent_m.subject = dummy_parent_m.message_id = 'Dummy parent' + lying_before_m = jwzthreading.Message(None) + lying_before_m.subject = lying_before_m.message_id = 'Lying before' + lying_before_m.references = ['Dummy parent', 'Second', 'First', 'Third'] + m1 = jwzthreading.Message(None) + m1.subject = m1.message_id = 'First' + m2 = jwzthreading.Message(None) + m2.subject = m2.message_id = 'Second' + m2.references = ['First'] + m3 = jwzthreading.Message(None) + m3.subject = m3.message_id = 'Third' + m3.references = ['First', 'Second'] + lying_after_m = jwzthreading.Message(None) + lying_after_m.subject = lying_after_m.message_id = 'Lying after' + #lying_after_m.references = ['Dummy parent','Third', 'Second', 'First'] + d = jwzthreading.thread([dummy_parent_m, lying_before_m, m1, m2, m3, lying_after_m]) + self.assertEqual(d['First'].message, m1) + self.assertEqual(len(d['First'].children), 1) + self.assertEqual(d['First'].children[0].message, m2) + self.assertEqual(len(d['First'].children[0].children), 1) + self.assertEqual(d['First'].children[0].children[0].message, m3) + def test_thread_two_missing_parent(self): "Thread two messages, both children of a missing parent." m1 = jwzthreading.Message(None)