Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 39 additions & 11 deletions jwzthreading.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ def is_dummy (self):
return self.message is None

def add_child (self, child):
##print "Assigning: "
##print_container(child, 0, True)
##print "as children of: "
##print_container(self, 0, True)

if child.parent:
child.parent.remove_child(child)
self.children.append(child)
Expand Down Expand Up @@ -160,6 +165,9 @@ def prune_container (container):

# Prune children, assembling a new list of children
new_children = []

##old_children = container.children[:]

for ctr in container.children[:]:
L = prune_container(ctr)
new_children.extend(L)
Expand All @@ -168,6 +176,15 @@ def prune_container (container):
for c in new_children:
container.add_child(c)

##if new_children != old_children:
## print 'Children for ' + repr(container) + 'Changed'
## print 'OLD children:'
## for ctr in old_children:
## print_container(ctr, 0, True)
## print 'NEW children:'
## for ctr in container.children[:]:
## print_container(ctr, 0, True)

if (container.message is None and
len(container.children) == 0):
# 4.A: nuke empty containers
Expand All @@ -177,6 +194,7 @@ def prune_container (container):
container.parent is not None)):
# 4.B: promote children
L = container.children[:]
##print "Promoting "+repr(len(container.children))+" children"
for c in L:
container.remove_child(c)
return L
Expand Down Expand Up @@ -209,24 +227,34 @@ def thread (msglist):
# 1B
prev = None
for ref in msg.references:
## print "Processing reference for "+repr(msg.message_id)+": "+repr(ref)
container = id_table.get(ref, None)
if container is None:
container = Container()
container.message_id = ref
id_table[ref] = container

if (prev is not None):
if prev is not None:
#If they are already linked, don't change the existing links.
if container.parent!=None:
pass
# Don't add link if it would create a loop
if container is this_container:
continue
if container.has_descendant(prev):
continue
prev.add_child(container)
elif container is this_container or container.has_descendant(prev) or prev.has_descendant(container):
pass
else:
prev.add_child(container)

prev = container

## print "Finished processing reference for "+repr(msg.message_id)+", container now: "
## print_container(container, 0, True)
#1C
if prev is not None:
##print "Setting parent of "+repr(this_container)+", to last reference: " + repr (prev)
prev.add_child(this_container)
else:
if(this_container.parent):
this_container.parent.remove_child(this_container)


# 2. Find root set
root_set = [container for container in id_table.values()
Expand All @@ -241,7 +269,7 @@ def thread (msglist):

##print 'before'
##for ctr in root_set:
## print_container(ctr)
## print_container(ctr, 0, True)

new_root_set = []
for container in root_set:
Expand All @@ -252,7 +280,7 @@ def thread (msglist):

##print '\n\nafter'
##for ctr in root_set:
## print_container(ctr)
## print_container(ctr, 0, True)

# 5. Group root set by subject
subject_table = {}
Expand Down Expand Up @@ -315,13 +343,13 @@ def print_container(ctr, depth=0, debug=0):
sys.stdout.write(depth*' ')
if debug:
# Printing the repr() is more useful for debugging
sys.stdout.write(repr(ctr))
sys.stdout.write(repr(ctr) + ' ' + repr(ctr.message and ctr.message.subject))
else:
sys.stdout.write(repr(ctr.message and ctr.message.subject))

sys.stdout.write('\n')
for c in ctr.children:
print_container(c, depth+1)
print_container(c, depth+1, debug)


def main():
Expand Down
25 changes: 25 additions & 0 deletions test/test_jwz.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,31 @@ def test_thread_two_reverse(self):
self.assertEqual(len(d['First'].children), 1)
self.assertEqual(d['First'].children[0].message, m2)

def test_thread_lying_message(self):
    "Thread three messages together, with other messages lying in their references."

    def make_message(ident, refs=None):
        # Build a bare message whose subject and message-id are both `ident`;
        # references are only assigned when explicitly supplied.
        msg = jwzthreading.Message(None)
        msg.subject = msg.message_id = ident
        if refs is not None:
            msg.references = refs
        return msg

    dummy_parent_m = make_message('Dummy parent')
    # This message lists the real thread's ids out of order in its references.
    lying_before_m = make_message(
        'Lying before', ['Dummy parent', 'Second', 'First', 'Third'])
    m1 = make_message('First')
    m2 = make_message('Second', ['First'])
    m3 = make_message('Third', ['First', 'Second'])
    # The trailing message carries no references; the assignment below is
    # kept disabled.
    lying_after_m = make_message('Lying after')
    #lying_after_m.references = ['Dummy parent','Third', 'Second', 'First']

    messages = [dummy_parent_m, lying_before_m, m1, m2, m3, lying_after_m]
    threads = jwzthreading.thread(messages)

    # Expected chain: First -> Second -> Third, one child at each level.
    first = threads['First']
    self.assertEqual(first.message, m1)
    self.assertEqual(len(first.children), 1)
    second = first.children[0]
    self.assertEqual(second.message, m2)
    self.assertEqual(len(second.children), 1)
    self.assertEqual(second.children[0].message, m3)


def test_thread_two_missing_parent(self):
"Thread two messages, both children of a missing parent."
m1 = jwzthreading.Message(None)
Expand Down