diff --git a/htmltreediff/html.py b/htmltreediff/html.py
index 19b15e4..eaab143 100644
--- a/htmltreediff/html.py
+++ b/htmltreediff/html.py
@@ -117,25 +117,26 @@ def fix_lists(dom):
wrap_inner(c, 'del')
-def fix_tables(dom):
- _internalize_changes_markup(dom, set(['td', 'th']))
-
- # Show table row insertions
- tags = set()
- for node in list(dom.getElementsByTagName('tr')):
- parent = node.parentNode
- if parent.tagName in ('ins', 'del'):
- tags.add(parent)
- for tag in tags:
- distribute(tag)
- # Show table cell insertions
+def distribute_ins_and_del_tags(dom, tag_names):  # For each name in tag_names, hand every ins/del element that is the direct parent of a matching element to distribute().
tags = set()
- for node in list(dom.getElementsByTagName('td') + dom.getElementsByTagName('th')):
- parent = node.parentNode
- if parent.tagName in ('ins', 'del'):
- tags.add(parent)
+ for tag_name in tag_names:  # tag_names: iterable of element names to look up in dom
+ for node in list(dom.getElementsByTagName(tag_name)):  # iterate over a list copy of the matches
+ parent = node.parentNode
+ if parent.tagName in ('ins', 'del'):  # NOTE(review): assumes every match has a parent exposing tagName - confirm for root-level matches
+ tags.add(parent)  # collected in a set so a wrapper holding several matching children is handed to distribute() once
for tag in tags:
distribute(tag)
+
+
+def fix_tables(dom):
+ _internalize_changes_markup(dom, set(['tbody', 'thead', 'tfoot']))
+ _internalize_changes_markup(dom, set(['tr']))
+ _internalize_changes_markup(dom, set(['td', 'th']))
+
+ distribute_ins_and_del_tags(dom, ['tbody', 'thead', 'tfoot'])
+ distribute_ins_and_del_tags(dom, ['tr'])
+ distribute_ins_and_del_tags(dom, ['td', 'th'])
+
# All other ins and del tags inside a table but not in a cell are invalid,
# so remove them.
for node in list(dom.getElementsByTagName('ins') + dom.getElementsByTagName('del')):
diff --git a/htmltreediff/test_html.py b/htmltreediff/test_html.py
index f57c9c5..3235fb3 100644
--- a/htmltreediff/test_html.py
+++ b/htmltreediff/test_html.py
@@ -552,6 +552,156 @@ def test_fix_tables():
'''
),
+ (
+ 'tbody inside ins is distributed',
+ '''
+
+ ''',
+ '''
+
+ '''
+ ),
+ (
+ 'tbody inside del is distributed',
+ '''
+
+ ''',
+ '''
+
+ '''
+ ),
+ (
+ 'thead inside ins is distributed',
+ '''
+
+ ''',
+ '''
+
+ '''
+ ),
+ (
+ 'thead inside del is distributed',
+ '''
+
+ ''',
+ '''
+
+ '''
+ ),
+ (
+ 'tfoot inside ins is distributed',
+ '''
+
+ ''',
+ '''
+
+ '''
+ ),
+ (
+ 'tfoot inside del is distributed',
+ '''
+
+ ''',
+ '''
+
+ '''
+ ),
+ (
+ 'tbody del and ins pair is internalized',
+ '''
+
+ ''',
+ '''
+
+ '''
+ ),
+ (
+ 'thead del and ins pair is internalized',
+ '''
+
+ | old header |
|---|
+ | new header |
+ | data |
+
+ ''',
+ '''
+
+ old headernew header |
+ | data |
+
+ '''
+ ),
+ (
+ 'tfoot del and ins pair is internalized',
+ '''
+
+ | data |
+ | old footer |
+ | new footer |
+
+ ''',
+ '''
+
+ | data |
+ old footernew footer |
+
+ '''
+ ),
+ (
+ 'tr del and ins pair is internalized',
+ '''
+
+
+ | old row |
+ | new row |
+
+
+ ''',
+ '''
+
+ '''
+ ),
(
'remove ins and del tags at the wrong level of the table',
'''
diff --git a/setup.py b/setup.py
index 6001d3b..ba9d89c 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@ def get_requirements(path):
setup(
name="html-tree-diff",
- version="0.3.0",
+ version="0.3.1",
description="Structure-aware diff for html and xml documents",
author="Christian Oudard",
author_email="christian.oudard@gmail.com",