Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions htmltreediff/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,25 +117,26 @@ def fix_lists(dom):
wrap_inner(c, 'del')


def fix_tables(dom):
_internalize_changes_markup(dom, set(['td', 'th']))

# Show table row insertions
tags = set()
for node in list(dom.getElementsByTagName('tr')):
parent = node.parentNode
if parent.tagName in ('ins', 'del'):
tags.add(parent)
for tag in tags:
distribute(tag)
# Show table cell insertions
def distribute_ins_and_del_tags(dom, tag_names):
    """Call ``distribute`` on every ``ins``/``del`` element wrapping a given tag.

    For each name in ``tag_names``, every element of that name in ``dom`` is
    inspected; when its direct parent is an ``ins`` or ``del`` element, that
    parent is collected. ``distribute`` is then called once per collected
    parent.

    Args:
        dom: A DOM node exposing ``getElementsByTagName``; the matched
            elements must expose ``parentNode`` and their parents ``tagName``.
            NOTE(review): presumably an ``xml.dom.minidom`` tree -- confirm.
        tag_names: Iterable of element names (e.g. ``['td', 'th']``) whose
            wrapping ``ins``/``del`` parents should be distributed.

    NOTE(review): ``distribute`` is defined elsewhere in this module;
    presumably it pushes the change marker down into the wrapped elements --
    verify against its definition.
    """
    # A set ensures a wrapper that contains several matching children is
    # only distributed once.
    tags = set()
    for tag_name in tag_names:
        for node in list(dom.getElementsByTagName(tag_name)):
            parent = node.parentNode
            if parent.tagName in ('ins', 'del'):
                tags.add(parent)
    # Collect first, distribute afterwards, so the tree is not modified
    # while it is still being searched.
    for tag in tags:
        distribute(tag)


def fix_tables(dom):
_internalize_changes_markup(dom, set(['tbody', 'thead', 'tfoot']))
Comment thread
chrisbailey-rldatix marked this conversation as resolved.
_internalize_changes_markup(dom, set(['tr']))
_internalize_changes_markup(dom, set(['td', 'th']))

distribute_ins_and_del_tags(dom, ['tbody', 'thead', 'tfoot'])
distribute_ins_and_del_tags(dom, ['tr'])
distribute_ins_and_del_tags(dom, ['td', 'th'])

# All other ins and del tags inside a table but not in a cell are invalid,
# so remove them.
for node in list(dom.getElementsByTagName('ins') + dom.getElementsByTagName('del')):
Comment thread
chrisbailey-rldatix marked this conversation as resolved.
Expand Down
150 changes: 150 additions & 0 deletions htmltreediff/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,156 @@ def test_fix_tables():
</table>
'''
),
(
'tbody inside ins is distributed',
'''
<table>
<ins><tbody><tr><td>A</td></tr></tbody></ins>
</table>
''',
'''
<table>
<tbody><tr><td><ins>A</ins></td></tr></tbody>
</table>
'''
),
(
'tbody inside del is distributed',
'''
<table>
<del><tbody><tr><td>A</td></tr></tbody></del>
</table>
''',
'''
<table>
<tbody><tr><td><del>A</del></td></tr></tbody>
</table>
'''
),
(
'thead inside ins is distributed',
'''
<table>
<ins><thead><tr><th>Header</th></tr></thead></ins>
<tbody><tr><td>Data</td></tr></tbody>
</table>
''',
'''
<table>
<thead><tr><th><ins>Header</ins></th></tr></thead>
<tbody><tr><td>Data</td></tr></tbody>
</table>
'''
),
(
'thead inside del is distributed',
'''
<table>
<del><thead><tr><th>Header</th></tr></thead></del>
<tbody><tr><td>Data</td></tr></tbody>
</table>
''',
'''
<table>
<thead><tr><th><del>Header</del></th></tr></thead>
<tbody><tr><td>Data</td></tr></tbody>
</table>
'''
),
(
'tfoot inside ins is distributed',
'''
<table>
<tbody><tr><td>Data</td></tr></tbody>
<ins><tfoot><tr><td>Footer</td></tr></tfoot></ins>
</table>
''',
'''
<table>
<tbody><tr><td>Data</td></tr></tbody>
<tfoot><tr><td><ins>Footer</ins></td></tr></tfoot>
</table>
'''
),
(
'tfoot inside del is distributed',
'''
<table>
<tbody><tr><td>Data</td></tr></tbody>
<del><tfoot><tr><td>Footer</td></tr></tfoot></del>
</table>
''',
'''
<table>
<tbody><tr><td>Data</td></tr></tbody>
<tfoot><tr><td><del>Footer</del></td></tr></tfoot>
</table>
'''
),
(
'tbody del and ins pair is internalized',
'''
<table>
<del><tbody><tr><td>old data</td></tr></tbody></del>
<ins><tbody><tr><td>new data</td></tr></tbody></ins>
</table>
''',
'''
<table>
<tbody><tr><td><del>old data</del><ins>new data</ins></td></tr></tbody>
</table>
'''
),
(
'thead del and ins pair is internalized',
'''
<table>
<del><thead><tr><th>old header</th></tr></thead></del>
<ins><thead><tr><th>new header</th></tr></thead></ins>
<tbody><tr><td>data</td></tr></tbody>
</table>
''',
'''
<table>
<thead><tr><th><del>old header</del><ins>new header</ins></th></tr></thead>
<tbody><tr><td>data</td></tr></tbody>
</table>
'''
),
(
'tfoot del and ins pair is internalized',
'''
<table>
<tbody><tr><td>data</td></tr></tbody>
<del><tfoot><tr><td>old footer</td></tr></tfoot></del>
<ins><tfoot><tr><td>new footer</td></tr></tfoot></ins>
</table>
''',
'''
<table>
<tbody><tr><td>data</td></tr></tbody>
<tfoot><tr><td><del>old footer</del><ins>new footer</ins></td></tr></tfoot>
</table>
'''
),
(
'tr del and ins pair is internalized',
'''
<table>
<tbody>
<del><tr><td>old row</td></tr></del>
<ins><tr><td>new row</td></tr></ins>
</tbody>
</table>
''',
'''
<table>
<tbody>
<tr><td><del>old row</del><ins>new row</ins></td></tr>
</tbody>
</table>
'''
),
(
'remove ins and del tags at the wrong level of the table',
'''
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_requirements(path):

setup(
name="html-tree-diff",
version="0.3.0",
version="0.3.1",
description="Structure-aware diff for html and xml documents",
author="Christian Oudard",
author_email="christian.oudard@gmail.com",
Expand Down