Skip to content

Commit fe93434

Browse files
committed
Ensure HTML is parsed as UTF-8
Fixes: #44
1 parent be3ce3c commit fe93434

File tree

5 files changed: 5 additions (+5) and 5 deletions (-5).

commands/preprocess.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,7 @@ def remove_unused_external(html):
386 | 386 |
387 | 387 |
388 | 388 |   def preprocess_html_file(root, fn, rename_map):
389 |     | - parser = etree.HTMLParser()
    | 389 | + parser = etree.HTMLParser(encoding='utf-8')
390 | 390 |   html = etree.parse(fn, parser)
391 | 391 |   output = io.StringIO()
392 | 392 |

commands/preprocess_cssless.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
31 | 31 |   def preprocess_html_merge_cssless(src_path, dst_path):
32 | 32 |   with open(src_path, 'r') as a_file:
33 | 33 |   content = a_file.read()
34 |    | - parser = etree.HTMLParser()
   | 34 | + parser = etree.HTMLParser(encoding='utf-8')
35 | 35 |   stripped = content.strip()
36 | 36 |   root = etree.fromstring(stripped, parser)
37 | 37 |

index2ddg.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -582,7 +582,7 @@ def main():
582 | 582 |   # i+=1
583 | 583 |
584 | 584 |   root = e.parse(os.path.join(args.reference, fn),
585 |     | - parser=html.HTMLParser())
    | 585 | + parser=html.HTMLParser(encoding='utf-8'))
586 | 586 |
587 | 587 |   for ident in idents:
588 | 588 |

tests/test_preprocess.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,7 @@ def setUp(self):
258 | 258 |   self.testdata = os.path.join(os.path.dirname(__file__),
259 | 259 |   'preprocess_data')
260 | 260 |   infile = os.path.join(self.testdata, "fabs.html")
261 |     | - self.parser = etree.HTMLParser()
    | 261 | + self.parser = etree.HTMLParser(encoding='utf-8')
262 | 262 |   self.html = etree.parse(infile, self.parser)
263 | 263 |
264 | 264 |   # Check whether the HTML matches the contents of the specified test data

tests/test_preprocess_cssless.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def assert_converts_html(self, input, expected_output, function):
83 | 83 |   expected_output = \
84 | 84 |   '<html><body>{0}</body></html>'.format(expected_output)
85 | 85 |
86 |    | - parser = etree.HTMLParser()
   | 86 | + parser = etree.HTMLParser(encoding='utf-8')
87 | 87 |   root = etree.fromstring(input, parser)
88 | 88 |
89 | 89 |   root = function(root)

0 commit comments

Comments
 (0)