diff --git a/commands/preprocess.py b/commands/preprocess.py index e745b8e1..5172fa6f 100644 --- a/commands/preprocess.py +++ b/commands/preprocess.py @@ -386,7 +386,7 @@ def remove_unused_external(html): def preprocess_html_file(root, fn, rename_map): - parser = etree.HTMLParser() + parser = etree.HTMLParser(encoding='utf-8') html = etree.parse(fn, parser) output = io.StringIO() diff --git a/commands/preprocess_cssless.py b/commands/preprocess_cssless.py index e6f9e022..d377f940 100644 --- a/commands/preprocess_cssless.py +++ b/commands/preprocess_cssless.py @@ -29,9 +29,9 @@ def preprocess_html_merge_cssless(src_path, dst_path): - with open(src_path, 'r') as a_file: + with open(src_path, 'r', encoding='utf-8') as a_file: content = a_file.read() - parser = etree.HTMLParser() + parser = etree.HTMLParser(encoding='utf-8') stripped = content.strip() root = etree.fromstring(stripped, parser) diff --git a/gadgets/replace_tests_base.py b/gadgets/replace_tests_base.py index 353d927b..e3158ea7 100755 --- a/gadgets/replace_tests_base.py +++ b/gadgets/replace_tests_base.py @@ -46,14 +46,14 @@ def main(): for path in paths: print('Processing {0}'.format(path)) - with open(path, 'r') as file: + with open(path, 'r', encoding='utf-8') as file: text = file.read() # TODO user proper XML parser, not this hack text = re.sub('', '', text) - with open(path, 'w') as file: + with open(path, 'w', encoding='utf-8') as file: file.write(text) if __name__ == '__main__': diff --git a/gadgets/sync_tests_mwiki.py b/gadgets/sync_tests_mwiki.py index b9e13374..dab44701 100755 --- a/gadgets/sync_tests_mwiki.py +++ b/gadgets/sync_tests_mwiki.py @@ -71,7 +71,7 @@ def sync_single_page(page, direction, dest_root): if direction == SYNC_DIRECTION_UPLOAD: if not os.path.exists(dest_path): return - with open(dest_path, 'r') as file: + with open(dest_path, 'r', encoding='utf-8') as file: new_text = file.read() if fix_whitespace(text) != fix_whitespace(new_text): page.put(new_text, 'sync with git') @@ -82,7 +82,7 @@ def sync_single_page(page, direction, dest_root): if not os.path.exists(dest_dir): os.makedirs(dest_dir) - with open(dest_path, 'w') as file: + with open(dest_path, 'w', encoding='utf-8') as file: file.write(fix_whitespace(text)) print('Downloaded {0}'.format(dest_path)) diff --git a/index2ddg.py b/index2ddg.py index 71d15ed8..c4249b99 100755 --- a/index2ddg.py +++ b/index2ddg.py @@ -582,7 +582,7 @@ def main(): # i+=1 root = e.parse(os.path.join(args.reference, fn), - parser=html.HTMLParser()) + parser=html.HTMLParser(encoding='utf-8')) for ident in idents: diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index 3c0c9525..1d1b0cb5 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -258,7 +258,7 @@ def setUp(self): self.testdata = os.path.join(os.path.dirname(__file__), 'preprocess_data') infile = os.path.join(self.testdata, "fabs.html") - self.parser = etree.HTMLParser() + self.parser = etree.HTMLParser(encoding='utf-8') self.html = etree.parse(infile, self.parser) # Check whether the HTML matches the contents of the specified test data diff --git a/tests/test_preprocess_cssless.py b/tests/test_preprocess_cssless.py index 64864ccf..b62e7f2c 100644 --- a/tests/test_preprocess_cssless.py +++ b/tests/test_preprocess_cssless.py @@ -43,10 +43,10 @@ def test_preprocess_html_merge_cssless(self): preprocess_html_merge_cssless(src_path, dst_path) - with open(dst_path, 'r') as a_file: + with open(dst_path, 'r', encoding='utf-8') as a_file: test = a_file.read() - with open(expected_path, 'r') as a_file: + with open(expected_path, 'r', encoding='utf-8') as a_file: expected = a_file.read() self.assertEqual(test, expected) @@ -63,10 +63,10 @@ def test_preprocess_html_merge_cssless2(self): preprocess_html_merge_cssless(src_path, dst_path) - with open(dst_path, 'r') as a_file: + with open(dst_path, 'r', encoding='utf-8') as a_file: test = a_file.read() - with open(expected_path, 'r') as a_file: + with open(expected_path, 'r', encoding='utf-8') as a_file: expected = a_file.read() self.assertEqual(test, expected) @@ -83,7 +83,7 @@ def assert_converts_html(self, input, expected_output, function): expected_output = \ '{0}'.format(expected_output) - parser = etree.HTMLParser() + parser = etree.HTMLParser(encoding='utf-8') root = etree.fromstring(input, parser) root = function(root)