4141 Sequence ,
4242 Set ,
4343 Tuple ,
44+ Callable ,
4445)
4546
4647from ._text_util import fix_case
@@ -833,6 +834,34 @@ def apply_uri_ignore_words(
833834 return check_matches
834835
835836
def line_tokenizer_factory(
    uri_ignore_words: Set[str],
    uri_regex: Pattern[str],
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
) -> Callable[[str], Iterable[Match[str]]]:
    """Build a tokenizer that yields the spell-checkable words of one line.

    The returned callable maps a line of text to an iterable of regex
    matches, one per word that should still be spell-checked after URI
    handling has been applied.

    :param uri_ignore_words: words to skip when they occur inside a URI;
        the special entry ``"*"`` means *every* word inside a URI is skipped.
    :param uri_regex: pattern recognizing URIs within a line.
    :param word_regex: pattern extracting candidate words from a line.
    :param ignore_word_regex: optional pattern of words to drop entirely,
        or ``None`` to keep all extracted words.
    :return: a ``line -> iterable of Match`` tokenizer closure.
    """

    def line_tokenizer(line: str) -> Iterable[Match[str]]:
        # If all URI spelling errors will be ignored, erase any URI before
        # extracting words.  Otherwise, apply ignores after extracting words.
        # This ensures that if a URI ignore word occurs both inside a URI and
        # outside, it will still be a spelling error.
        if "*" in uri_ignore_words:
            line = uri_regex.sub(" ", line)
        check_matches = extract_words_iter(line, word_regex, ignore_word_regex)
        if "*" not in uri_ignore_words:
            check_matches = apply_uri_ignore_words(
                check_matches,
                line,
                word_regex,
                ignore_word_regex,
                uri_regex,
                uri_ignore_words,
            )
        return check_matches

    return line_tokenizer
864+
836865def parse_file (
837866 filename : str ,
838867 colors : TermColors ,
@@ -910,6 +939,13 @@ def parse_file(
910939 except OSError :
911940 return bad_count
912941
942+ line_tokenizer = line_tokenizer_factory (
943+ uri_ignore_words ,
944+ uri_regex ,
945+ word_regex ,
946+ ignore_word_regex ,
947+ )
948+
913949 for i , line in enumerate (lines ):
914950 if line .rstrip () in exclude_lines :
915951 continue
@@ -926,23 +962,7 @@ def parse_file(
926962 fixed_words = set ()
927963 asked_for = set ()
928964
929- # If all URI spelling errors will be ignored, erase any URI before
930- # extracting words. Otherwise, apply ignores after extracting words.
931- # This ensures that if a URI ignore word occurs both inside a URI and
932- # outside, it will still be a spelling error.
933- if "*" in uri_ignore_words :
934- line = uri_regex .sub (" " , line )
935- check_matches = extract_words_iter (line , word_regex , ignore_word_regex )
936- if "*" not in uri_ignore_words :
937- check_matches = apply_uri_ignore_words (
938- check_matches ,
939- line ,
940- word_regex ,
941- ignore_word_regex ,
942- uri_regex ,
943- uri_ignore_words ,
944- )
945- for match in check_matches :
965+ for match in line_tokenizer (line ):
946966 word = match .group ()
947967 if word in ignore_words_cased :
948968 continue
0 commit comments