From 0776115af60b7f63abfe07437da849def2279792 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 4 Nov 2024 16:11:40 +0100 Subject: [PATCH 1/5] Match end statements with the opening Replaces program foo end program with program foo end program foo --- fortran_linter/main.py | 82 +++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/fortran_linter/main.py b/fortran_linter/main.py index 32d55c5..b79e47e 100755 --- a/fortran_linter/main.py +++ b/fortran_linter/main.py @@ -222,8 +222,16 @@ def format_rule(self, rule: RAW_RULE_T, fmt: dict) -> RULE_T: INDENTER_RULES = ( re.compile( - r"\b(if.*then|do|select|while|subroutine|function|module(?!\s*procedure)|interface)\b", - re.I, + r""" + \b + (?P + if|do|select|while|block + )|( + (?Ptype|program|subroutine|function|(sub)?module(?!\s*procedure)|interface) + (\s+(?P\w+))? + )\b + """, + re.I | re.VERBOSE, ), ) CONTINUATION_LINE_RULES = (re.compile(r"&(?=\s*(!.*)?$)"),) @@ -237,16 +245,23 @@ def format_rule(self, rule: RAW_RULE_T, fmt: dict) -> RULE_T: re.compile( r""" \b - # end - end - # white space + (?P + # end + end + # may be followed by the construct name, e.g. 'end function' + ( + # either just `end` + \b + | + # or end if / end do / end subroutine + \s* + (?Pif|do|select|block|case|while|type|program|subroutine|function|(sub)?module|interface) + # and eventually the name of the function, e.g. + # 'end function foo' + (\s+(?P\w+))? + ) + ) \s* - # may be followed by the construct name, e.g. 'end function' - ( - (if|do|select|case|while|subroutine|function|module|interface) - # and eventually the name of the function, ..., e.g. 'end function foo' - \s*(\s+\w+)?\s* - )? # we do not want to capture this (?= # may be followed by a comment... @@ -342,8 +357,12 @@ class Indenter: current_line_indent: int = 0 continuation_line: bool = False + # The stack of program/module/subroutine/function + program_stack: list[tuple[str, str]] + def __init__(self, nindent: int): self.Nindent = nindent + self.program_stack = [] def checker( self, @@ -381,12 +400,45 @@ def indent_line(self, line: str) -> str: line, LABEL_RULES, comment_pos, string_spans, return_matches=label_matches ) indent_matches: list[re.Match] = [] + named_constructs_matches: list[re.Match] = [] if self.checker(line, IMMEDIATE_DEDENTER_RULES, comment_pos, string_spans): cur_line_shift = self.Nindent - elif self.checker(line, DEDENTER_RULES, comment_pos, string_spans): + elif self.checker( + line, + DEDENTER_RULES, + comment_pos, + string_spans, + return_matches=named_constructs_matches, + ): cur_line_shift = self.Nindent dedent = True + + # Dedent a previously-opened construct + construct, name = self.program_stack.pop() + + m = named_constructs_matches[-1] + + # Sanity check + if m.group("construct") and m.group("construct") != construct: + raise ValueError( + "Named construct does not match the construct pattern, " + f"expected '{construct}' " + f"but got '{named_constructs_matches[-1].group('construct')}'." + ) + + span = named_constructs_matches[-1].span("end_construct_name") + + # Replace the construct with proper format + proper_format_lst = ["end"] + if construct: + proper_format_lst.append(construct) + if name: + proper_format_lst.append(name) + + proper_format = " ".join(proper_format_lst) + line = line[: span[0]] + proper_format + line[span[1] :] + elif self.checker( line, INDENTER_RULES, @@ -395,6 +447,12 @@ def indent_line(self, line: str) -> str: return_matches=indent_matches, ): indent = True + construct = indent_matches[-1].group("construct") or indent_matches[ + -1 + ].group("construct_no_name") + name = indent_matches[-1].group("name") + self.program_stack.append((construct, name)) + if self.checker(line, CONTINUATION_LINE_RULES, comment_pos, string_spans): curline_continuation = True From d5fdf7ea1d8398ab7c86485582b3cc36c934fb6e Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 4 Nov 2024 16:54:07 +0100 Subject: [PATCH 2/5] Fix matching of functions --- fortran_linter/main.py | 62 +++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/fortran_linter/main.py b/fortran_linter/main.py index b79e47e..ce4302f 100755 --- a/fortran_linter/main.py +++ b/fortran_linter/main.py @@ -1,6 +1,7 @@ import logging import re from collections.abc import Callable, Iterator +from typing import Any logging.basicConfig(filename="myapp.log", level=logging.DEBUG) re_strings = re.compile(r"([\"']).*?\1") @@ -223,12 +224,23 @@ def format_rule(self, rule: RAW_RULE_T, fmt: dict) -> RULE_T: INDENTER_RULES = ( re.compile( r""" - \b - (?P - if|do|select|while|block - )|( - (?Ptype|program|subroutine|function|(sub)?module(?!\s*procedure)|interface) - (\s+(?P\w+))? + ^\s*\b + ( + (?P + # Note: we match on "then" rather than "if.*then" + # so that we allow if (...) & \n then + then|do|select|while|block + )|( + ( + (?P + type|program|subroutine|(sub)?module(?!\s*procedure)|interface + )|( + (?!\bend\b)(\w+\s+)* + (?Pfunction) + ) + ) + (\s+(?P\w+))? + ) )\b """, re.I | re.VERBOSE, @@ -358,7 +370,7 @@ class Indenter: continuation_line: bool = False # The stack of program/module/subroutine/function - program_stack: list[tuple[str, str]] + program_stack: list[tuple[str, str, Any]] def __init__(self, nindent: int): self.Nindent = nindent @@ -382,7 +394,7 @@ def checker( return False - def indent_line(self, line: str) -> str: + def indent_line(self, line: str, context: Any) -> str: if line.startswith("#"): return line @@ -415,16 +427,23 @@ def indent_line(self, line: str) -> str: dedent = True # Dedent a previously-opened construct - construct, name = self.program_stack.pop() + construct, name, opening_context = self.program_stack.pop() m = named_constructs_matches[-1] + this_construct = m.group("construct") + # We match if (...) then using then + # but it corresponds to an end if + if this_construct == "then": + this_construct = "if" # Sanity check - if m.group("construct") and m.group("construct") != construct: + if this_construct and this_construct.lower() != construct: raise ValueError( "Named construct does not match the construct pattern, " f"expected '{construct}' " - f"but got '{named_constructs_matches[-1].group('construct')}'." + f"but got '{this_construct.lower()}'.\n" + f"Current context: {context}\n" + f"Other context: {opening_context}" ) span = named_constructs_matches[-1].span("end_construct_name") @@ -447,11 +466,19 @@ def indent_line(self, line: str) -> str: return_matches=indent_matches, ): indent = True - construct = indent_matches[-1].group("construct") or indent_matches[ - -1 - ].group("construct_no_name") + m = indent_matches[-1] + construct = ( + m.group("construct") + or m.group("construct_no_name") + or m.group("construct_function") + ) name = indent_matches[-1].group("name") - self.program_stack.append((construct, name)) + # We match 'if (...) then' using then + # but it corresponds to an 'end if' + if construct.lower() == "then": + construct = "if" + + self.program_stack.append((construct.lower(), name, context)) if self.checker(line, CONTINUATION_LINE_RULES, comment_pos, string_spans): curline_continuation = True @@ -495,7 +522,10 @@ def indent_line(self, line: str) -> str: return new_line def __call__(self, lines: list[str]) -> list[str]: - return [self.indent_line(line) for line in lines] + return [ + self.indent_line(line, {"line_number": line_number + 1, "line": line}) + for line_number, line in enumerate(lines) + ] class LineChecker: From be4b5de538e15add3a2beec36cafc0588451105a Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 4 Nov 2024 17:21:18 +0100 Subject: [PATCH 3/5] Fix matching of type(...), complex functions --- fortran_linter/main.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/fortran_linter/main.py b/fortran_linter/main.py index ce4302f..8ece24b 100755 --- a/fortran_linter/main.py +++ b/fortran_linter/main.py @@ -224,18 +224,20 @@ def format_rule(self, rule: RAW_RULE_T, fmt: dict) -> RULE_T: INDENTER_RULES = ( re.compile( r""" - ^\s*\b ( (?P # Note: we match on "then" rather than "if.*then" # so that we allow if (...) & \n then - then|do|select|while|block - )|( + .*(?P\bthen)|(^\s*\b(do|select|while|block)) + )|(^\s*\b ( (?P - type|program|subroutine|(sub)?module(?!\s*procedure)|interface + type(?!\()|program|subroutine|(sub)?module(?!\s*procedure)|interface )|( - (?!\bend\b)(\w+\s+)* + # This one is tricky, we match on 'function' but not 'end' + # to capture 'function foo' but not 'end function' + # but also 'real(dp) elemental pure function' + (?!\bend\b).* (?Pfunction) ) ) @@ -430,11 +432,7 @@ def indent_line(self, line: str, context: Any) -> str: construct, name, opening_context = self.program_stack.pop() m = named_constructs_matches[-1] - this_construct = m.group("construct") - # We match if (...) then using then - # but it corresponds to an end if - if this_construct == "then": - this_construct = "if" + this_construct = m.group("construct").strip() # Sanity check if this_construct and this_construct.lower() != construct: @@ -472,13 +470,22 @@ def indent_line(self, line: str, context: Any) -> str: or m.group("construct_no_name") or m.group("construct_function") ) - name = indent_matches[-1].group("name") + name = m.group("name") # We match 'if (...) then' using then # but it corresponds to an 'end if' - if construct.lower() == "then": + if m.group("has_then"): construct = "if" - self.program_stack.append((construct.lower(), name, context)) + if construct is None: + raise ValueError( + "Construct should not be None. " + f"Match: {m.group(0)}\n" + f"Current context: {context}" + ) + + construct = construct.lower().strip() + + self.program_stack.append((construct, name, context)) if self.checker(line, CONTINUATION_LINE_RULES, comment_pos, string_spans): curline_continuation = True From 7eb8e4efa5bb6859f906e77330046863da960af6 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 4 Nov 2024 17:39:33 +0100 Subject: [PATCH 4/5] Support 'label: do' statements --- fortran_linter/main.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/fortran_linter/main.py b/fortran_linter/main.py index 8ece24b..c352198 100755 --- a/fortran_linter/main.py +++ b/fortran_linter/main.py @@ -228,7 +228,12 @@ def format_rule(self, rule: RAW_RULE_T, fmt: dict) -> RULE_T: (?P # Note: we match on "then" rather than "if.*then" # so that we allow if (...) & \n then - .*(?P\bthen)|(^\s*\b(do|select|while|block)) + .*(?P\bthen)|( + ^\s*\b + # Possibly match optional label (foo: do) + (?P