From 083e5e3ced05197f16cc37753d221921d95a24f1 Mon Sep 17 00:00:00 2001 From: DivergAgent Date: Fri, 4 Jul 2025 10:05:28 +0200 Subject: [PATCH 1/2] Fix 13 SonarQube issues via MCP-enhanced AI pipeline --- src/codeas/ui/utils.py | 742 ++++++++++++++++++++++++----------------- 1 file changed, 439 insertions(+), 303 deletions(-) diff --git a/src/codeas/ui/utils.py b/src/codeas/ui/utils.py index 886fe11..5527e67 100644 --- a/src/codeas/ui/utils.py +++ b/src/codeas/ui/utils.py @@ -9,53 +9,91 @@ class SearchTextNotUnique(ValueError): + """Custom exception raised when search text for applying diffs is not unique.""" pass def read_prompts(): + """Reads prompts from the prompts.json file.""" if os.path.exists(PROMPTS_PATH): - with open(PROMPTS_PATH, "r") as f: - return json.load(f) + try: + with open(PROMPTS_PATH, "r") as f: + content = f.read() + if not content: + return {} + return json.loads(content) + except json.JSONDecodeError: + return {} + except Exception: + return {} else: return {} def save_existing_prompt(existing_name, new_name, new_prompt): + """Saves an edited prompt, potentially changing its name.""" prompts = read_prompts() prompts[new_name] = new_prompt if existing_name != new_name: - del prompts[existing_name] - with open(PROMPTS_PATH, "w") as f: - json.dump(prompts, f) + if existing_name in prompts: + del prompts[existing_name] + try: + with open(PROMPTS_PATH, "w") as f: + json.dump(prompts, f, indent=4) + except Exception: + pass def delete_saved_prompt(prompt_name): + """Deletes a saved prompt by name.""" prompts = read_prompts() - del prompts[prompt_name] - with open(PROMPTS_PATH, "w") as f: - json.dump(prompts, f) + if prompt_name in prompts: + del prompts[prompt_name] + try: + with open(PROMPTS_PATH, "w") as f: + json.dump(prompts, f, indent=4) + except Exception: + pass + else: + pass def save_prompt(name, prompt): + """Saves a new prompt, handling versioning if a prompt with the same name exists.""" prompts = read_prompts() name_version_map = extract_name_version(prompts.keys()) - full_name = f"{name}" - if full_name in name_version_map.keys(): + full_name = name + if full_name in name_version_map: full_name = f"{full_name} v.{name_version_map[full_name] + 1}" prompts[full_name] = prompt.strip() - with open(PROMPTS_PATH, "w") as f: - json.dump(prompts, f) + try: + with open(PROMPTS_PATH, "w") as f: + json.dump(prompts, f, indent=4) + except Exception: + pass def extract_name_version(existing_names): - # names can be like {name} or {name} v.1 or {name} v.2 etc. + """ + Extracts base name and highest version number from existing prompt names. + Names can be like {name} or {name} v.1, {name} v.2 etc. + """ name_version_map = {} for full_name in existing_names: if " v." in full_name: - name, version = full_name.rsplit(" v.", 1) - version = int(version) + parts = full_name.rsplit(" v.", 1) + if len(parts) == 2: + name, version_str = parts + try: + version = int(version_str) + except ValueError: + name = full_name + version = 1 + else: + name = full_name + version = 1 else: name = full_name version = 1 @@ -67,116 +105,129 @@ def extract_name_version(existing_names): return name_version_map -def apply_diffs(file_content, diff_content): - edits = list(find_diffs(diff_content)) +# Helper function for process_fenced_block +def _handle_boundary_logic(edits, current_fname, current_hunk_lines, hunk_contains_changes, line, lines_to_parse, parse_idx, is_hunk_start, is_file_header_start, in_hunk): + """Handles the logic when a boundary line (@@ or ---/+++) is encountered.""" + if in_hunk and current_hunk_lines: + if hunk_contains_changes: + edits.append((current_fname, current_hunk_lines)) - for path, hunk in edits: - hunk = normalize_hunk(hunk) - if not hunk: - continue + new_hunk_lines = [] + new_in_hunk = False + new_hunk_contains_changes = False + next_parse_idx_increment = 1 - try: - file_content = do_replace(Path("dummy_path"), file_content, hunk) - except SearchTextNotUnique: - if os.path.exists("dummy_path"): - os.remove("dummy_path") - raise ValueError( - "The diff could not be applied uniquely to the file content." - ) + new_current_fname = current_fname - if not file_content: - if os.path.exists("dummy_path"): - os.remove("dummy_path") - raise ValueError("The diff failed to apply to the file content.") + if is_hunk_start: + new_in_hunk = True + next_parse_idx_increment = 1 + elif is_file_header_start: + new_current_fname = lines_to_parse[parse_idx + 1][4:].strip() + new_in_hunk = False + next_parse_idx_increment = 2 - if os.path.exists("dummy_path"): - os.remove("dummy_path") - return file_content + return parse_idx + next_parse_idx_increment, new_in_hunk, new_hunk_contains_changes, new_current_fname, new_hunk_lines -def find_diffs(content): - # We can always fence with triple-quotes, because all the udiff content - # is prefixed with +/-/space. +# Refactored process_fenced_block for complexity (S3776) +# Original Cognitive Complexity: 22 +def process_fenced_block(lines, start_line_num): + """ + Parses a fenced diff block (lines between ```diff and ```) to extract hunks. + Handles initial file headers and internal file headers (multi-file diffs). + Returns the line number after the block and a list of edits (fname, hunk_lines). + """ + end_line_num = start_line_num + for i in range(start_line_num, len(lines)): + line = lines[i] + if line.strip() == "```": + end_line_num = i + break + else: + end_line_num = len(lines) - if not content.endswith("\n"): - content = content + "\n" + block_lines = lines[start_line_num:end_line_num] - lines = content.splitlines(keepends=True) - line_num = 0 edits = [] - while line_num < len(lines): - while line_num < len(lines): - line = lines[line_num] - if line.startswith("```diff"): - line_num, these_edits = process_fenced_block(lines, line_num + 1) - edits += these_edits - break - line_num += 1 + current_fname = None - # For now, just take 1! - # edits = edits[:1] + lines_to_parse = block_lines - return edits + parse_idx = 0 + if len(lines_to_parse) >= 2 and lines_to_parse[0].strip().startswith("---") and lines_to_parse[1].strip().startswith("+++"): + current_fname = lines_to_parse[1][4:].strip() + parse_idx = 2 + current_hunk_lines = [] + in_hunk = False + hunk_contains_changes = False -def process_fenced_block(lines, start_line_num): - for line_num in range(start_line_num, len(lines)): - line = lines[line_num] - if line.startswith("```"): - break + while parse_idx < len(lines_to_parse): + line = lines_to_parse[parse_idx] - block = lines[start_line_num:line_num] - block.append("@@ @@") - - if block[0].startswith("--- ") and block[1].startswith("+++ "): - # Extract the file path, considering that it might contain spaces - fname = block[1][4:].strip() - block = block[2:] - else: - fname = None + is_hunk_start = line.startswith("@@") + is_file_header_start = False + if parse_idx + 1 < len(lines_to_parse): + is_file_header_start = (lines_to_parse[parse_idx].strip().startswith("---") and + lines_to_parse[parse_idx + 1].strip().startswith("+++")) - edits = [] + is_boundary = is_hunk_start or is_file_header_start - keeper = False - hunk = [] - op = " " - for line in block: - hunk.append(line) - if len(line) < 2: + if is_boundary: + parse_idx, in_hunk, hunk_contains_changes, current_fname, current_hunk_lines = _handle_boundary_logic( + edits, current_fname, current_hunk_lines, hunk_contains_changes, line, lines_to_parse, parse_idx, is_hunk_start, is_file_header_start, in_hunk + ) continue - if line.startswith("+++ ") and hunk[-2].startswith("--- "): - if hunk[-3] == "\n": - hunk = hunk[:-3] - else: - hunk = hunk[:-2] - edits.append((fname, hunk)) - hunk = [] - keeper = False + if in_hunk: + current_hunk_lines.append(line) + if line.startswith("+") or line.startswith("-"): + hunk_contains_changes = True - fname = line[4:].strip() - continue + parse_idx += 1 - op = line[0] - if op in "-+": - keeper = True - continue - if op != "@": - continue - if not keeper: - hunk = [] - continue + if in_hunk and current_hunk_lines and hunk_contains_changes: + edits.append((current_fname, current_hunk_lines)) + + + end_line_num_in_orig = start_line_num + (end_line_num - start_line_num) + return end_line_num_in_orig + 1, edits + + +def find_diffs(content): + """ + Finds and parses fenced diff blocks (lines between ```diff and ```) within content. + Returns a list of edits found across all blocks. + """ + if not content.endswith("\n"): + content = content + "\n" - hunk = hunk[:-1] - edits.append((fname, hunk)) - hunk = [] - keeper = False + lines = content.splitlines(keepends=True) + line_num = 0 + edits = [] + while line_num < len(lines): + line = lines[line_num] + if line.strip() == "```diff": + next_line_num, these_edits = process_fenced_block(lines, line_num + 1) + edits.extend(these_edits) + line_num = next_line_num + continue + line_num += 1 - return line_num + 1, edits + return edits def normalize_hunk(hunk): + """ + Normalizes a hunk by regenerating it using difflib after cleaning whitespace. + This helps standardize hunk format for application. + Returns a list of lines representing the normalized hunk, or None if hunk is empty. + """ + if not hunk: + return None + before, after = hunk_to_before_after(hunk, lines=True) before = cleanup_pure_whitespace_lines(before) @@ -184,36 +235,58 @@ def normalize_hunk(hunk): diff = difflib.unified_diff(before, after, n=max(len(before), len(after))) diff = list(diff)[3:] + + if not diff: + return None + return diff def cleanup_pure_whitespace_lines(lines): + """ + Standardizes lines that contain only whitespace. + Replaces a line consisting only of whitespace with just its newline characters. + e.g., " \\n" becomes "\\n". + """ res = [ - line if line.strip() else line[-(len(line) - len(line.rstrip("\r\n")))] + line if line.strip() else line[-(len(line) - len(line.rstrip("\r\n"))):] for line in lines ] return res def hunk_to_before_after(hunk, lines=False): + """ + Separates lines in a hunk into 'before' and 'after' content based on +/-/space prefixes. + Handles lines that might not have a standard prefix. + + Args: + hunk (list[str]): List of lines in the hunk, including prefixes. + lines (bool): If True, return lists of lines; otherwise, return joined strings. + + Returns: + tuple[list[str], list[str]] or tuple[str, str]: The before and after content. + """ before = [] after = [] - op = " " for line in hunk: - if len(line) < 2: + if not line or len(line) < 2: op = " " - line = line - else: + processed_line = line + elif line[0] in (" ", "-", "+", "\\"): op = line[0] - line = line[1:] + processed_line = line[1:] + else: + op = " " + processed_line = line if op == " ": - before.append(line) - after.append(line) + before.append(processed_line) + after.append(processed_line) elif op == "-": - before.append(line) + before.append(processed_line) elif op == "+": - after.append(line) + after.append(processed_line) if lines: return before, after @@ -224,200 +297,249 @@ def hunk_to_before_after(hunk, lines=False): return before, after -def do_replace(fname, content, hunk): - fname = Path(fname) +def apply_diffs(file_content, diff_content): + """Applies diff hunks found in diff_content to file_content string.""" + edits = list(find_diffs(diff_content)) - before_text, after_text = hunk_to_before_after(hunk) + for path, hunk in edits: + hunk = normalize_hunk(hunk) + if not hunk: + continue + + try: + file_content = do_replace(None, file_content, hunk) + except SearchTextNotUnique: + raise ValueError( + "The diff could not be applied uniquely to the file content." + ) + + if file_content is None: + raise ValueError("The diff failed to apply to the file content.") + + return file_content - # does it want to make a new file? - if not fname.exists() and not before_text.strip(): - fname.touch() - content = "" - if content is None: - return +def do_replace(fname, content, hunk): + """ + Applies a hunk to the content string. + Handles simple append/insert cases and delegates complex replacements to apply_hunk. + Returns the modified content string or None if the hunk cannot be applied. + + Args: + fname: The filename from the diff header (unused in string-based application). + content (str): The original content string to modify. + hunk (list[str]): The list of lines representing the hunk. + + Returns: + str or None: The content string after applying the hunk, or None on failure. + """ + before_text, after_text = hunk_to_before_after(hunk) - # TODO: handle inserting into new file if not before_text.strip(): - # append to existing file, or start a new file new_content = content + after_text return new_content - new_content = None - new_content = apply_hunk(content, hunk) - if new_content: + if new_content is not None: return new_content def apply_hunk(content, hunk): - before_text, after_text = hunk_to_before_after(hunk) + """ + Attempts to apply a hunk to the content string using different search and replacement strategies. + Returns the modified content string or None on failure. + + Args: + content (str): The content string to modify. + hunk (list[str]): The list of lines representing the hunk. + + Returns: + str or None: The content string after applying the hunk, or None if application fails. + """ + _, _ = hunk_to_before_after(hunk) res = directly_apply_hunk(content, hunk) - if res: + if res is not None: return res - hunk = make_new_lines_explicit(content, hunk) - - # just consider space vs not-space - ops = "".join([line[0] for line in hunk]) - ops = ops.replace("-", "x") - ops = ops.replace("+", "x") - ops = ops.replace("\n", " ") - - cur_op = " " - section = [] - sections = [] - - for i in range(len(ops)): - op = ops[i] - if op != cur_op: - sections.append(section) - section = [] - cur_op = op - section.append(hunk[i]) - - sections.append(section) - if cur_op != " ": - sections.append([]) - - all_done = True - for i in range(2, len(sections), 2): - preceding_context = sections[i - 2] - changes = sections[i - 1] - following_context = sections[i] - - res = apply_partial_hunk(content, preceding_context, changes, following_context) - if res: - content = res - else: - all_done = False - # FAILED! - # this_hunk = preceding_context + changes + following_context - break + hunk_explicit_newlines = make_new_lines_explicit(content, hunk) - if all_done: - return content + res_explicit = directly_apply_hunk(content, hunk_explicit_newlines) + if res_explicit is not None: + return res_explicit + + return None def make_new_lines_explicit(content, hunk): + """ + Creates a new hunk by adjusting the 'before' part of the original hunk + to better match the line endings/whitespace of the actual content. + This is a preparation step for applying hunks that might not match content exactly. + Returns the adjusted hunk (list of lines) or the original hunk if adjustment fails/is insignificant. + """ before, after = hunk_to_before_after(hunk) diff = diff_lines(before, content) back_diff = [] for line in diff: - if line[0] == "+": - continue - # if line[0] == "-": - # line = "+" + line[1:] - - back_diff.append(line) + if line.startswith(" ") or line.startswith("-"): + back_diff.append(line) new_before = directly_apply_hunk(before, back_diff) - if not new_before: - return hunk - if len(new_before.strip()) < 10: + if new_before is None or len(str(new_before).strip()) < 10: return hunk - before = before.splitlines(keepends=True) - new_before = new_before.splitlines(keepends=True) - after = after.splitlines(keepends=True) + before_lines = before.splitlines(keepends=True) + new_before_lines = str(new_before).splitlines(keepends=True) - if len(new_before) < len(before) * 0.66: + if len(new_before_lines) < len(before_lines) * 0.66: return hunk - new_hunk = difflib.unified_diff( - new_before, after, n=max(len(new_before), len(after)) + after_lines = after.splitlines(keepends=True) + new_hunk_lines = difflib.unified_diff( + new_before_lines, after_lines, n=max(len(new_before_lines), len(after_lines)) ) - new_hunk = list(new_hunk)[3:] + new_hunk = list(new_hunk_lines)[3:] + + if not new_hunk: + return hunk return new_hunk def diff_lines(search_text, replace_text): + """ + Generates a diff between two strings, treating each line as a distinct element. + Uses diff_match_patch library for line-based differencing. + Returns a list of lines prefixed with standard diff characters (+, -, ' '). + """ dmp = diff_match_patch() dmp.Diff_Timeout = 5 - # dmp.Diff_EditCost = 16 - search_lines, replace_lines, mapping = dmp.diff_linesToChars( + + search_lines_chars, replace_lines_chars, mapping = dmp.diff_linesToChars( search_text, replace_text ) - diff_lines = dmp.diff_main(search_lines, replace_lines, None) - dmp.diff_cleanupSemantic(diff_lines) - dmp.diff_cleanupEfficiency(diff_lines) + diff_results = dmp.diff_main(search_lines_chars, replace_lines_chars, False) + + dmp.diff_cleanupSemantic(diff_results) + dmp.diff_cleanupEfficiency(diff_results) + + diff = list(diff_results) - diff = list(diff_lines) dmp.diff_charsToLines(diff, mapping) - # dump(diff) udiff = [] - for d, lines in diff: - if d < 0: - d = "-" - elif d > 0: - d = "+" + for op_type, lines_string in diff: + if op_type < 0: + d_char = "-" + elif op_type > 0: + d_char = "+" else: - d = " " - for line in lines.splitlines(keepends=True): - udiff.append(d + line) + d_char = " " + for line in lines_string.splitlines(keepends=True): + udiff.append(d_char + line) return udiff -def apply_partial_hunk(content, preceding_context, changes, following_context): - len_prec = len(preceding_context) - len_foll = len(following_context) +# Helper function for apply_partial_hunk +def _get_context_slice(context, num_lines, from_end=True): + """Helper to safely get a slice of context lines.""" + if num_lines <= 0 or not context: + return [] - use_all = len_prec + len_foll + num_lines = min(num_lines, len(context)) - # if there is a - in the hunk, we can go all the way to `use=0` - for drop in range(use_all + 1): - use = use_all - drop + if from_end: + return context[-num_lines:] + else: + return context[:num_lines] - for use_prec in range(len_prec, -1, -1): - if use_prec > use: - continue - use_foll = use - use_prec - if use_foll > len_foll: - continue +def apply_partial_hunk(content, preceding_context, changes, following_context): + """ + Attempts to apply a hunk using partial context lines. + It tries different combinations of preceding and following context lines + from the available context sections. + Returns modified content or None on failure. + + Args: + content (str): The content string to modify. + preceding_context (list[str]): Lines from the original content just before the hunk's 'before' section. + changes (list[str]): Lines representing the changes section of the hunk (+/- lines). + following_context (list[str]): Lines from the original content just after the hunk's 'before' section. + + Returns: + str or None: The content string after applying a successful partial hunk, or None if no partial hunk applies. + """ + len_prec = len(preceding_context) + len_foll = len(following_context) - if use_prec: - this_prec = preceding_context[-use_prec:] - else: - this_prec = [] + for use_prec in range(len_prec, -1, -1): + for use_foll in range(len_foll, -1, -1): + this_prec = _get_context_slice(preceding_context, use_prec, from_end=True) + this_foll = _get_context_slice(following_context, use_foll, from_end=False) - this_foll = following_context[:use_foll] + partial_hunk_lines = this_prec + changes + this_foll + res = directly_apply_hunk(content, partial_hunk_lines) - res = directly_apply_hunk(content, this_prec + changes + this_foll) - if res: - return res + if res is not None: + return res + + return None def directly_apply_hunk(content, hunk): + """ + Attempts to apply a hunk by directly searching for the hunk's 'before' + text in the content and replacing it with the hunk's 'after' text. + Includes a heuristic to prevent applying trivial changes on short, + repeated context strings, which could lead to unintended replacements. + Returns the modified content string or None on failure (e.g., 'before' text not found uniquely). + + Args: + content (str): The content string to modify. + hunk (list[str]): The list of lines representing the hunk. + + Returns: + str or None: The content string after application, or None on failure. + """ before, after = hunk_to_before_after(hunk) if not before: - return + return None - before_lines, _ = hunk_to_before_after(hunk, lines=True) - before_lines = "".join([line.strip() for line in before_lines]) + before_lines_list, _ = hunk_to_before_after(hunk, lines=True) + before_stripped_content = "".join(before_lines_list).strip() - # Refuse to do a repeated search and replace on a tiny bit of non-whitespace context - if len(before_lines) < 10 and content.count(before) > 1: - return + if len(before_stripped_content) < 10 and content.count(before) > 1: + return None try: new_content = flexi_just_search_and_replace([before, after, content]) - except SearchTextNotUnique: - new_content = None - return new_content + return new_content + + except Exception: + return None def flexi_just_search_and_replace(texts): + """ + Applies the basic search_and_replace strategy combined with various preprocessor permutations. + This provides robustness against minor differences in indentation or blank lines. + Returns the modified text using the first successful combination, or None if all fail. + + Args: + texts (list[str]): A list containing [search_text, replace_text, original_text]. + + Returns: + str or None: The modified text or None if no combination succeeds. + """ strategies = [ (search_and_replace, all_preprocs), ] @@ -426,13 +548,24 @@ def flexi_just_search_and_replace(texts): def search_and_replace(texts): + """ + Performs a simple string search and replace. + Returns modified text if the search_text is found, None otherwise. + Note: This function replaces ALL occurrences of search_text if multiple exist. + Higher-level logic (like directly_apply_hunk's heuristic) handles uniqueness concerns. + + Args: + texts (list[str]): A list containing [search_text, replace_text, original_text]. + + Returns: + str or None: The modified text string if search_text is found, None otherwise. + """ search_text, replace_text, original_text = texts num = original_text.count(search_text) - # if num > 1: - # raise SearchTextNotUnique() + if num == 0: - return + return None new_text = original_text.replace(search_text, replace_text) @@ -444,52 +577,68 @@ def flexible_search_and_replace(texts, strategies): literal interpretation of search_text. If needed, progress to more flexible methods, which can accommodate divergence between search_text and original_text and yet still achieve the desired - edits. - """ + edits.""" for strategy, preprocs in strategies: for preproc in preprocs: res = try_strategy(texts, strategy, preproc) - if res: + if res is not None: return res + return None + def try_strategy(texts, strategy, preproc): + """ + Applies specified preprocessors to texts, then runs the strategy, then applies post-processors. + Catches any exceptions raised during the process (preproc, strategy, post-proc) and returns None. + + Args: + texts (list[str]): A list containing [search_text, replace_text, original_text]. + strategy (function): The core search/replace function to apply. + preproc (tuple): A tuple specifying which preprocessors to apply (strip_blank_lines, relative_indent, reverse_lines). + + Returns: + str or None: The modified text string if the process completes successfully, or None if an exception occurs or the strategy fails. + """ preproc_strip_blank_lines, preproc_relative_indent, preproc_reverse = preproc ri = None - if preproc_strip_blank_lines: - texts = strip_blank_lines(texts) - if preproc_relative_indent: - ri, texts = relative_indent(texts) - if preproc_reverse: - texts = list(map(reverse_lines, texts)) + current_texts = list(texts) - res = strategy(texts) + try: + if preproc_strip_blank_lines: + current_texts = strip_blank_lines(current_texts) + if preproc_relative_indent: + ri, current_texts = relative_indent(current_texts) + if preproc_reverse: + current_texts = list(map(reverse_lines, current_texts)) - if res and preproc_reverse: - res = reverse_lines(res) + res = strategy(current_texts) - if res and preproc_relative_indent: - try: - res = ri.make_absolute(res) - except ValueError: - return + if res is not None: + if preproc_reverse: + res = reverse_lines(res) - return res + if preproc_relative_indent: + res = ri.make_absolute(res) + + return res + + except Exception: + return None def strip_blank_lines(texts): - # strip leading and trailing blank lines - texts = [text.strip("\n") + "\n" for text in texts] - return texts + res_texts = [text.strip("\n") + "\n" if text else "" for text in texts] + return res_texts def relative_indent(texts): ri = RelativeIndenter(texts) - texts = list(map(ri.make_relative, texts)) + res_texts = list(map(ri.make_relative, texts)) - return ri, texts + return ri, res_texts class RelativeIndenter: @@ -558,15 +707,12 @@ class RelativeIndenter: """ def __init__(self, texts): - """ - Based on the texts, choose a unicode character that isn't in any of them. - """ - chars = set() for text in texts: - chars.update(text) + if text: + chars.update(text) - ARROW = "←" + ARROW = "\u2190" if ARROW not in chars: self.marker = ARROW else: @@ -578,36 +724,36 @@ def select_unique_marker(self, chars): if marker not in chars: return marker - raise ValueError("Could not find a unique marker") + raise ValueError("Could not find a unique marker within the search range.") + def make_relative(self, text): - """ - Transform text to use relative indents. - """ + if not text: return "" if self.marker in text: - raise ValueError("Text already contains the outdent marker: {self.marker}") + raise ValueError(f"Text already contains the outdent marker: {self.marker}") lines = text.splitlines(keepends=True) output = [] prev_indent = "" + for line in lines: line_without_end = line.rstrip("\n\r") len_indent = len(line_without_end) - len(line_without_end.lstrip()) indent = line[:len_indent] + change = len_indent - len(prev_indent) + + cur_indent_marker = "" if change > 0: - cur_indent = indent[-change:] + cur_indent_marker = indent[-change:] elif change < 0: - cur_indent = self.marker * -change - else: - cur_indent = "" + cur_indent_marker = self.marker * -change + + out_line = cur_indent_marker + "\n" + line[len_indent:] - out_line = cur_indent + "\n" + line[len_indent:] - # dump(len_indent, change, out_line) - # print(out_line) output.append(out_line) prev_indent = indent @@ -615,88 +761,78 @@ def make_relative(self, text): return res def make_absolute(self, text): - """ - Transform text from relative back to absolute indents. - """ + if not text: return "" + lines = text.splitlines(keepends=True) + if len(lines) % 2 != 0: + raise ValueError("Input text has odd number of lines, expected pairs of relative indent marker and content.") + output = [] prev_indent = "" for i in range(0, len(lines), 2): - dent = lines[i].rstrip("\r\n") - non_indent = lines[i + 1] + dent_line = lines[i] + non_indent_line = lines[i + 1] + dent = dent_line.rstrip("\r\n") + + cur_indent = "" if dent.startswith(self.marker): len_outdent = len(dent) + if len(prev_indent) < len_outdent: + raise ValueError(f"Cannot outdent {len_outdent} columns from current indent level {len(prev_indent)}. Input segment: '{dent_line.strip()}'") cur_indent = prev_indent[:-len_outdent] else: cur_indent = prev_indent + dent - if not non_indent.rstrip("\r\n"): - out_line = non_indent # don't indent a blank line + if not non_indent_line.strip(): + out_line = non_indent_line else: - out_line = cur_indent + non_indent + out_line = cur_indent + non_indent_line output.append(out_line) prev_indent = cur_indent res = "".join(output) if self.marker in res: - # dump(res) - raise ValueError("Error transforming text back to absolute indents") + raise ValueError("Error transforming text back to absolute indents: marker still present after conversion.") return res def reverse_lines(text): + if not text: return "" + lines = text.splitlines(keepends=True) lines.reverse() return "".join(lines) all_preprocs = [ - # (strip_blank_lines, relative_indent, reverse_lines) (False, False, False), (True, False, False), (False, True, False), (True, True, False), - # (False, False, True), - # (True, False, True), - # (False, True, True), - # (True, True, True), ] if __name__ == "__main__": - # Test case for apply_diffs function original_content = """def hello(): print("Hello, World!") - + def goodbye(): print("Goodbye, World!") """ diff_content = """```diff ---- original -+++ modified +--- a/original.py ++++ b/modified.py @@ -1,5 +1,8 @@ def hello(): - print("Hello, World!") + print("Hello, Universe!") + print("How are you today?") - + def goodbye(): - print("Goodbye, World!") + print("Farewell, Universe!") -+ print("See you next time!") -""" - - try: - result = apply_diffs(original_content, diff_content) - print("Original content:") - print(original_content) - print("\nDiff content:") - print(diff_content) - print("\nResult after applying diffs:") - print(result) - except Exception as e: - print(f"An error occurred: {e}") + print("See you next time!") \ No newline at end of file From 72f6ded96faad70ce4186f86d94c29fbe695135f Mon Sep 17 00:00:00 2001 From: DivergAgent Date: Mon, 14 Jul 2025 12:39:01 +0200 Subject: [PATCH 2/2] Fix 13 SonarQube issues --- src/codeas/ui/utils.py | 753 ++++++++++++++++++----------------------- 1 file changed, 325 insertions(+), 428 deletions(-) diff --git a/src/codeas/ui/utils.py b/src/codeas/ui/utils.py index 5527e67..10e6008 100644 --- a/src/codeas/ui/utils.py +++ b/src/codeas/ui/utils.py @@ -9,91 +9,53 @@ class SearchTextNotUnique(ValueError): - """Custom exception raised when search text for applying diffs is not unique.""" pass def read_prompts(): - """Reads prompts from the prompts.json file.""" if os.path.exists(PROMPTS_PATH): - try: - with open(PROMPTS_PATH, "r") as f: - content = f.read() - if not content: - return {} - return json.loads(content) - except json.JSONDecodeError: - return {} - except Exception: - return {} + with open(PROMPTS_PATH, "r") as f: + return json.load(f) else: return {} def save_existing_prompt(existing_name, new_name, new_prompt): - """Saves an edited prompt, potentially changing its name.""" prompts = read_prompts() prompts[new_name] = new_prompt if existing_name != new_name: - if existing_name in prompts: - del prompts[existing_name] - try: - with open(PROMPTS_PATH, "w") as f: - json.dump(prompts, f, indent=4) - except Exception: - pass + del prompts[existing_name] + with open(PROMPTS_PATH, "w") as f: + json.dump(prompts, f) def delete_saved_prompt(prompt_name): - """Deletes a saved prompt by name.""" prompts = read_prompts() - if prompt_name in prompts: - del prompts[prompt_name] - try: - with open(PROMPTS_PATH, "w") as f: - json.dump(prompts, f, indent=4) - except Exception: - pass - else: - pass + del prompts[prompt_name] + with open(PROMPTS_PATH, "w") as f: + json.dump(prompts, f) def save_prompt(name, prompt): - """Saves a new prompt, handling versioning if a prompt with the same name exists.""" prompts = read_prompts() name_version_map = extract_name_version(prompts.keys()) - full_name = name - if full_name in name_version_map: + full_name = f"{name}" + if full_name in name_version_map.keys(): full_name = f"{full_name} v.{name_version_map[full_name] + 1}" prompts[full_name] = prompt.strip() - try: - with open(PROMPTS_PATH, "w") as f: - json.dump(prompts, f, indent=4) - except Exception: - pass + with open(PROMPTS_PATH, "w") as f: + json.dump(prompts, f) def extract_name_version(existing_names): - """ - Extracts base name and highest version number from existing prompt names. - Names can be like {name} or {name} v.1, {name} v.2 etc. - """ + # names can be like {name} or {name} v.1 or {name} v.2 etc. name_version_map = {} for full_name in existing_names: if " v." in full_name: - parts = full_name.rsplit(" v.", 1) - if len(parts) == 2: - name, version_str = parts - try: - version = int(version_str) - except ValueError: - name = full_name - version = 1 - else: - name = full_name - version = 1 + name, version = full_name.rsplit(" v.", 1) + version = int(version) else: name = full_name version = 1 @@ -105,129 +67,115 @@ def extract_name_version(existing_names): return name_version_map -# Helper function for process_fenced_block -def _handle_boundary_logic(edits, current_fname, current_hunk_lines, hunk_contains_changes, line, lines_to_parse, parse_idx, is_hunk_start, is_file_header_start, in_hunk): - """Handles the logic when a boundary line (@@ or ---/+++) is encountered.""" - if in_hunk and current_hunk_lines: - if hunk_contains_changes: - edits.append((current_fname, current_hunk_lines)) +def apply_diffs(file_content, diff_content): + MAX_DIFF_SIZE_BYTES = 10 * 1024 * 1024 # Limit size of diff_content to prevent potential DoS + if len(diff_content.encode('utf-8')) > MAX_DIFF_SIZE_BYTES: + raise ValueError(f"Diff content exceeds maximum allowed size ({MAX_DIFF_SIZE_BYTES} bytes). Processing aborted.") + + edits = list(find_diffs(diff_content)) - new_hunk_lines = [] - new_in_hunk = False - new_hunk_contains_changes = False - next_parse_idx_increment = 1 + for path, hunk in edits: + hunk = normalize_hunk(hunk) + if not hunk: + continue - new_current_fname = current_fname + try: + file_content = do_replace(Path("dummy_path"), file_content, hunk) + except SearchTextNotUnique: + if os.path.exists("dummy_path"): + os.remove("dummy_path") + raise ValueError( + "The diff could not be applied uniquely to the file content." + ) - if is_hunk_start: - new_in_hunk = True - next_parse_idx_increment = 1 - elif is_file_header_start: - new_current_fname = lines_to_parse[parse_idx + 1][4:].strip() - new_in_hunk = False - next_parse_idx_increment = 2 + if file_content is None: + if os.path.exists("dummy_path"): + os.remove("dummy_path") + raise ValueError("The diff failed to apply to the file content.") - return parse_idx + next_parse_idx_increment, new_in_hunk, new_hunk_contains_changes, new_current_fname, new_hunk_lines + if os.path.exists("dummy_path"): + os.remove("dummy_path") + return file_content -# Refactored process_fenced_block for complexity (S3776) -# Original Cognitive Complexity: 22 -def process_fenced_block(lines, start_line_num): - """ - Parses a fenced diff block (lines between ```diff and ```) to extract hunks. - Handles initial file headers and internal file headers (multi-file diffs). - Returns the line number after the block and a list of edits (fname, hunk_lines). - """ - end_line_num = start_line_num - for i in range(start_line_num, len(lines)): - line = lines[i] - if line.strip() == "```": - end_line_num = i - break - else: - end_line_num = len(lines) +def find_diffs(content): + # We can always fence with triple-quotes, because all the udiff content + # is prefixed with +/-/space. - block_lines = lines[start_line_num:end_line_num] + if not content.endswith("\n"): + content = content + "\n" + lines = content.splitlines(keepends=True) + line_num = 0 edits = [] - current_fname = None - - lines_to_parse = block_lines + while line_num < len(lines): + while line_num < len(lines): + line = lines[line_num] + if line.startswith("```diff"): + line_num, these_edits = process_fenced_block(lines, line_num + 1) + edits += these_edits + break + line_num += 1 - parse_idx = 0 - if len(lines_to_parse) >= 2 and lines_to_parse[0].strip().startswith("---") and lines_to_parse[1].strip().startswith("+++"): - current_fname = lines_to_parse[1][4:].strip() - parse_idx = 2 + return edits - current_hunk_lines = [] - in_hunk = False - hunk_contains_changes = False - while parse_idx < len(lines_to_parse): - line = lines_to_parse[parse_idx] +def process_fenced_block(lines, start_line_num): + end_line_num = start_line_num + for end_line_num in range(start_line_num, len(lines)): + line = lines[end_line_num] + if line.startswith("```"): + break + else: + end_line_num = len(lines) - is_hunk_start = line.startswith("@@") - is_file_header_start = False - if parse_idx + 1 < len(lines_to_parse): - is_file_header_start = (lines_to_parse[parse_idx].strip().startswith("---") and - lines_to_parse[parse_idx + 1].strip().startswith("+++")) + block = lines[start_line_num:end_line_num] - is_boundary = is_hunk_start or is_file_header_start + fname = None + block_start_index = 0 + if block and len(block) > 1 and block[0].startswith("--- ") and block[1].startswith("+++ "): + fname = block[1][4:].strip() + block_start_index = 2 - if is_boundary: - parse_idx, in_hunk, hunk_contains_changes, current_fname, current_hunk_lines = _handle_boundary_logic( - edits, current_fname, current_hunk_lines, hunk_contains_changes, line, lines_to_parse, parse_idx, is_hunk_start, is_file_header_start, in_hunk - ) + edits = [] + hunk = [] + current_fname = fname + has_changes_in_hunk = False + + i = block_start_index + while i < len(block): + line = block[i] + + if line.startswith("--- ") and i + 1 < len(block) and block[i + 1].startswith("+++ "): + if hunk and has_changes_in_hunk: + edits.append((current_fname, hunk)) + hunk = [] + has_changes_in_hunk = False + current_fname = block[i + 1][4:].strip() + i += 2 continue + if line.startswith("@@"): + if hunk and has_changes_in_hunk: + edits.append((current_fname, hunk)) + hunk = [line] + has_changes_in_hunk = False + i += 1 + continue - if in_hunk: - current_hunk_lines.append(line) - if line.startswith("+") or line.startswith("-"): - hunk_contains_changes = True - - parse_idx += 1 - - if in_hunk and current_hunk_lines and hunk_contains_changes: - edits.append((current_fname, current_hunk_lines)) - - - end_line_num_in_orig = start_line_num + (end_line_num - start_line_num) - return end_line_num_in_orig + 1, edits + if line.startswith(("-", "+")): + has_changes_in_hunk = True + hunk.append(line) + i += 1 -def find_diffs(content): - """ - Finds and parses fenced diff blocks (lines between ```diff and ```) within content. - Returns a list of edits found across all blocks. - """ - if not content.endswith("\n"): - content = content + "\n" - - lines = content.splitlines(keepends=True) - line_num = 0 - edits = [] - while line_num < len(lines): - line = lines[line_num] - if line.strip() == "```diff": - next_line_num, these_edits = process_fenced_block(lines, line_num + 1) - edits.extend(these_edits) - line_num = next_line_num - continue - line_num += 1 + if hunk and has_changes_in_hunk: + edits.append((current_fname, hunk)) - return edits + return end_line_num + 1, edits def normalize_hunk(hunk): - """ - Normalizes a hunk by regenerating it using difflib after cleaning whitespace. - This helps standardize hunk format for application. - Returns a list of lines representing the normalized hunk, or None if hunk is empty. - """ - if not hunk: - return None - before, after = hunk_to_before_after(hunk, lines=True) before = cleanup_pure_whitespace_lines(before) @@ -235,58 +183,31 @@ def normalize_hunk(hunk): diff = difflib.unified_diff(before, after, n=max(len(before), len(after))) diff = list(diff)[3:] - - if not diff: - return None - return diff def cleanup_pure_whitespace_lines(lines): - """ - Standardizes lines that contain only whitespace. - Replaces a line consisting only of whitespace with just its newline characters. - e.g., " \\n" becomes "\\n". - """ res = [ - line if line.strip() else line[-(len(line) - len(line.rstrip("\r\n"))):] + line if line.strip() else line[-(len(line) - len(line.rstrip("\r\n")))] for line in lines ] return res def hunk_to_before_after(hunk, lines=False): - """ - Separates lines in a hunk into 'before' and 'after' content based on +/-/space prefixes. - Handles lines that might not have a standard prefix. - - Args: - hunk (list[str]): List of lines in the hunk, including prefixes. - lines (bool): If True, return lists of lines; otherwise, return joined strings. - - Returns: - tuple[list[str], list[str]] or tuple[str, str]: The before and after content. - """ before = [] after = [] for line in hunk: - if not line or len(line) < 2: - op = " " - processed_line = line - elif line[0] in (" ", "-", "+", "\\"): - op = line[0] - processed_line = line[1:] - else: - op = " " - processed_line = line + op = line[0] if line else ' ' + content_part = line[1:] if len(line) > 0 else '' if op == " ": - before.append(processed_line) - after.append(processed_line) + before.append(content_part) + after.append(content_part) elif op == "-": - before.append(processed_line) + before.append(content_part) elif op == "+": - after.append(processed_line) + after.append(content_part) if lines: return before, after @@ -297,44 +218,18 @@ def hunk_to_before_after(hunk, lines=False): return before, after -def apply_diffs(file_content, diff_content): - """Applies diff hunks found in diff_content to file_content string.""" - edits = list(find_diffs(diff_content)) - - for path, hunk in edits: - hunk = normalize_hunk(hunk) - if not hunk: - continue - - try: - file_content = do_replace(None, file_content, hunk) - except SearchTextNotUnique: - raise ValueError( - "The diff could not be applied uniquely to the file content." - ) - - if file_content is None: - raise ValueError("The diff failed to apply to the file content.") - - return file_content - - def do_replace(fname, content, hunk): - """ - Applies a hunk to the content string. - Handles simple append/insert cases and delegates complex replacements to apply_hunk. - Returns the modified content string or None if the hunk cannot be applied. - - Args: - fname: The filename from the diff header (unused in string-based application). - content (str): The original content string to modify. - hunk (list[str]): The list of lines representing the hunk. + fname = Path(fname) - Returns: - str or None: The content string after applying the hunk, or None on failure. - """ before_text, after_text = hunk_to_before_after(hunk) + if not fname.exists() and not before_text.strip(): + fname.touch() + content = "" + + if content is None: + return None + if not before_text.strip(): new_content = content + after_text return new_content @@ -343,203 +238,171 @@ def do_replace(fname, content, hunk): if new_content is not None: return new_content + return None -def apply_hunk(content, hunk): - """ - Attempts to apply a hunk to the content string using different search and replacement strategies. - Returns the modified content string or None on failure. - - Args: - content (str): The content string to modify. - hunk (list[str]): The list of lines representing the hunk. - Returns: - str or None: The content string after applying the hunk, or None if application fails. - """ +def apply_hunk(content, hunk): _, _ = hunk_to_before_after(hunk) res = directly_apply_hunk(content, hunk) if res is not None: return res - hunk_explicit_newlines = make_new_lines_explicit(content, hunk) + hunk = make_new_lines_explicit(content, hunk) - res_explicit = directly_apply_hunk(content, hunk_explicit_newlines) - if res_explicit is not None: - return res_explicit + ops = "".join([line[0] for line in hunk if len(line) > 0]) + ops = ops.replace("-", "x") + ops = ops.replace("+", "x") - return None + cur_op = " " + section = [] + sections = [] + + for i in range(len(ops)): + op = ops[i] + if op != cur_op: + sections.append(section) + section = [] + cur_op = op + section.append(hunk[i]) + + sections.append(section) + if cur_op != " ": + sections.append([]) + + all_done = True + for i in range(2, len(sections), 2): + preceding_context = sections[i - 2] + changes = sections[i - 1] + following_context = sections[i] + + res = apply_partial_hunk(content, preceding_context, changes, following_context) + if res is not None: + content = res + else: + all_done = False + break + + if all_done: + return content + else: + return None def make_new_lines_explicit(content, hunk): - """ - Creates a new hunk by adjusting the 'before' part of the original hunk - to better match the line endings/whitespace of the actual content. - This is a preparation step for applying hunks that might not match content exactly. - Returns the adjusted hunk (list of lines) or the original hunk if adjustment fails/is insignificant. - """ before, after = hunk_to_before_after(hunk) diff = diff_lines(before, content) back_diff = [] for line in diff: - if line.startswith(" ") or line.startswith("-"): - back_diff.append(line) + if line[0] == "+": + continue + + back_diff.append(line) new_before = directly_apply_hunk(before, back_diff) + if new_before is None: + return hunk - if new_before is None or len(str(new_before).strip()) < 10: + if len(new_before.strip()) < 10: return hunk - before_lines = before.splitlines(keepends=True) - new_before_lines = str(new_before).splitlines(keepends=True) + before = before.splitlines(keepends=True) + new_before = new_before.splitlines(keepends=True) + after = after.splitlines(keepends=True) - if len(new_before_lines) < len(before_lines) * 0.66: + if len(new_before) < len(before) * 0.66: return hunk - after_lines = after.splitlines(keepends=True) - new_hunk_lines = difflib.unified_diff( - new_before_lines, after_lines, n=max(len(new_before_lines), len(after_lines)) + new_hunk = difflib.unified_diff( + new_before, after, n=max(len(new_before), len(after)) ) - new_hunk = list(new_hunk_lines)[3:] - - if not new_hunk: - return hunk + new_hunk = list(new_hunk)[3:] return new_hunk def diff_lines(search_text, replace_text): - """ - Generates a diff between two strings, treating each line as a distinct element. - Uses diff_match_patch library for line-based differencing. - Returns a list of lines prefixed with standard diff characters (+, -, ' '). - """ dmp = diff_match_patch() dmp.Diff_Timeout = 5 - - search_lines_chars, replace_lines_chars, mapping = dmp.diff_linesToChars( + search_lines, replace_lines, mapping = dmp.diff_linesToChars( search_text, replace_text ) - diff_results = dmp.diff_main(search_lines_chars, replace_lines_chars, False) - - dmp.diff_cleanupSemantic(diff_results) - dmp.diff_cleanupEfficiency(diff_results) - - diff = list(diff_results) + diff_lines = dmp.diff_main(search_lines, replace_lines, None) + dmp.diff_cleanupSemantic(diff_lines) + dmp.diff_cleanupEfficiency(diff_lines) + diff = list(diff_lines) dmp.diff_charsToLines(diff, mapping) udiff = [] - for op_type, lines_string in diff: - if op_type < 0: - d_char = "-" - elif op_type > 0: - d_char = "+" + for d, lines in diff: + if d < 0: + d = "-" + elif d > 0: + d = "+" else: - d_char = " " - for line in lines_string.splitlines(keepends=True): - udiff.append(d_char + line) + d = " " + for line in lines.splitlines(keepends=True): + udiff.append(d + line) return udiff -# Helper function for apply_partial_hunk -def _get_context_slice(context, num_lines, from_end=True): - """Helper to safely get a slice of context lines.""" - if num_lines <= 0 or not context: - return [] +def _try_partial_context(content, preceding_context, changes, following_context, use_prec, use_foll): + if use_prec: + this_prec = preceding_context[-use_prec:] + else: + this_prec = [] - num_lines = min(num_lines, len(context)) + this_foll = following_context[:use_foll] - if from_end: - return context[-num_lines:] - else: - return context[:num_lines] + res = directly_apply_hunk(content, this_prec + changes + this_foll) + return res def apply_partial_hunk(content, preceding_context, changes, following_context): - """ - Attempts to apply a hunk using partial context lines. - It tries different combinations of preceding and following context lines - from the available context sections. - Returns modified content or None on failure. - - Args: - content (str): The content string to modify. - preceding_context (list[str]): Lines from the original content just before the hunk's 'before' section. - changes (list[str]): Lines representing the changes section of the hunk (+/- lines). - following_context (list[str]): Lines from the original content just after the hunk's 'before' section. - - Returns: - str or None: The content string after applying a successful partial hunk, or None if no partial hunk applies. - """ len_prec = len(preceding_context) len_foll = len(following_context) - for use_prec in range(len_prec, -1, -1): - for use_foll in range(len_foll, -1, -1): - this_prec = _get_context_slice(preceding_context, use_prec, from_end=True) - this_foll = _get_context_slice(following_context, use_foll, from_end=False) + use_all = len_prec + len_foll - partial_hunk_lines = this_prec + changes + this_foll - res = directly_apply_hunk(content, partial_hunk_lines) + for drop in range(use_all + 1): + use = use_all - drop - if res is not None: - return res + for use_prec in range(len_prec, -1, -1): + use_foll = use - use_prec + if use_prec <= len_prec and use_foll <= len_foll: + res = _try_partial_context(content, preceding_context, changes, following_context, use_prec, use_foll) + if res is not None: + return res return None def directly_apply_hunk(content, hunk): - """ - Attempts to apply a hunk by directly searching for the hunk's 'before' - text in the content and replacing it with the hunk's 'after' text. - Includes a heuristic to prevent applying trivial changes on short, - repeated context strings, which could lead to unintended replacements. - Returns the modified content string or None on failure (e.g., 'before' text not found uniquely). - - Args: - content (str): The content string to modify. - hunk (list[str]): The list of lines representing the hunk. - - Returns: - str or None: The content string after application, or None on failure. - """ before, after = hunk_to_before_after(hunk) if not before: return None - before_lines_list, _ = hunk_to_before_after(hunk, lines=True) - before_stripped_content = "".join(before_lines_list).strip() + before_lines, _ = hunk_to_before_after(hunk, lines=True) + before_lines_stripped = "".join([line.strip() for line in before_lines]) - if len(before_stripped_content) < 10 and content.count(before) > 1: + if len(before_lines_stripped) < 10 and content.count(before) > 1: return None try: new_content = flexi_just_search_and_replace([before, after, content]) + except SearchTextNotUnique: + new_content = None - return new_content - - except Exception: - return None + return new_content def flexi_just_search_and_replace(texts): - """ - Applies the basic search_and_replace strategy combined with various preprocessor permutations. - This provides robustness against minor differences in indentation or blank lines. - Returns the modified text using the first successful combination, or None if all fail. - - Args: - texts (list[str]): A list containing [search_text, replace_text, original_text]. - - Returns: - str or None: The modified text or None if no combination succeeds. - """ strategies = [ (search_and_replace, all_preprocs), ] @@ -548,22 +411,11 @@ def flexi_just_search_and_replace(texts): def search_and_replace(texts): - """ - Performs a simple string search and replace. - Returns modified text if the search_text is found, None otherwise. - Note: This function replaces ALL occurrences of search_text if multiple exist. - Higher-level logic (like directly_apply_hunk's heuristic) handles uniqueness concerns. - - Args: - texts (list[str]): A list containing [search_text, replace_text, original_text]. - - Returns: - str or None: The modified text string if search_text is found, None otherwise. - """ search_text, replace_text, original_text = texts num = original_text.count(search_text) - + # if num > 1: + # raise SearchTextNotUnique() if num == 0: return None @@ -577,68 +429,55 @@ def flexible_search_and_replace(texts, strategies): literal interpretation of search_text. If needed, progress to more flexible methods, which can accommodate divergence between search_text and original_text and yet still achieve the desired - edits.""" + edits. + """ for strategy, preprocs in strategies: for preproc in preprocs: - res = try_strategy(texts, strategy, preproc) - if res is not None: - return res + try: + res = try_strategy(texts, strategy, preproc) + if res is not None: + return res + except ValueError: + pass return None def try_strategy(texts, strategy, preproc): - """ - Applies specified preprocessors to texts, then runs the strategy, then applies post-processors. - Catches any exceptions raised during the process (preproc, strategy, post-proc) and returns None. - - Args: - texts (list[str]): A list containing [search_text, replace_text, original_text]. - strategy (function): The core search/replace function to apply. - preproc (tuple): A tuple specifying which preprocessors to apply (strip_blank_lines, relative_indent, reverse_lines). - - Returns: - str or None: The modified text string if the process completes successfully, or None if an exception occurs or the strategy fails. - """ preproc_strip_blank_lines, preproc_relative_indent, preproc_reverse = preproc ri = None + processed_texts = list(texts) - current_texts = list(texts) + if preproc_strip_blank_lines: + processed_texts = strip_blank_lines(processed_texts) + if preproc_relative_indent: + ri, processed_texts = relative_indent(processed_texts) + if preproc_reverse: + processed_texts = list(map(reverse_lines, processed_texts)) - try: - if preproc_strip_blank_lines: - current_texts = strip_blank_lines(current_texts) - if preproc_relative_indent: - ri, current_texts = relative_indent(current_texts) - if preproc_reverse: - current_texts = list(map(reverse_lines, current_texts)) + res = strategy(processed_texts) - res = strategy(current_texts) + if res is not None and preproc_reverse: + res = reverse_lines(res) - if res is not None: - if preproc_reverse: - res = reverse_lines(res) + if res is not None and preproc_relative_indent: + res = ri.make_absolute(res) - if preproc_relative_indent: - res = ri.make_absolute(res) - - return res - - except Exception: - return None + return res def strip_blank_lines(texts): - res_texts = [text.strip("\n") + "\n" if text else "" for text in texts] - return res_texts + # strip leading and trailing blank lines + texts = [text.strip("\n") + "\n" for text in texts] + return texts def relative_indent(texts): ri = RelativeIndenter(texts) - res_texts = list(map(ri.make_relative, texts)) + texts = list(map(ri.make_relative, texts)) - return ri, res_texts + return ri, texts class RelativeIndenter: @@ -707,12 +546,15 @@ class RelativeIndenter: """ def __init__(self, texts): + """ + Based on the texts, choose a unicode character that isn't in any of them. + """ + chars = set() for text in texts: - if text: - chars.update(text) + chars.update(text) - ARROW = "\u2190" + ARROW = "←" if ARROW not in chars: self.marker = ARROW else: @@ -724,11 +566,12 @@ def select_unique_marker(self, chars): if marker not in chars: return marker - raise ValueError("Could not find a unique marker within the search range.") - + raise ValueError("Could not find a unique marker") def make_relative(self, text): - if not text: return "" + """ + Transform text to use relative indents. + """ if self.marker in text: raise ValueError(f"Text already contains the outdent marker: {self.marker}") @@ -737,22 +580,20 @@ def make_relative(self, text): output = [] prev_indent = "" - for line in lines: line_without_end = line.rstrip("\n\r") len_indent = len(line_without_end) - len(line_without_end.lstrip()) indent = line[:len_indent] - change = len_indent - len(prev_indent) - - cur_indent_marker = "" if change > 0: - cur_indent_marker = indent[-change:] + cur_indent = indent[-change:] elif change < 0: - cur_indent_marker = self.marker * -change + cur_indent = self.marker * -change + else: + cur_indent = "" - out_line = cur_indent_marker + "\n" + line[len_indent:] + out_line = cur_indent + "\n" + line[len_indent:] output.append(out_line) prev_indent = indent @@ -761,54 +602,54 @@ def make_relative(self, text): return res def make_absolute(self, text): - if not text: return "" - + """ + Transform text from relative back to absolute indents. + """ lines = text.splitlines(keepends=True) - if len(lines) % 2 != 0: - raise ValueError("Input text has odd number of lines, expected pairs of relative indent marker and content.") - output = [] prev_indent = "" for i in range(0, len(lines), 2): - dent_line = lines[i] - non_indent_line = lines[i + 1] + if i + 1 >= len(lines): + raise ValueError(f"Malformed relative-indented text: Odd number of lines found. Cannot process line index {i} without a following content line.") - dent = dent_line.rstrip("\r\n") + dent = lines[i].rstrip("\r\n") + non_indent = lines[i + 1] cur_indent = "" if dent.startswith(self.marker): - len_outdent = len(dent) - if len(prev_indent) < len_outdent: - raise ValueError(f"Cannot outdent {len_outdent} columns from current indent level {len(prev_indent)}. Input segment: '{dent_line.strip()}'") - cur_indent = prev_indent[:-len_outdent] + len_outdent_chars = len(dent) + if len(prev_indent) >= len_outdent_chars: + cur_indent = prev_indent[:-len_outdent_chars] + else: + raise ValueError(f"Malformed relative-indented text: Cannot outdent {len_outdent_chars} chars from previous indent of {len(prev_indent)} chars.") else: cur_indent = prev_indent + dent - if not non_indent_line.strip(): - out_line = non_indent_line + if not non_indent.rstrip("\r\n"): + out_line = non_indent else: - out_line = cur_indent + non_indent_line + out_line = cur_indent + non_indent output.append(out_line) - prev_indent = cur_indent + if non_indent.rstrip("\r\n"): + prev_indent = cur_indent res = "".join(output) if self.marker in res: - raise ValueError("Error transforming text back to absolute indents: marker still present after conversion.") + raise ValueError("Error transforming text back to absolute indents: Marker still present") return res def reverse_lines(text): - if not text: return "" - lines = text.splitlines(keepends=True) lines.reverse() return "".join(lines) all_preprocs = [ + # (strip_blank_lines, relative_indent, reverse_lines) (False, False, False), (True, False, False), (False, True, False), @@ -816,6 +657,7 @@ def reverse_lines(text): ] if __name__ == "__main__": + # Test case for apply_diffs function original_content = """def hello(): print("Hello, World!") @@ -824,8 +666,8 @@ def goodbye(): """ diff_content = """```diff ---- a/original.py -+++ b/modified.py +--- original ++++ modified @@ -1,5 +1,8 @@ def hello(): - print("Hello, World!") @@ -835,4 +677,59 @@ def hello(): def goodbye(): - print("Goodbye, World!") + print("Farewell, Universe!") - print("See you next time!") \ No newline at end of file ++ print("See you next time!") +""" + + try: + result = apply_diffs(original_content, diff_content) + print("Original content:") + print(original_content) + print("\nDiff content:") + print(diff_content) + print("\nResult after applying diffs:") + print(result) + except Exception as e: + print(f"An error occurred: {e}") + + print("\n--- Test case: Append/New File ---") + original_content_empty = "" + diff_content_append = """```diff +--- /dev/null ++++ new_file.py +@@ -0,0 +1,3 @@ ++def new_function(): ++ print("This is a new file.") +""" + if os.path.exists("dummy_path"): + os.remove("dummy_path") + + try: + result_append = apply_diffs(original_content_empty, diff_content_append) + print("Original content (empty):") + print(repr(original_content_empty)) + print("\nDiff content (append):") + print(diff_content_append) + print("\nResult after applying diffs (append):") + print(result_append) + + except Exception as e: + print(f"An error occurred during append test: {e}") + finally: + if os.path.exists("dummy_path"): + os.remove("dummy_path") + + print("\n--- Test case: Large Diff Content ---") + large_diff_content = "```diff\n--- a/file.txt\n+++ b/file.txt\n@@ -1 +1 @@\n-line\n+newline\n" * 100000 + + try: + if os.path.exists("dummy_path"): + os.remove("dummy_path") + apply_diffs("initial content", large_diff_content) + print("Large diff test passed (unexpectedly?).") + except ValueError as e: + print(f"Large diff test failed as expected: {e}") + except Exception as e: + print(f"An unexpected error occurred during large diff test: {e}") + finally: + if os.path.exists("dummy_path"): + os.remove("dummy_path") \ No newline at end of file