From 9d3d19c8e4e45602746df8d7250d1fdce04cfa35 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 2 Jan 2026 21:48:26 +0000 Subject: [PATCH 1/3] Initial plan From d8bcf096157e4464ae72b88ef2f4c14080a76141 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:00:15 +0000 Subject: [PATCH 2/3] Fix BED format parsing and improve error messages - Fix BED end coordinate validation: change comparison from `>` to `>=` to properly handle BED format's exclusive end coordinates - Improve error messages to clearly distinguish between "chromosome not found" and "coordinate out of bounds" errors - Add comprehensive tests for BED boundary conditions - Refactor code to eliminate compiler warnings Co-authored-by: wdecoster <11660522+wdecoster@users.noreply.github.com> --- src/repeats.rs | 81 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 10 deletions(-) diff --git a/src/repeats.rs b/src/repeats.rs index 7fdc75f..328b3d4 100644 --- a/src/repeats.rs +++ b/src/repeats.rs @@ -254,19 +254,24 @@ impl RepeatInterval { .expect("Failed to read fai file") .lines() { - if line.split('\t').collect::>()[0] == chrom - && line.split('\t').collect::>()[1] + let parts: Vec<&str> = line.split('\t').collect(); + if parts[0] == chrom { + let chrom_length = parts[1] .parse::() - .expect("Failed parsing chromosome length from fai file") - > end - { - return Some(Self { chrom, start, end, created: None }); + .expect("Failed parsing chromosome length from fai file"); + // BED format uses 0-based half-open coordinates, so end is exclusive + // Therefore, end can equal the chromosome length + if chrom_length >= end { + return Some(Self { chrom, start, end, created: None }); + } else { + panic!( + "End coordinate {end} is out of bounds for chromosome {chrom} (length: {chrom_length})" + ); + } } } - // if the chromosome is not in the fai file or the end does not fit the interval, return None - panic!( - "Chromosome {chrom} is not in the fasta file or the end coordinate is out of bounds" - ); + // if the chromosome is not in the fai file, panic with clear message + panic!("Chromosome {chrom} is not in the fasta file"); } pub fn new(chrom: &str, start: u32, end: u32) -> Self { Self { chrom: chrom.to_string(), start, end, created: None } @@ -627,4 +632,60 @@ chr19 45770205 45770266 CAG ATXN1_CAG_61"#; let result = crate::repeats::RepeatIntervalIterator::pathogenic(&fasta); assert!(result.num_intervals > 0, "Should have found some pathogenic intervals"); } + + #[test] + fn test_bed_end_coordinate_at_chromosome_boundary() { + // Test that BED format end coordinate equal to chromosome length is valid + // BED format uses 0-based half-open coordinates, so end is exclusive + let temp_dir = std::env::temp_dir(); + let test_fasta = temp_dir.join("test_bed_boundary.fa"); + let test_fai = temp_dir.join("test_bed_boundary.fa.fai"); + + let fasta_content = ">chr1\nATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC"; + // Chromosome length is 1000 + let fai_content = "chr1\t1000\t6\t60\t61"; + + std::fs::write(&test_fasta, fasta_content).expect("Failed to write test fasta"); + std::fs::write(&test_fai, fai_content).expect("Failed to write test fai"); + + // Test 1: End coordinate equal to chromosome length should be valid + // This represents the region [900, 1000) which includes positions 900-999 + let bed_data_at_boundary = "chr1\t900\t1000\tATGC\tgene1"; + let result = RepeatIntervalIterator::from_string_data( + bed_data_at_boundary, + &test_fasta.to_string_lossy(), + ); + let intervals: Vec = result.collect(); + assert_eq!(intervals.len(), 1); + assert_eq!(intervals[0].end, 1000); + + // Clean up + let _ = std::fs::remove_file(&test_fasta); + let _ = std::fs::remove_file(&test_fai); + } + + #[test] + #[should_panic(expected = "End coordinate 1001 is out of bounds for chromosome chr1")] + fn test_bed_end_coordinate_beyond_chromosome_boundary() { + // Test that end coordinate beyond chromosome length properly fails + let temp_dir = std::env::temp_dir(); + let test_fasta = temp_dir.join("test_bed_oob.fa"); + let test_fai = temp_dir.join("test_bed_oob.fa.fai"); + + let fasta_content = ">chr1\nATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC"; + // Chromosome length is 1000 + let fai_content = "chr1\t1000\t6\t60\t61"; + + std::fs::write(&test_fasta, fasta_content).expect("Failed to write test fasta"); + std::fs::write(&test_fai, fai_content).expect("Failed to write test fai"); + + // End coordinate beyond chromosome length should panic with clear message + let bed_data_oob = "chr1\t900\t1001\tATGC\tgene1"; + let _result = + RepeatIntervalIterator::from_string_data(bed_data_oob, &test_fasta.to_string_lossy()); + + // Clean up (won't reach here due to panic, but good practice) + let _ = std::fs::remove_file(&test_fasta); + let _ = std::fs::remove_file(&test_fai); + } } From 7c77808abd2d90e35148f0d1ee0b2272259cb528 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:03:56 +0000 Subject: [PATCH 3/3] Address code review: use proper 1000bp test sequences - Update test data to use actual 1000bp sequences matching FAI declarations - Ensures tests are robust and won't cause issues with faidx library - Tests still validate the same boundary conditions Co-authored-by: wdecoster <11660522+wdecoster@users.noreply.github.com> --- src/repeats.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/repeats.rs b/src/repeats.rs index 328b3d4..671751f 100644 --- a/src/repeats.rs +++ b/src/repeats.rs @@ -641,7 +641,9 @@ chr19 45770205 45770266 CAG ATXN1_CAG_61"#; let test_fasta = temp_dir.join("test_bed_boundary.fa"); let test_fai = temp_dir.join("test_bed_boundary.fa.fai"); - let fasta_content = ">chr1\nATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC"; + // Create a sequence of exactly 1000 bp + let sequence = "ATGC".repeat(250); // 4 * 250 = 1000 bp + let fasta_content = format!(">chr1\n{}", sequence); // Chromosome length is 1000 let fai_content = "chr1\t1000\t6\t60\t61"; @@ -672,7 +674,9 @@ chr19 45770205 45770266 CAG ATXN1_CAG_61"#; let test_fasta = temp_dir.join("test_bed_oob.fa"); let test_fai = temp_dir.join("test_bed_oob.fa.fai"); - let fasta_content = ">chr1\nATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC"; + // Create a sequence of exactly 1000 bp + let sequence = "ATGC".repeat(250); // 4 * 250 = 1000 bp + let fasta_content = format!(">chr1\n{}", sequence); // Chromosome length is 1000 let fai_content = "chr1\t1000\t6\t60\t61";