#!/usr/bin/env python3 from pathlib import Path import matplotlib.pyplot as plt import numpy as np import pykmertools as kt def read_first_fasta_sequence(path: Path) -> str: seq_parts: list[str] = [] for line in path.read_text(encoding="utf-8").splitlines(): if not line: continue if line.startswith(">"): if seq_parts: break continue seq_parts.append(line.strip()) if not seq_parts: raise ValueError(f"No FASTA sequence found in {path}") return "".join(seq_parts) def plot_api_scatter(points: list[tuple[float, float]], output_path: Path) -> None: x = [p[0] for p in points] y = [p[1] for p in points] plt.figure(figsize=(8, 8)) plt.scatter(x, y, s=3, alpha=0.35, color="#1f77b4") plt.title("CGR Scatter from Python API (NC_001802.1)") plt.xlabel("X coordinate") plt.ylabel("Y coordinate") plt.tight_layout() plt.savefig(output_path, dpi=150) plt.close() def plot_api_density(points: list[tuple[float, float]], output_path: Path) -> None: arr = np.array(points, dtype=float) plt.figure(figsize=(8, 8)) hist = plt.hist2d(arr[:, 0], arr[:, 1], bins=64, cmap="viridis") plt.title("CGR Density from Python API (NC_001802.1)") plt.xlabel("X coordinate") plt.ylabel("Y coordinate") cbar = plt.colorbar(hist[3]) cbar.set_label("Point count") plt.tight_layout() plt.savefig(output_path, dpi=150) plt.close() def main() -> None: base_dir = Path(__file__).resolve().parent fasta_path = base_dir / "data" / "cgr" / "NC_001802.1.fasta" seq = read_first_fasta_sequence(fasta_path) cgr = kt.CgrComputer(1) points = cgr.vectorise_one(seq) scatter_out = base_dir / "cgr_scatter_from_api.png" density_out = base_dir / "cgr_density_from_api.png" plot_api_scatter(points, scatter_out) plot_api_density(points, density_out) print(f"Sequence length: {len(seq)}") print(f"CGR points: {len(points)}") print(f"Saved: {scatter_out}") print(f"Saved: {density_out}") if __name__ == "__main__": main()