From a46bf9ce6038aac8f3ace0a0e982ee0bc496cf33 Mon Sep 17 00:00:00 2001
From: Andrew Riha <apriha@gmail.com>
Date: Mon, 26 Jan 2026 21:57:12 -0800
Subject: [PATCH 1/3] Fix link rendering

---
 docs/conf.py          | 1 +
 docs/requirements.txt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/conf.py b/docs/conf.py
index 1a0d14c..33b329e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -69,6 +69,7 @@
     "colon_fence",
     "deflist",
     "fieldlist",
+    "linkify",
     "substitution",
     "tasklist",
 ]
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 3b80d46..ec05c16 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -2,3 +2,4 @@ sphinx==9.1.0
 furo==2025.12.19
 myst-parser==5.0.0
 sphinx-copybutton==0.5.2
+linkify-it-py>=2.0.0

From bbcd47e02da1458a2c116336f25a0ecfe0c45c29 Mon Sep 17 00:00:00 2001
From: Andrew Riha <apriha@gmail.com>
Date: Mon, 26 Jan 2026 22:27:15 -0800
Subject: [PATCH 2/3] Refactor shared test utilities to module

---
 docs/api.rst        |   8 +
 src/snps/testing.py | 374 ++++++++++++++++++++++++++++++++++++++++++++
 tests/__init__.py   | 182 ++-------------------
 3 files changed, 396 insertions(+), 168 deletions(-)
 create mode 100644 src/snps/testing.py

diff --git a/docs/api.rst b/docs/api.rst
index ebe8003..490fc68 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -100,3 +100,11 @@ Helper functions and utilities.
    :members:
    :undoc-members:
    :show-inheritance:
+
+snps.testing
+~~~~~~~~~~~~
+
+.. automodule:: snps.testing
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/src/snps/testing.py b/src/snps/testing.py
new file mode 100644
index 0000000..5627fe8
--- /dev/null
+++ b/src/snps/testing.py
@@ -0,0 +1,374 @@
+"""Shared test utilities for snps."""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import numpy as np
+import pandas as pd
+from pandas.api.types import is_object_dtype, is_string_dtype
+
+# Column dtypes that create_snp_df applies before "rsid" is made the index
+NORMALIZED_DTYPES = {
+    "rsid": object,
+    "chrom": object,
+    "pos": np.uint32,
+    "genotype": object,
+}
+
+
+def get_complement(base: str) -> str:
+    """Return the pairing partner of a single DNA base.
+
+    Parameters
+    ----------
+    base : str
+        One uppercase base: "A", "C", "G", or "T"
+
+    Returns
+    -------
+    str
+        "T" for "A", "A" for "T", "G" for "C", "C" for "G"; otherwise `base` as given
+    """
+    pairs = dict(zip("ACGT", "TGCA"))
+    return pairs[base] if base in pairs else base
+
+
+def complement_genotype(genotype: str) -> str:
+    """Complement every allele of a genotype.
+
+    Parameters
+    ----------
+    genotype : str
+        Genotype string (e.g., "AT", "CG"); each character is complemented in turn
+
+    Returns
+    -------
+    str
+        The character-wise complement, or np.nan when `genotype` is null
+    """
+    if not pd.isnull(genotype):
+        return "".join(map(get_complement, genotype))
+    return np.nan
+
+
+def complement_one_allele(genotype: str) -> str:
+    """Get the complement of only the first allele of a genotype.
+
+    The second allele is preserved unchanged. Slices are used instead of
+    indexing so empty and single-allele genotypes do not raise ``IndexError``.
+
+    Parameters
+    ----------
+    genotype : str
+        Genotype string (e.g., "AT", "CG"); only its first two characters are used
+
+    Returns
+    -------
+    str
+        Genotype with first allele complemented, or np.nan if input is null
+    """
+    if pd.isnull(genotype):
+        return np.nan
+    return get_complement(genotype[:1]) + genotype[1:2]
+
+
+def create_snp_df(
+    rsid: list[str],
+    chrom: list[str],
+    pos: list[int],
+    genotype: list[str],
+) -> pd.DataFrame:
+    """Build a SNP DataFrame in the normalized layout.
+
+    The four inputs become columns, are cast with ``NORMALIZED_DTYPES`` (object
+    for rsid, chrom, and genotype; uint32 for pos), and "rsid" is then moved
+    into the index.
+
+    Parameters
+    ----------
+    rsid : list of str
+        SNP identifiers (becomes the index)
+    chrom : list of str
+        Chromosome values
+    pos : list of int
+        Position values
+    genotype : list of str
+        Genotype values
+
+    Returns
+    -------
+    ~pandas.DataFrame
+        Frame indexed by rsid with chrom, pos, and genotype columns, in that order
+    """
+    data = {"rsid": rsid, "chrom": chrom, "pos": pos, "genotype": genotype}
+    frame = pd.DataFrame(data, columns=list(data))
+    return frame.astype(NORMALIZED_DTYPES).set_index("rsid")
+
+
+def create_simulated_snp_df(
+    chrom: str = "1",
+    pos_start: int = 1,
+    pos_max: int = 248140902,
+    pos_step: int = 100,
+    pos_dtype: type = np.uint32,
+    genotype: str = "AA",
+    insert_nulls: bool = True,
+    null_snp_step: int = 101,
+    complement_genotype_one_allele: bool = False,
+    complement_genotype_two_alleles: bool = False,
+    complement_snp_step: int = 50,
+) -> pd.DataFrame:
+    """Create a simulated single-chromosome SNP DataFrame for testing.
+
+    Positions run from `pos_start` up to, but not including, `pos_max` in
+    steps of `pos_step`, and rsids are numbered "rs1", "rs2", ... in that order.
+    Null insertion and complementing both start at the first row. If both
+    complement flags are set, the two-allele complement takes precedence.
+
+    Parameters
+    ----------
+    chrom : str
+        Chromosome value shared by every SNP (default: "1")
+    pos_start : int
+        First position (default: 1)
+    pos_max : int
+        Exclusive upper bound for positions (default: 248140902)
+    pos_step : int
+        Distance between consecutive positions (default: 100)
+    pos_dtype : type
+        Numpy dtype used for the positions (default: np.uint32)
+    genotype : str
+        Genotype initially given to every SNP (default: "AA")
+    insert_nulls : bool
+        Whether to null out genotypes at intervals (default: True)
+    null_snp_step : int
+        Null every Nth SNP, counting from the first (default: 101)
+    complement_genotype_one_allele : bool
+        Complement the first allele at intervals (default: False)
+    complement_genotype_two_alleles : bool
+        Complement both alleles at intervals (default: False)
+    complement_snp_step : int
+        Complement every Nth SNP, counting from the first (default: 50)
+
+    Returns
+    -------
+    ~pandas.DataFrame
+        DataFrame with rsid index and chrom, pos, genotype columns
+    """
+    positions = np.arange(pos_start, pos_max, pos_step, dtype=pos_dtype)
+    rsids = pd.Index([f"rs{n}" for n in range(1, len(positions) + 1)], name="rsid")
+    snps = pd.DataFrame({"chrom": chrom}, index=rsids)
+    snps["pos"] = positions
+    snps["genotype"] = genotype
+
+    if insert_nulls:
+        snps.loc[snps.index[0::null_snp_step], "genotype"] = np.nan
+
+    targets = snps.index[0::complement_snp_step]
+    if complement_genotype_two_alleles:
+        flip = complement_genotype
+    elif complement_genotype_one_allele:
+        flip = complement_one_allele
+    else:
+        flip = None
+    if flip is not None:
+        snps.loc[targets, "genotype"] = snps.loc[targets, "genotype"].apply(flip)
+
+    return snps
+
+
+def assert_series_equal_with_string_dtype(
+    left: pd.Series,
+    right: pd.Series,
+    test_case: Any = None,
+    **kwargs,
+) -> None:
+    """Assert Series are equal, accepting both object and StringDtype for string data.
+
+    In Python 3.14+, pandas infers StringDtype for string data instead of object.
+    This function compares Series without strict dtype matching for string data.
+
+    Parameters
+    ----------
+    left : ~pandas.Series
+        First Series to compare
+    right : ~pandas.Series
+        Second Series to compare
+    test_case : object, optional
+        Object with assertTrue method for assertions (AssertionError is raised if None)
+    **kwargs : dict
+        Additional arguments passed to pd.testing.assert_series_equal
+
+    Raises
+    ------
+    AssertionError
+        If `left` is string/object typed but `right` is not, or the Series differ
+    """
+    # A string/object left side requires a string/object right side
+    if is_string_dtype(left.dtype) or is_object_dtype(left.dtype):
+        right_is_string = is_string_dtype(right.dtype) or is_object_dtype(right.dtype)
+        message = f"Right series dtype {right.dtype} should be string/object type"
+        if test_case is not None:
+            test_case.assertTrue(right_is_string, message)
+        elif not right_is_string:
+            # Explicit raise so the check is not stripped under ``python -O``
+            raise AssertionError(message)
+    # Compare Series without strict dtype matching
+    pd.testing.assert_series_equal(left, right, check_dtype=False, **kwargs)
+
+
+def assert_frame_equal_with_string_index(
+    left: pd.DataFrame,
+    right: pd.DataFrame,
+    test_case: Any = None,
+    **kwargs,
+) -> None:
+    """Assert DataFrames are equal, accepting both object and StringDtype for string columns.
+
+    In Python 3.14+, pandas infers StringDtype for string columns/indices instead of object.
+    This function validates that string columns have string types, then compares the
+    DataFrames without strict dtype matching for object/string columns.
+
+    Parameters
+    ----------
+    left : ~pandas.DataFrame
+        First DataFrame to compare
+    right : ~pandas.DataFrame
+        Second DataFrame to compare
+    test_case : object, optional
+        Object with assertTrue method for assertions (AssertionError is raised if None)
+    **kwargs : dict
+        Additional arguments passed to pd.testing.assert_frame_equal
+
+    Raises
+    ------
+    AssertionError
+        If a checked index or column is not string/object typed, or the frames differ
+    """
+    def _assert(condition: bool, message: str) -> None:
+        if test_case is not None:
+            test_case.assertTrue(condition, message)
+        elif not condition:
+            # Explicit raise so the check is not stripped under ``python -O``
+            raise AssertionError(message)
+
+    # Label each frame so failure messages name the offending side
+    sides = (("Left", left), ("Right", right))
+
+    # Verify index dtypes are string types if they're named 'rsid'
+    for side, frame in sides:
+        if frame.index.name == "rsid":
+            _assert(
+                is_string_dtype(frame.index.dtype),
+                f"{side} index dtype {frame.index.dtype} is not a string type",
+            )
+
+    # Verify string columns (chrom, genotype) have string dtypes
+    for col in ["chrom", "genotype"]:
+        for side, frame in sides:
+            if col in frame.columns:
+                dtype = frame[col].dtype
+                _assert(
+                    is_string_dtype(dtype) or is_object_dtype(dtype),
+                    f"{side} column '{col}' dtype {dtype} is not a string/object type",
+                )
+
+    # Compare DataFrames without strict dtype matching for string columns
+    pd.testing.assert_frame_equal(
+        left, right, check_index_type=False, check_dtype=False, **kwargs
+    )
+
+
+class SNPsTestMixin:
+    """Mixin exposing this module's helpers as test-case methods.
+
+    Combine it with unittest.TestCase to get Series and DataFrame assertions
+    that accept either object or string dtypes, plus shortcuts for building
+    SNP DataFrames used in tests.
+
+    Example
+    -------
+    >>> class MyTestCase(SNPsTestMixin, TestCase):
+    ...     def test_something(self):
+    ...         df = self.generic_snps()
+    ...         self.assert_frame_equal_with_string_index(df, expected_df)
+    """
+
+    @property
+    def downloads_enabled(self) -> bool:
+        """Report whether tests may download external resources.
+
+        This is True only when the "DOWNLOADS_ENABLED" environment variable
+        is set to exactly "true".
+
+        Returns
+        -------
+        bool
+        """
+        return os.environ.get("DOWNLOADS_ENABLED") == "true"
+
+    @staticmethod
+    def get_complement(base: str) -> str:
+        """Return the complement of a DNA base.
+
+        Delegates to the module-level :func:`get_complement`.
+        """
+        return get_complement(base)
+
+    def complement_genotype(self, genotype: str) -> str:
+        """Return the complement of both alleles of a genotype.
+
+        Delegates to the module-level :func:`complement_genotype`.
+        """
+        return complement_genotype(genotype)
+
+    def complement_one_allele(self, genotype: str) -> str:
+        """Return a genotype with only its first allele complemented.
+
+        Delegates to the module-level :func:`complement_one_allele`.
+        """
+        return complement_one_allele(genotype)
+
+    @staticmethod
+    def create_snp_df(
+        rsid: list[str],
+        chrom: list[str],
+        pos: list[int],
+        genotype: list[str],
+    ) -> pd.DataFrame:
+        """Build a normalized SNP DataFrame.
+
+        Delegates to the module-level :func:`create_snp_df`.
+        """
+        return create_snp_df(rsid, chrom, pos, genotype)
+
+    def generic_snps(self) -> pd.DataFrame:
+        """Build the 8-row example SNP DataFrame (rs1-rs8).
+
+        All rows are on chromosome "1" at positions 101-108; the fifth
+        genotype is null.
+        """
+        genotypes = ["AA", "CC", "GG", "TT", np.nan, "GC", "TC", "AT"]
+        count = len(genotypes)
+        return create_snp_df(
+            rsid=[f"rs{i + 1}" for i in range(count)],
+            chrom=["1"] * count,
+            pos=list(range(101, 101 + count)),
+            genotype=genotypes,
+        )
+
+    def assert_series_equal_with_string_dtype(self, left, right, **kwargs):
+        """Assert two Series are equal, tolerating object vs. string dtype.
+
+        Delegates to :func:`assert_series_equal_with_string_dtype` with this test case.
+        """
+        assert_series_equal_with_string_dtype(left, right, test_case=self, **kwargs)
+
+    def assert_frame_equal_with_string_index(self, left, right, **kwargs):
+        """Assert two DataFrames are equal, tolerating object vs. string dtype.
+
+        Delegates to :func:`assert_frame_equal_with_string_index` with this test case.
+        """
+        assert_frame_equal_with_string_index(left, right, test_case=self, **kwargs)
diff --git a/tests/__init__.py b/tests/__init__.py
index d7d1a0a..d0cb797 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -9,10 +9,11 @@
 from pandas.api.types import is_object_dtype, is_string_dtype, is_unsigned_integer_dtype
 
 from snps import SNPs
+from snps.testing import SNPsTestMixin, create_simulated_snp_df
 from snps.utils import gzip_file, zip_file
 
 
-class BaseSNPsTestCase(TestCase):
+class BaseSNPsTestCase(SNPsTestMixin, TestCase):
     def simulate_snps(
         self,
         chrom="1",
@@ -27,97 +28,22 @@ def simulate_snps(
         complement_snp_step=50,
     ):
         s = SNPs()
-
         s._build = 37
-
-        positions = np.arange(pos_start, pos_max, pos_step, dtype=np.uint32)
-        snps = pd.DataFrame(
-            {"chrom": chrom},
-            index=pd.Index(
-                ["rs" + str(x + 1) for x in range(len(positions))], name="rsid"
-            ),
+        s._snps = create_simulated_snp_df(
+            chrom=chrom,
+            pos_start=pos_start,
+            pos_max=pos_max,
+            pos_step=pos_step,
+            pos_dtype=np.uint32,
+            genotype=genotype,
+            insert_nulls=insert_nulls,
+            null_snp_step=null_snp_step,
+            complement_genotype_one_allele=complement_genotype_one_chrom,
+            complement_genotype_two_alleles=complement_genotype_two_chroms,
+            complement_snp_step=complement_snp_step,
         )
-        snps["pos"] = positions
-        snps["genotype"] = genotype
-
-        if insert_nulls:
-            snps.loc[snps.iloc[0::null_snp_step, :].index, "genotype"] = np.nan
-
-        indices = snps.iloc[0::complement_snp_step, :].index
-        if complement_genotype_two_chroms:
-            snps.loc[indices, "genotype"] = snps.loc[indices, "genotype"].apply(
-                self.complement_two_chroms
-            )
-        elif complement_genotype_one_chrom:
-            snps.loc[indices, "genotype"] = snps.loc[indices, "genotype"].apply(
-                self.complement_one_chrom
-            )
-
-        s._snps = snps
-
         return s
 
-    @property
-    def downloads_enabled(self):
-        """Property indicating if downloads are enabled.
-
-        Only download from external resources when an environment variable named
-        "DOWNLOADS_ENABLED" is set to "true".
-
-        Returns
-        -------
-        bool
-        """
-        return True if os.getenv("DOWNLOADS_ENABLED") == "true" else False
-
-    @staticmethod
-    def get_complement(base):
-        if base == "A":
-            return "T"
-        elif base == "G":
-            return "C"
-        elif base == "C":
-            return "G"
-        elif base == "T":
-            return "A"
-        else:
-            return base
-
-    def complement_one_chrom(self, genotype):
-        if pd.isnull(genotype):
-            return np.nan
-
-        complement = ""
-
-        for base in list(genotype):
-            complement += self.get_complement(base)
-            complement += genotype[1]
-            return complement
-
-    def complement_two_chroms(self, genotype):
-        if pd.isnull(genotype):
-            return np.nan
-
-        complement = ""
-
-        for base in list(genotype):
-            complement += self.get_complement(base)
-
-        return complement
-
-    @staticmethod
-    def create_snp_df(rsid, chrom, pos, genotype):
-        df = pd.DataFrame(
-            {"rsid": rsid, "chrom": chrom, "pos": pos, "genotype": genotype},
-            columns=["rsid", "chrom", "pos", "genotype"],
-        )
-        df.rsid = df.rsid.astype(object)
-        df.chrom = df.chrom.astype(object)
-        df.pos = df.pos.astype(np.uint32)
-        df.genotype = df.genotype.astype(object)
-        df = df.set_index("rsid")
-        return df
-
     def load_assign_PAR_SNPs(self, path):
         """Load and assign PAR SNPs.
 
@@ -502,14 +428,6 @@ def snps_GRCh38_PAR(self):
             genotype=["AA", "AA", "AA"],
         )
 
-    def generic_snps(self):
-        return self.create_snp_df(
-            rsid=["rs" + str(i) for i in range(1, 9)],
-            chrom=["1"] * 8,
-            pos=list(range(101, 109)),
-            genotype=["AA", "CC", "GG", "TT", np.nan, "GC", "TC", "AT"],
-        )
-
     def generic_snps_vcf(self):
         df = self.generic_snps()
         return pd.concat(
@@ -646,78 +564,6 @@ def run_parsing_tests_vcf(
                 snps_df,
             )
 
-    def assert_series_equal_with_string_dtype(self, left, right, **kwargs):
-        """Assert Series are equal, accepting both object and StringDtype for string data.
-
-        In Python 3.14+, pandas infers StringDtype for string data instead of object.
-        This wrapper compares Series without strict dtype matching for string data.
-
-        Parameters
-        ----------
-        left : pd.Series
-            First Series to compare
-        right : pd.Series
-            Second Series to compare
-        **kwargs : dict
-            Additional arguments passed to pd.testing.assert_series_equal
-        """
-        # Verify string series have string or object dtypes
-        if is_string_dtype(left.dtype) or is_object_dtype(left.dtype):
-            self.assertTrue(
-                is_string_dtype(right.dtype) or is_object_dtype(right.dtype),
-                f"Right series dtype {right.dtype} should be string/object type",
-            )
-        # Compare Series without strict dtype matching
-        pd.testing.assert_series_equal(left, right, check_dtype=False, **kwargs)
-
-    def assert_frame_equal_with_string_index(self, left, right, **kwargs):
-        """Assert DataFrames are equal, accepting both object and StringDtype for string columns.
-
-        In Python 3.14+, pandas infers StringDtype for string columns/indices instead of object.
-        This wrapper validates that string columns have string types, then compares the
-        DataFrames without strict dtype matching for object/string columns.
-
-        Parameters
-        ----------
-        left : pd.DataFrame
-            First DataFrame to compare
-        right : pd.DataFrame
-            Second DataFrame to compare
-        **kwargs : dict
-            Additional arguments passed to pd.testing.assert_frame_equal
-        """
-        # Verify index dtypes are string types if they're named 'rsid'
-        if left.index.name == "rsid":
-            self.assertTrue(
-                is_string_dtype(left.index.dtype),
-                f"Left index dtype {left.index.dtype} is not a string type",
-            )
-        if right.index.name == "rsid":
-            self.assertTrue(
-                is_string_dtype(right.index.dtype),
-                f"Right index dtype {right.index.dtype} is not a string type",
-            )
-
-        # Verify string columns (chrom, genotype) have string dtypes
-        for col in ["chrom", "genotype"]:
-            if col in left.columns:
-                self.assertTrue(
-                    is_string_dtype(left[col].dtype)
-                    or is_object_dtype(left[col].dtype),
-                    f"Left column '{col}' dtype {left[col].dtype} is not a string/object type",
-                )
-            if col in right.columns:
-                self.assertTrue(
-                    is_string_dtype(right[col].dtype)
-                    or is_object_dtype(right[col].dtype),
-                    f"Right column '{col}' dtype {right[col].dtype} is not a string/object type",
-                )
-
-        # Compare DataFrames without strict dtype matching for string columns
-        pd.testing.assert_frame_equal(
-            left, right, check_index_type=False, check_dtype=False, **kwargs
-        )
-
     def make_normalized_dataframe_assertions(self, df):
         self.assertEqual(df.index.name, "rsid")
         # Accept both object dtype and StringDtype (used in Python 3.14+)

From 4f0e47e2230eabb9c40f0c753ce2e69a12dde2bb Mon Sep 17 00:00:00 2001
From: Andrew Riha <apriha@gmail.com>
Date: Mon, 26 Jan 2026 22:38:33 -0800
Subject: [PATCH 3/3] Update description

---
 docs/conf.py         | 2 +-
 src/snps/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 33b329e..9a4d5a7 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -143,7 +143,7 @@
         "snps Documentation",
         author,
         "snps",
-        "Tools for reading, writing, merging, and remapping SNPs.",
+        "tools for reading, writing, generating, merging, and remapping SNPs",
         "Miscellaneous",
     )
 ]
diff --git a/src/snps/__init__.py b/src/snps/__init__.py
index 37b5c9c..8f975a0 100644
--- a/src/snps/__init__.py
+++ b/src/snps/__init__.py
@@ -1,6 +1,6 @@
 """`snps`
 
-tools for reading, writing, merging, and remapping SNPs
+tools for reading, writing, generating, merging, and remapping SNPs
 
 """