jbloomlab · davidrich27 · Mar 5, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 12, 2025
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,14 @@
 Changelog
 ===========
 
+2.4.2
+------
+* Updated `setup.py` dependencies.
+
+* `scipy` 1.15.0 has deprecated and removed `derivative` function with no direct replacement. For now, function has been imported into the project under `phydmslib/scipy_derivative.py`.
+
+* Updated `phydms_tutorial.Rmd` to python3 syntax.
+
 2.4.1
 -----
 * Unpin `biopython` now that `pyvolve` is updated.

diff --git a/phydmslib/_metadata.py b/phydmslib/_metadata.py
@@ -1,4 +1,4 @@
-__version__ = '2.4.1'
+__version__ = '2.4.2'
 __author__ = 'the Bloom lab (see https://github.com/jbloomlab/phydms/contributors)'
 __url__ = 'http://jbloomlab.github.io/phydms'
 __author_email__ = 'jbloom@fredhutch.org'

diff --git a/phydmslib/file_io.py b/phydmslib/file_io.py
@@ -219,7 +219,7 @@ def readPrefs(prefsfile, minpref=0, avgprefs=False, randprefs=False,
             prefs[r] = {}
             for aa in df.columns:
                 if aa != 'site':
-                    prefs[r][aa] = float(rdf[aa])
+                    prefs[r][aa] = float(rdf[aa].iloc[0])
     else:
         # try reading as dms_tools format
         prefs = phydmslib.file_io.readPrefs_dms_tools_format(prefsfile)[2]

diff --git a/phydmslib/models.py b/phydmslib/models.py
@@ -26,6 +26,7 @@
                                  INDEX_TO_NT, CODON_NT_MUT, CODON_NT_COUNT,
                                  CODON_TO_AA, INDEX_TO_AA, AA_TO_INDEX,
                                  CODON_NT)
+from phydmslib.scipy_derivative import derivative
 warnings.simplefilter('always')
 warnings.simplefilter('ignore', ImportWarning)
 
@@ -571,7 +572,7 @@ def M(self, t, tips=None, gaps=None):
                                                   * expD).swapaxes(1, 0),
                                                   broadcastGetCols(self.Ainv,
                                                                    tips))
-                if gaps is not None:
+                if gaps is not None and gaps.size > 0:
                     M[gaps] = numpy.ones(N_CODON, dtype='float')
         #  if M.min() < -0.01:
         #    warnings.warn("Large negative value in M(t) being set to 0. "
@@ -598,7 +599,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
             else:
                 dM_param = broadcastMatrixVectorMultiply(self.Prxy, Mt,
                                                          alpha=alpha)
-                if gaps is not None:
+                if gaps is not None and gaps.size > 0:
                     dM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
             return dM_param
 
@@ -663,7 +664,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
                     for j in range(paramlength):
                         dM_param[j] = (broadcastMatrixVectorMultiply(self.A,
                                        broadcastGetCols(broadcastMatrixMultiply(self.B[param][j] * V, self.Ainv), tips)))  # noqa: E501
-                if gaps is not None:
+                if gaps is not None and gaps.size > 0:
                     if not paramisvec:
                         dM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
                     else:
@@ -1338,10 +1339,10 @@ def _update_phi(self):
         _checkParam('phi', self.phi, self.PARAMLIMITS, self.PARAMTYPES)
         self._eta_from_phi()
         dbeta = 1.0e-3
-        self.dphi_dbeta = scipy.misc.derivative(self._compute_empirical_phi,
+        self.dphi_dbeta = derivative(self._compute_empirical_phi,
                                                 self.beta, dx=dbeta,
                                                 n=1, order=5)
-        dphi_dbeta_halfdx = scipy.misc.derivative(self._compute_empirical_phi,
+        dphi_dbeta_halfdx = derivative(self._compute_empirical_phi,
                                                   self.beta, dx=dbeta / 2,
                                                   n=1, order=5)
         assert numpy.allclose(self.dphi_dbeta, dphi_dbeta_halfdx, atol=1e-5,
@@ -1793,7 +1794,7 @@ def M(self, t, tips=None, gaps=None):
                 newM = numpy.zeros((len(tips), N_CODON))
                 for i in range(len(tips)):
                     newM[i] = (M[0][:, tips[i]])
-                if gaps is not None:
+                if gaps is not None and gaps.size > 0:
                     newM[gaps] = numpy.ones(N_CODON, dtype='float')
                 return newM
 
@@ -1820,7 +1821,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
                 dM_param = (broadcastMatrixVectorMultiply(
                            numpy.tile(self.Pxy[0], (self.nsites, 1, 1)),
                            Mt, alpha=alpha))
-                if gaps is not None:
+                if gaps is not None and gaps.size > 0:
                     dM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
             return dM_param
 
@@ -1862,7 +1863,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
                 newdM_param = numpy.zeros((len(tips), N_CODON))
                 for i in range(len(tips)):
                     newdM_param[i] = (dM_param[0][:, tips[i]])
-                if gaps is not None:
+                if gaps is not None and gaps.size > 0:
                     newdM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
                 return newdM_param
 
@@ -2188,11 +2189,10 @@ def f_beta(beta):
             for (param, f) in [('alpha_lambda', f_alpha),
                                ('beta_lambda', f_beta)]:
                 pvalue = getattr(self, param)
-                dparam = scipy.misc.derivative(f, pvalue, dx, n=1, order=5)
+                dparam = derivative(f, pvalue, dx, n=1, order=5)
                 assert dparam.shape == (self.ncats,)
                 for stepchange in [0.5, 2]:  # make sure robust to step size
-                    dparam2 = scipy.misc.derivative(f, pvalue, stepchange * dx,
-                                                    n=1, order=5)
+                    dparam2 = derivative(f, pvalue, stepchange * dx, n=1, order=5)
                     assert (numpy.allclose(dparam, dparam2,
                                            atol=1e-5, rtol=1e-4)), (
                             "Numerical derivative of {0} at {1} differs for "

diff --git a/phydmslib/scipy_derivative.py b/phydmslib/scipy_derivative.py
@@ -0,0 +1,194 @@
+""" scipy derivative functions
+
+Imported from scipy v1.11.0 -- has been deprecated and removed from scipy.
+"""
+
+from numpy import arange, newaxis, hstack, prod, array
+
+
+def derivative(func, x0, dx=1.0, n=1, args=(), order=3):
+    """
+    Find the nth derivative of a function at a point.
+
+    Given a function, use a central difference formula with spacing `dx` to
+    compute the nth derivative at `x0`.
+
+    .. deprecated:: 1.10.0
+        `derivative` has been deprecated from `scipy.misc.derivative`
+        in SciPy 1.10.0 and it will be completely removed in SciPy 1.12.0.
+        You may consider using
+        findiff: https://github.com/maroba/findiff or
+        numdifftools: https://github.com/pbrod/numdifftools
+
+    Parameters
+    ----------
+    func : function
+        Input function.
+    x0 : float
+        The point at which the nth derivative is found.
+    dx : float, optional
+        Spacing.
+    n : int, optional
+        Order of the derivative. Default is 1.
+    args : tuple, optional
+        Arguments
+    order : int, optional
+        Number of points to use, must be odd.
+
+    Notes
+    -----
+    Decreasing the step size too small can result in round-off error.
+
+    Examples
+    --------
+    >>> def f(x):
+    ...     return x**3 + x**2
+    >>> float(derivative(f, 1.0, dx=1e-6))
+    4.9999999999217...
+
+    """
+    return _derivative(func, x0, dx, n, args, order)
+
+
+def _central_diff_weights(Np, ndiv=1):
+    """
+    Return weights for an Np-point central derivative.
+
+    Assumes equally-spaced function points.
+
+    If weights are in the vector w, then
+    derivative is w[0] * f(x-ho*dx) + ... + w[-1] * f(x+h0*dx)
+
+    Parameters
+    ----------
+    Np : int
+        Number of points for the central derivative.
+    ndiv : int, optional
+        Number of divisions. Default is 1.
+
+    Returns
+    -------
+    w : ndarray
+        Weights for an Np-point central derivative. Its size is `Np`.
+
+    Notes
+    -----
+    Can be inaccurate for a large number of points.
+
+    Examples
+    --------
+    We can calculate a derivative value of a function.
+
+    >>> def f(x):
+    ...     return 2 * x**2 + 3
+    >>> x = 3.0 # derivative point
+    >>> h = 0.1 # differential step
+    >>> Np = 3 # point number for central derivative
+    >>> weights = _central_diff_weights(Np) # weights for first derivative
+    >>> vals = [f(x + (i - Np/2) * h) for i in range(Np)]
+    >>> float(sum(w * v for (w, v) in zip(weights, vals))/h)
+    11.79999999999998...
+
+    This value is close to the analytical solution:
+    f'(x) = 4x, so f'(3) = 12
+
+    References
+    ----------
+    .. [1] https://en.wikipedia.org/wiki/Finite_difference
+
+    """
+    if Np < ndiv + 1:
+        raise ValueError(
+            "Number of points must be at least the derivative order + 1."
+        )
+    if Np % 2 == 0:
+        raise ValueError("The number of points must be odd.")
+    from scipy import linalg
+
+    ho = Np >> 1
+    x = arange(-ho, ho + 1.0)
+    x = x[:, newaxis]
+    X = x**0.0
+    for k in range(1, Np):
+        X = hstack([X, x**k])
+    w = prod(arange(1, ndiv + 1), axis=0) * linalg.inv(X)[ndiv]
+    return w
+
+
+def _derivative(func, x0, dx=1.0, n=1, args=(), order=3):
+    """
+    Find the nth derivative of a function at a point.
+
+    Given a function, use a central difference formula with spacing `dx` to
+    compute the nth derivative at `x0`.
+
+    Parameters
+    ----------
+    func : function
+        Input function.
+    x0 : float
+        The point at which the nth derivative is found.
+    dx : float, optional
+        Spacing.
+    n : int, optional
+        Order of the derivative. Default is 1.
+    args : tuple, optional
+        Arguments
+    order : int, optional
+        Number of points to use, must be odd.
+
+    Notes
+    -----
+    Decreasing the step size too small can result in round-off error.
+
+    Examples
+    --------
+    >>> def f(x):
+    ...     return x**3 + x**2
+    >>> float(_derivative(f, 1.0, dx=1e-6))
+    4.9999999999217...
+
+    """
+    if order < n + 1:
+        raise ValueError(
+            "'order' (the number of points used to compute the derivative), "
+            "must be at least the derivative order 'n' + 1."
+        )
+    if order % 2 == 0:
+        raise ValueError(
+            "'order' (the number of points used to compute the derivative) "
+            "must be odd."
+        )
+    # pre-computed for n=1 and 2 and low-order for speed.
+    if n == 1:
+        if order == 3:
+            weights = array([-1, 0, 1]) / 2.0
+        elif order == 5:
+            weights = array([1, -8, 0, 8, -1]) / 12.0
+        elif order == 7:
+            weights = array([-1, 9, -45, 0, 45, -9, 1]) / 60.0
+        elif order == 9:
+            weights = array([3, -32, 168, -672, 0, 672, -168, 32, -3]) / 840.0
+        else:
+            weights = _central_diff_weights(order, 1)
+    elif n == 2:
+        if order == 3:
+            weights = array([1, -2.0, 1])
+        elif order == 5:
+            weights = array([-1, 16, -30, 16, -1]) / 12.0
+        elif order == 7:
+            weights = array([2, -27, 270, -490, 270, -27, 2]) / 180.0
+        elif order == 9:
+            weights = (
+                array([-9, 128, -1008, 8064, -14350, 8064, -1008, 128, -9])
+                / 5040.0
+            )
+        else:
+            weights = _central_diff_weights(order, 2)
+    else:
+        weights = _central_diff_weights(order, n)
+    val = 0.0
+    ho = order >> 1
+    for k in range(order):
+        val += weights[k] * func(x0 + (k - ho) * dx, *args)
+    return val / prod((dx,) * n, axis=0)
diff --git a/phydmslib/simulate.py b/phydmslib/simulate.py
@@ -120,9 +120,7 @@ def simulateAlignment(model, treeFile, alignmentPrefix, randomSeed=False):
     file with the name having the prefix giving by `alignmentPrefix`
     and the suffix `'_simulatedalignment.fasta'`.
     """
-    if randomSeed is False:
-        pass
-    else:
+    if randomSeed:
         random.seed(randomSeed)
 
     # Transform the branch lengths by dividing by the model `branchScale`
@@ -146,7 +144,10 @@ def simulateAlignment(model, treeFile, alignmentPrefix, randomSeed=False):
     info = '_temp_{0}info.txt'.format(alignmentPrefix)
     rates = '_temp_{0}_ratefile.txt'.format(alignmentPrefix)
     evolver = pyvolve.Evolver(partitions=partitions, tree=pyvolve_tree)
-    evolver(seqfile=alignment, infofile=info, ratefile=rates)
+    if randomSeed:
+        evolver(seqfile=alignment, infofile=info, ratefile=rates, seed=randomSeed)
+    else:
+        evolver(seqfile=alignment, infofile=info, ratefile=rates)
     for f in [rates, info, "custom_matrix_frequencies.txt"]:
         if os.path.isfile(f):
             os.remove(f)

diff --git a/phydmslib/treelikelihood.py b/phydmslib/treelikelihood.py
@@ -300,7 +300,7 @@ def __init__(self, tree, alignment, model, underflowfreq=3,
                 self.descendants.append([self.name_to_nodeindex[nx] for nx
                                          in node.find_clades() if nx != node])
             self.name_to_nodeindex[node] = n
-        self.gaps = numpy.array(self.gaps)
+        self.gaps = numpy.array(self.gaps, dtype=object)
         assert len(self.gaps) == self.ntips
 
         # _index_to_param defines internal mapping of

diff --git a/phydmslib/weblogo.py b/phydmslib/weblogo.py
@@ -474,7 +474,7 @@ def LogoPlot(sites, datatype, data, plotfile, nperline,
 
         # create web logo
         charstring = ''.join(chars_for_string)
-        assert len(charstring) == len(chars_for_string),\
+        assert len(charstring) == len(chars_for_string), \
             ("Length of charstring doesn't match length of "
              "chars_for_string. Do you have unallowable multi-letter "
              "characters?\n%s"
@@ -741,7 +741,7 @@ def format_color(color):
                 s.append((s_d[aa], aa))
             else:
                 s.append((s_d[aa], ' '))
-#        s = [(s_d[aa], aa) for aa in ordered_alphabets[seq_index]]
+        # s = [(s_d[aa], aa) for aa in ordered_alphabets[seq_index]]
 
         # Sort by frequency. If equal frequency then reverse alphabetic
         # (So sort reverse alphabetic first, then frequencty)
@@ -889,8 +889,8 @@ def read_transfac(fin, alphabet=None):
         hcols = len(header)
         rows = len(items)
         cols = len(items[0])
-        if not(header[0] == 'PO' or header[0] == 'P0' or
-               hcols == cols-1 or hcols == cols-2):
+        if not (header[0] == 'PO' or header[0] == 'P0' or
+                hcols == cols-1 or hcols == cols-2):
             raise ValueError("Missing header line!")
 
         # Do all lines (except the first) contain the same number of items?
@@ -1055,8 +1055,8 @@ def LogoOverlay(sites, overlayfile, overlay, nperline, sitewidth, rmargin,
     for (prop_d, shortname, longname) in overlay:
         if shortname == longname == 'wildtype':
             assert all(((isinstance(prop, str) and len(prop) == 1) for
-                        prop in prop_d.values())),\
-                        'prop_d does not give letters'
+                        prop in prop_d.values())), \
+                'prop_d does not give letters'
             proptype = 'wildtype'
             (vmin, vmax) = (0, 1)  # not used, but need to be assigned
             propcategories = None  # not used, but needs to be assigned

diff --git a/scripts/phydms b/scripts/phydms
@@ -229,7 +229,7 @@ def main():
         seqnames = set([head for (head, seq) in alignment])
 
         # process the substitution model
-        yngkp_match = re.compile('^YNGKP_(?P<modelvariant>M\d+)$')
+        yngkp_match = re.compile(r'^YNGKP_(?P<modelvariant>M\d+)$')
         if (isinstance(args['model'],
                        str) and yngkp_match.search(args['model'])):
             for argname in ['randprefs', 'avgprefs',