Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Changelog
===========

2.4.2
------
* Updated `setup.py` dependencies.

* `scipy` 1.15.0 has deprecated and removed `derivative` function with no direct replacement. For now, function has been imported into the project under `phydmslib/scipy_derivative.py`.

* Updated `phydms_tutorial.Rmd` to python3 syntax.

2.4.1
-----
* Unpin `biopython` now that `pyvolve` is updated.
Expand Down
2 changes: 1 addition & 1 deletion phydmslib/_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '2.4.1'
__version__ = '2.4.2'
__author__ = 'the Bloom lab (see https://github.com/jbloomlab/phydms/contributors)'
__url__ = 'http://jbloomlab.github.io/phydms'
__author_email__ = 'jbloom@fredhutch.org'
Expand Down
2 changes: 1 addition & 1 deletion phydmslib/file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def readPrefs(prefsfile, minpref=0, avgprefs=False, randprefs=False,
prefs[r] = {}
for aa in df.columns:
if aa != 'site':
prefs[r][aa] = float(rdf[aa])
prefs[r][aa] = float(rdf[aa].iloc[0])
else:
# try reading as dms_tools format
prefs = phydmslib.file_io.readPrefs_dms_tools_format(prefsfile)[2]
Expand Down
22 changes: 11 additions & 11 deletions phydmslib/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
INDEX_TO_NT, CODON_NT_MUT, CODON_NT_COUNT,
CODON_TO_AA, INDEX_TO_AA, AA_TO_INDEX,
CODON_NT)
from phydmslib.scipy_derivative import derivative
warnings.simplefilter('always')
warnings.simplefilter('ignore', ImportWarning)

Expand Down Expand Up @@ -571,7 +572,7 @@ def M(self, t, tips=None, gaps=None):
* expD).swapaxes(1, 0),
broadcastGetCols(self.Ainv,
tips))
if gaps is not None:
if gaps is not None and gaps.size > 0:
M[gaps] = numpy.ones(N_CODON, dtype='float')
# if M.min() < -0.01:
# warnings.warn("Large negative value in M(t) being set to 0. "
Expand All @@ -598,7 +599,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
else:
dM_param = broadcastMatrixVectorMultiply(self.Prxy, Mt,
alpha=alpha)
if gaps is not None:
if gaps is not None and gaps.size > 0:
dM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
return dM_param

Expand Down Expand Up @@ -663,7 +664,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
for j in range(paramlength):
dM_param[j] = (broadcastMatrixVectorMultiply(self.A,
broadcastGetCols(broadcastMatrixMultiply(self.B[param][j] * V, self.Ainv), tips))) # noqa: E501
if gaps is not None:
if gaps is not None and gaps.size > 0:
if not paramisvec:
dM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
else:
Expand Down Expand Up @@ -1338,10 +1339,10 @@ def _update_phi(self):
_checkParam('phi', self.phi, self.PARAMLIMITS, self.PARAMTYPES)
self._eta_from_phi()
dbeta = 1.0e-3
self.dphi_dbeta = scipy.misc.derivative(self._compute_empirical_phi,
self.dphi_dbeta = derivative(self._compute_empirical_phi,
self.beta, dx=dbeta,
n=1, order=5)
dphi_dbeta_halfdx = scipy.misc.derivative(self._compute_empirical_phi,
dphi_dbeta_halfdx = derivative(self._compute_empirical_phi,
self.beta, dx=dbeta / 2,
n=1, order=5)
assert numpy.allclose(self.dphi_dbeta, dphi_dbeta_halfdx, atol=1e-5,
Expand Down Expand Up @@ -1793,7 +1794,7 @@ def M(self, t, tips=None, gaps=None):
newM = numpy.zeros((len(tips), N_CODON))
for i in range(len(tips)):
newM[i] = (M[0][:, tips[i]])
if gaps is not None:
if gaps is not None and gaps.size > 0:
newM[gaps] = numpy.ones(N_CODON, dtype='float')
return newM

Expand All @@ -1820,7 +1821,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
dM_param = (broadcastMatrixVectorMultiply(
numpy.tile(self.Pxy[0], (self.nsites, 1, 1)),
Mt, alpha=alpha))
if gaps is not None:
if gaps is not None and gaps.size > 0:
dM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
return dM_param

Expand Down Expand Up @@ -1862,7 +1863,7 @@ def dM(self, t, param, Mt, tips=None, gaps=None):
newdM_param = numpy.zeros((len(tips), N_CODON))
for i in range(len(tips)):
newdM_param[i] = (dM_param[0][:, tips[i]])
if gaps is not None:
if gaps is not None and gaps.size > 0:
newdM_param[gaps] = numpy.zeros(N_CODON, dtype='float')
return newdM_param

Expand Down Expand Up @@ -2188,11 +2189,10 @@ def f_beta(beta):
for (param, f) in [('alpha_lambda', f_alpha),
('beta_lambda', f_beta)]:
pvalue = getattr(self, param)
dparam = scipy.misc.derivative(f, pvalue, dx, n=1, order=5)
dparam = derivative(f, pvalue, dx, n=1, order=5)
assert dparam.shape == (self.ncats,)
for stepchange in [0.5, 2]: # make sure robust to step size
dparam2 = scipy.misc.derivative(f, pvalue, stepchange * dx,
n=1, order=5)
dparam2 = derivative(f, pvalue, stepchange * dx, n=1, order=5)
assert (numpy.allclose(dparam, dparam2,
atol=1e-5, rtol=1e-4)), (
"Numerical derivative of {0} at {1} differs for "
Expand Down
194 changes: 194 additions & 0 deletions phydmslib/scipy_derivative.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
""" scipy derivative functions

Imported from scipy v1.11.0 -- has been deprecated and removed from scipy.
"""

from numpy import arange, newaxis, hstack, prod, array


def derivative(func, x0, dx=1.0, n=1, args=(), order=3):
"""
Find the nth derivative of a function at a point.

Given a function, use a central difference formula with spacing `dx` to
compute the nth derivative at `x0`.

.. deprecated:: 1.10.0
`derivative` has been deprecated from `scipy.misc.derivative`
in SciPy 1.10.0 and it will be completely removed in SciPy 1.12.0.
You may consider using
findiff: https://github.com/maroba/findiff or
numdifftools: https://github.com/pbrod/numdifftools

Parameters
----------
func : function
Input function.
x0 : float
The point at which the nth derivative is found.
dx : float, optional
Spacing.
n : int, optional
Order of the derivative. Default is 1.
args : tuple, optional
Arguments
order : int, optional
Number of points to use, must be odd.

Notes
-----
Decreasing the step size too small can result in round-off error.

Examples
--------
>>> def f(x):
... return x**3 + x**2
>>> float(derivative(f, 1.0, dx=1e-6))
4.9999999999217...

"""
return _derivative(func, x0, dx, n, args, order)


def _central_diff_weights(Np, ndiv=1):
"""
Return weights for an Np-point central derivative.

Assumes equally-spaced function points.

If weights are in the vector w, then
derivative is w[0] * f(x-ho*dx) + ... + w[-1] * f(x+h0*dx)

Parameters
----------
Np : int
Number of points for the central derivative.
ndiv : int, optional
Number of divisions. Default is 1.

Returns
-------
w : ndarray
Weights for an Np-point central derivative. Its size is `Np`.

Notes
-----
Can be inaccurate for a large number of points.

Examples
--------
We can calculate a derivative value of a function.

>>> def f(x):
... return 2 * x**2 + 3
>>> x = 3.0 # derivative point
>>> h = 0.1 # differential step
>>> Np = 3 # point number for central derivative
>>> weights = _central_diff_weights(Np) # weights for first derivative
>>> vals = [f(x + (i - Np/2) * h) for i in range(Np)]
>>> float(sum(w * v for (w, v) in zip(weights, vals))/h)
11.79999999999998...

This value is close to the analytical solution:
f'(x) = 4x, so f'(3) = 12

References
----------
.. [1] https://en.wikipedia.org/wiki/Finite_difference

"""
if Np < ndiv + 1:
raise ValueError(
"Number of points must be at least the derivative order + 1."
)
if Np % 2 == 0:
raise ValueError("The number of points must be odd.")
from scipy import linalg

ho = Np >> 1
x = arange(-ho, ho + 1.0)
x = x[:, newaxis]
X = x**0.0
for k in range(1, Np):
X = hstack([X, x**k])
w = prod(arange(1, ndiv + 1), axis=0) * linalg.inv(X)[ndiv]
return w


def _derivative(func, x0, dx=1.0, n=1, args=(), order=3):
"""
Find the nth derivative of a function at a point.

Given a function, use a central difference formula with spacing `dx` to
compute the nth derivative at `x0`.

Parameters
----------
func : function
Input function.
x0 : float
The point at which the nth derivative is found.
dx : float, optional
Spacing.
n : int, optional
Order of the derivative. Default is 1.
args : tuple, optional
Arguments
order : int, optional
Number of points to use, must be odd.

Notes
-----
Decreasing the step size too small can result in round-off error.

Examples
--------
>>> def f(x):
... return x**3 + x**2
>>> float(_derivative(f, 1.0, dx=1e-6))
4.9999999999217...

"""
if order < n + 1:
raise ValueError(
"'order' (the number of points used to compute the derivative), "
"must be at least the derivative order 'n' + 1."
)
if order % 2 == 0:
raise ValueError(
"'order' (the number of points used to compute the derivative) "
"must be odd."
)
# pre-computed for n=1 and 2 and low-order for speed.
if n == 1:
if order == 3:
weights = array([-1, 0, 1]) / 2.0
elif order == 5:
weights = array([1, -8, 0, 8, -1]) / 12.0
elif order == 7:
weights = array([-1, 9, -45, 0, 45, -9, 1]) / 60.0
elif order == 9:
weights = array([3, -32, 168, -672, 0, 672, -168, 32, -3]) / 840.0
else:
weights = _central_diff_weights(order, 1)
elif n == 2:
if order == 3:
weights = array([1, -2.0, 1])
elif order == 5:
weights = array([-1, 16, -30, 16, -1]) / 12.0
elif order == 7:
weights = array([2, -27, 270, -490, 270, -27, 2]) / 180.0
elif order == 9:
weights = (
array([-9, 128, -1008, 8064, -14350, 8064, -1008, 128, -9])
/ 5040.0
)
else:
weights = _central_diff_weights(order, 2)
else:
weights = _central_diff_weights(order, n)
val = 0.0
ho = order >> 1
for k in range(order):
val += weights[k] * func(x0 + (k - ho) * dx, *args)
return val / prod((dx,) * n, axis=0)
9 changes: 5 additions & 4 deletions phydmslib/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,7 @@ def simulateAlignment(model, treeFile, alignmentPrefix, randomSeed=False):
file with the name having the prefix giving by `alignmentPrefix`
and the suffix `'_simulatedalignment.fasta'`.
"""
if randomSeed is False:
pass
else:
if randomSeed:
random.seed(randomSeed)

# Transform the branch lengths by dividing by the model `branchScale`
Expand All @@ -146,7 +144,10 @@ def simulateAlignment(model, treeFile, alignmentPrefix, randomSeed=False):
info = '_temp_{0}info.txt'.format(alignmentPrefix)
rates = '_temp_{0}_ratefile.txt'.format(alignmentPrefix)
evolver = pyvolve.Evolver(partitions=partitions, tree=pyvolve_tree)
evolver(seqfile=alignment, infofile=info, ratefile=rates)
if randomSeed:
evolver(seqfile=alignment, infofile=info, ratefile=rates, seed=randomSeed)
else:
evolver(seqfile=alignment, infofile=info, ratefile=rates)
for f in [rates, info, "custom_matrix_frequencies.txt"]:
if os.path.isfile(f):
os.remove(f)
Expand Down
2 changes: 1 addition & 1 deletion phydmslib/treelikelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def __init__(self, tree, alignment, model, underflowfreq=3,
self.descendants.append([self.name_to_nodeindex[nx] for nx
in node.find_clades() if nx != node])
self.name_to_nodeindex[node] = n
self.gaps = numpy.array(self.gaps)
self.gaps = numpy.array(self.gaps, dtype=object)
assert len(self.gaps) == self.ntips

# _index_to_param defines internal mapping of
Expand Down
12 changes: 6 additions & 6 deletions phydmslib/weblogo.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def LogoPlot(sites, datatype, data, plotfile, nperline,

# create web logo
charstring = ''.join(chars_for_string)
assert len(charstring) == len(chars_for_string),\
assert len(charstring) == len(chars_for_string), \
("Length of charstring doesn't match length of "
"chars_for_string. Do you have unallowable multi-letter "
"characters?\n%s"
Expand Down Expand Up @@ -741,7 +741,7 @@ def format_color(color):
s.append((s_d[aa], aa))
else:
s.append((s_d[aa], ' '))
# s = [(s_d[aa], aa) for aa in ordered_alphabets[seq_index]]
# s = [(s_d[aa], aa) for aa in ordered_alphabets[seq_index]]

# Sort by frequency. If equal frequency then reverse alphabetic
# (So sort reverse alphabetic first, then frequencty)
Expand Down Expand Up @@ -889,8 +889,8 @@ def read_transfac(fin, alphabet=None):
hcols = len(header)
rows = len(items)
cols = len(items[0])
if not(header[0] == 'PO' or header[0] == 'P0' or
hcols == cols-1 or hcols == cols-2):
if not (header[0] == 'PO' or header[0] == 'P0' or
hcols == cols-1 or hcols == cols-2):
raise ValueError("Missing header line!")

# Do all lines (except the first) contain the same number of items?
Expand Down Expand Up @@ -1055,8 +1055,8 @@ def LogoOverlay(sites, overlayfile, overlay, nperline, sitewidth, rmargin,
for (prop_d, shortname, longname) in overlay:
if shortname == longname == 'wildtype':
assert all(((isinstance(prop, str) and len(prop) == 1) for
prop in prop_d.values())),\
'prop_d does not give letters'
prop in prop_d.values())), \
'prop_d does not give letters'
proptype = 'wildtype'
(vmin, vmax) = (0, 1) # not used, but need to be assigned
propcategories = None # not used, but needs to be assigned
Expand Down
2 changes: 1 addition & 1 deletion scripts/phydms
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def main():
seqnames = set([head for (head, seq) in alignment])

# process the substitution model
yngkp_match = re.compile('^YNGKP_(?P<modelvariant>M\d+)$')
yngkp_match = re.compile(r'^YNGKP_(?P<modelvariant>M\d+)$')
if (isinstance(args['model'],
str) and yngkp_match.search(args['model'])):
for argname in ['randprefs', 'avgprefs',
Expand Down
Loading