Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
19 changes: 18 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,21 @@

## 10.07.18

Adapt Makefile to call raxml-ng instead of raxml.
Adapted Makefile to call raxml-ng instead of raxml.

## 09.08.18

Adapted Makefile to call raxml-ng with `-all` to include bootstrap analysis.

## 10.08.18

Added script `minimal_branchlength.py` that calculates minimal branchlength and related bootstrap
values for each leaf and plots these. `minimal_branchlength.sh` is a bash script that calls it the
python script a specified number of times to analyse newick files stored in out$number directories.
`Make minimal_branchlength` calls the bash script.

## 11.08.18

Added script `uptree_branchlength.py` that calculates the branchlength to the uptree neighbour and related bootstrap values for each leaf and plots these. `uptree_branchlength.sh` is a bash script that calls it the
python script a specified number of times to analyse newick files stored in out$number directories.
`Make uptree_branchlength` calls the bash script.
10 changes: 8 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,19 @@ $(OUTDIR)/%-phyml.newick: $(OUTDIR)/%.phy
rm -f $(OUTDIR)/*.phy_phyml_*_xxxxx*

$(OUTDIR)/%-raxml.newick: $(OUTDIR)/%.phy
raxml-ng --msa $< --model GTR+G --prefix xxxxx --seed $$RANDOM --threads 1 >/dev/null
mv xxxxx.raxml.bestTree $@
raxml-ng --all --msa $< --model GTR+G --prefix xxxxx --seed $$RANDOM --threads 1 >/dev/null
mv xxxxx.raxml.support $@
rm -f xxxxx.*

$(OUTDIR)/%.ascii: $(OUTDIR)/%.newick
newick-to-ascii.py --outgroup $(OUTGROUP) < $< > $@

minimal_distance:
minimal_distance.sh

branchlength:
branchlength.sh

clean:
rm -f $(ASCII) $(NEWICK) $(PHY) $(OUTDIR)/*xxxxx* xxxxx.*

Expand Down
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,21 @@ Install [RAxML](https://github.com/amkozlov/raxml-ng). If on a mac with

Install [phyml](http://www.atgc-montpellier.fr/phyml/). If on a mac with
[brew](https://brew.sh/) you can just run `brew install phyml`.

## Plot recombinant tree data

One dataset (stored as out$i) contains trees generated from all json specification files, here different sequences are recombined.

`make` generates newick (with bootstrapvalues) and ascii files.

`make branchlength` plots bootstrap values against tip branchlengths for all trees in i datasets (i specified in `branchlength.sh`).

`make minimal_distance` plots bootstrap values against tip minimal distance to another tip for all trees in i datasets (i specified in `minimal_distance.sh`).

## Recombinantgradients

One dataset (stored as out$i) contains trees generated from the json specification files, here the recombinant is formed from to set sequences, but to varying percentages.

`make` generates newick (without bootstrapvalues for raxml-ng) and ascii files.

`../recombinant_gradient.py *raxml.newick > outtest` plots percentage of recombinant from one parent against the distance to it.
66 changes: 66 additions & 0 deletions branchlength.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python

from ete3 import Tree
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import seaborn

def branchlength(newickfile):

def plot_bl_bysupport(allmindistances, allsupportvalues, allleaves):
plt.figure()
plt.scatter(allmindistances, allsupportvalues)
for i, leaf in enumerate(allleaves):
if leaf == "recombinant":
plt.annotate(leaf, (allmindistances[i], allsupportvalues[i]))
plt.scatter(allmindistances[i], allsupportvalues[i], c='r')
else:
pass
#plt.annotate(leaf, (allmindistances[i], allsupportvalues[i]))
plottitle = str(name)
plt.title(plottitle)
plt.xlabel('Minimal branchlength to another leaf', fontsize=14)
#plt.xlim((0, 0.7))
plt.ylabel('Support value', fontsize=14)
plt.ylim((15, 102))
plt.tight_layout()
plotpath = 'branchlengthplot/' + str(name) + '.png'
plt.savefig(plotpath, dpi=300)

# Input the tree data. This is written for newick, as the code assumes only
# one input line.
with open(newickfile) as f:
name = f.name
print('Analysing sample:', name)
treedata = f.read()
treedata = treedata.strip()
t = Tree(treedata)

# Reroot the tree.
t.set_outgroup("root")

allleaves = []
allbranchlengths = []
allsupportvalues = []

for leaf in t:
stringleaf = str(leaf.name)
allleaves.append(stringleaf)
leafup = leaf.up
branchlength = leafup.get_distance(stringleaf)
print('The branchlength of %s is %f.' %(stringleaf, branchlength))
allbranchlengths.append(branchlength)
supportvalue = leafup.support
print('The support value of %s is %f.' %(stringleaf, supportvalue))
allsupportvalues.append(supportvalue)

plot_bl_bysupport(allbranchlengths[1:], allsupportvalues[1:], allleaves[1:])

parser = ArgumentParser()
parser.add_argument('fname', nargs='+', action='append', metavar='FILE', help='Newick file to analyse.')
args = parser.parse_args()

filenames = args.fname
for filename in filenames[0]:
branchlength(filename)

7 changes: 7 additions & 0 deletions branchlength.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
for i in $(seq 5)
do
cd out$i
mkdir branchlengthplot
../branchlength.py *raxml.newick > branchlength_$i
cd ..
done
99 changes: 99 additions & 0 deletions minimal_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python

from ete3 import Tree
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import seaborn

def maxbranchlength(newickfile):

def plot_bl_bysupport(allmindistances, allaveragedistances, allleaves):
plt.figure()
plt.scatter(allmindistances, allaveragedistances)
for i, leaf in enumerate(allleaves):
if leaf == "recombinant":
plt.annotate(leaf, (allmindistances[i], allaveragedistances[i]))
plt.scatter(allmindistances[i], allaveragedistances[i], c='r')
else:
pass
#plt.annotate(leaf, (allmindistances[i], allaveragedistances[i]))
plottitle = str(name)
plt.title(plottitle)
plt.xlabel('Minimal branchlength to another leaf', fontsize=14)
plt.xlim((0, 0.7))
plt.ylabel('Support value', fontsize=14)
plt.ylim((15, 102))
plt.tight_layout()
plotpath = 'branchlengthplot/' + str(name) + '.png'
plt.savefig(plotpath, dpi=300)

# Input the tree data. This is written for newick, as the code assumes only
# one input line.
with open(newickfile) as f:
name = f.name
print('Analysing sample:', name)
treedata = f.read()
treedata = treedata.strip()
t = Tree(treedata)

# Reroot the tree.
t.set_outgroup("root")

allleaves = []
allmindistanceleaves = []
allmindistances = []
allsupportvalues = []

for leaf in t:
stringleaf = str(leaf.name)
allleaves.append(stringleaf)

# Find the leaf2 with the smallest distance to leaf.
alldistances = []
allleaves2 = []
for leaf2 in t:
stringleaf2 = str(leaf2.name)
if stringleaf2 == stringleaf:
pass
else:
distance = leaf2.get_distance(stringleaf)
alldistances.append(distance)
allleaves2.append(stringleaf2)
mindist = min(alldistances)
allmindistances.append(mindist)
leaf2mindist = allleaves2[alldistances.index(mindist)]
allmindistanceleaves.append(leaf2mindist)

# Find support value for parent internal node of leaf.
leafup = leaf.up
supportvalue = leafup.support
allsupportvalues.append(supportvalue)

# Textoutput.
if "recombinant" not in allleaves:
print('There is no recombinant in this sample.')
maxmindist = max(allmindistances)
maxminleaf = str(allleaves[allmindistances.index(maxmindist)])
maxminleaf2 = str(allmindistanceleaves[allmindistances.index(maxmindist)])
minsupport = min(allsupportvalues[1:])
minsupportleaf = str(allleaves[allsupportvalues.index(minsupport)])
print('The leaf with the biggest minimal distance to another leaf is %s, to %s.' %(maxminleaf, maxminleaf2))
print('The leaf with the parental node with a minimal branch bootstrap value is %s.' %(minsupportleaf))
#print(allleaves)
#print(allmindistanceleaves)
#print(allmindistances)
#print(allsupportvalues)
if all(element==1.0 for element in allsupportvalues):
print('There are no bootstrapvalues in the newick file.')

# Plot branchlength against bootstrap value, leave out root (at index 0).
plot_bl_bysupport(allmindistances[1:], allsupportvalues[1:], allleaves[1:])

parser = ArgumentParser()
parser.add_argument('fname', nargs='+', action='append', metavar='FILE', help='Newick file to analyse.')
args = parser.parse_args()

filenames = args.fname
for filename in filenames[0]:
maxbranchlength(filename)
print('\n')
7 changes: 7 additions & 0 deletions minimal_distance.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
for i in $(seq 5)
do
cd out$i
mkdir minimal_distanceplot
../minimal_distance.py *raxml.newick > minimal_distance_$i
cd ..
done
Binary file added out1/.DS_Store
Binary file not shown.
Loading