Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4a656d5
degen.py working with minimal tests
Jul 22, 2015
ea2eba6
Merge branch 'master' of https://github.com/VDBWRAIR/bio_pieces
Jul 22, 2015
b253729
Building up pca analysis for fasta sequence alignment
necrolyte2 Jul 31, 2015
e61449e
This notebook will build what we think is a correct 3d axis for pca a…
necrolyte2 Jul 31, 2015
03942de
Converted ipython notebook to script that takes in fasta file path an…
necrolyte2 Jul 31, 2015
e83d9e3
docs and allowed matrix to lookup 'all' key for values that are not i…
necrolyte2 Aug 3, 2015
15190a8
Added matplotlib to requirements. Tests now build documentation image…
necrolyte2 Aug 3, 2015
a9db6fa
Removed x,y and z tick labels. Addex X,Y Z axis labels. Added mean le…
necrolyte2 Aug 3, 2015
0b81095
ipython notebook for messing around with
necrolyte2 Aug 11, 2015
9eb6fd0
adding nologcapture. Removing specific emails for build notifications…
necrolyte2 Aug 12, 2015
159ec02
py26 ordereddict import check
necrolyte2 Aug 12, 2015
21b1a2a
Trying to fix matplotlib tkagg issue
necrolyte2 Aug 12, 2015
d5b0e74
StringIO,BytesIO compatibility fixed for py26/27 py3
necrolyte2 Aug 12, 2015
22300f1
Merge branch 'pca' of https://github.com/VDBWRAIR/bio_pieces into pca
Aug 12, 2015
963beb2
initial commit of make_pca
Aug 12, 2015
02c28de
added make_pca requirements: emperor,skbio
Aug 12, 2015
5117d65
fixed syntax bug in make_pca, added shorter test file for faster runtime
Aug 12, 2015
51f9020
aliased `python make_pca.py` as make_pca in setup.py
Aug 12, 2015
ab4c39b
added make_pca robot test
Aug 12, 2015
60e124f
fixed scikit-bio package name
Aug 12, 2015
a5da170
added blas to .travis.yml
Aug 12, 2015
333ea58
make_pcoa is now optional and documented
Aug 14, 2015
febe749
degen files belong in different branch
Aug 14, 2015
38e0e6a
added make_pcoa to index
Aug 14, 2015
a1a144d
removed old PCA stuff
Aug 14, 2015
6905bfa
finished removing old pca stuff
Aug 14, 2015
e0b73df
removed matplotlib
Aug 14, 2015
8812564
Update .travis.yml
averagehat Aug 18, 2015
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,18 @@ python:
- "2.6"
- "2.7"
- "3.4"
#before_install: # Should fix matplotlib errors (matplotlib not currently used)
#- "export DISPLAY=:99.0"
# - "sh -e /etc/init.d/xvfb start"
install:
- sudo apt-get install gfortran libopenblas-dev liblapack-dev #needed for sci-*
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2 ordereddict counter; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then pip install robotframework-python3; else pip install robotframework; fi
- pip install -r requirements.txt
- pip install -r tests/requirements.txt
- python setup.py develop
script:
- nosetests tests --with-coverage --cover-erase --cover-package=bio_pieces
- nosetests tests --with-coverage --cover-erase --cover-package=bio_pieces --nologcapture
- pybot tests/*.robot
after_success:
- coveralls

notifications:
email:
recipients:
- michael.panciera.work@gmail.com
92 changes: 92 additions & 0 deletions bio_pieces/make_pcoa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
'''
Usage: make_pcoa.py <fasta> [--map <mapfile>] [--outdir <DIR>] [--coord <coordfile>]

Options:
--outdir=<DIR>,-o=<DIR> Directory to put html file in. [Default: pcoa]
--map=<mapfile>,-m=<mapfile> TSV file which maps FASTA IDs to metadata. If not supplied one is generated using the FASTA IDs only.
--coord=<coordfile>,-c=<coordfile> Coordinate file including distance matrix, defined by Qiime pipeline. Will override the information in the fasta file.

Help:
After running, open the resulting index.html file in your browser. i.e.:
$ make_pcoa aln.fasta --outdir pcoa
$ firefox pcoa/index.html
'''

from __future__ import print_function

#options in docopt are special, and need = if using them
''' also create static png files.
allow "color-by" parameters. '''
from docopt import docopt
from schema import Schema, Use, Optional
import os
import sh
try:
from skbio import Alignment
from skbio.stats.ordination import PCoA
import emperor #!not used, but emperor must be installed to run `make_emperor.py`
run_emperor = sh.Command('make_emperor.py')
except ImportError:
print("make_pcoa requires emperor and scikit-bio!\nExecute `pip install emperor` to use.")




def make_coordinates(fasta_filename):
    '''
    Run a PCoA on the alignment stored in a fasta file.

    The alignment is loaded with ``Alignment.read``, its ``distances()``
    result is handed to ``PCoA``, and the value of ``PCoA(...).scores()``
    is returned.

    :param fasta_filename: path to the fasta alignment file
    :return: the object returned by ``PCoA(...).scores()``
    '''
    distances = Alignment.read(fasta_filename).distances()
    return PCoA(distances).scores()

def write_coordiates(fasta_filename):
    '''
    Compute PCoA coordinates for a fasta alignment and write them to disk.

    The coordinates are written to ``<fasta_filename>.coord`` by calling
    ``write`` on the result of ``make_coordinates``.

    :param fasta_filename: path to the fasta alignment file
    :return: path of the coordinate file that was written
    :raises AssertionError: if the coordinate file already exists
    '''
    outname = '%s.coord' % fasta_filename
    # Raised explicitly rather than through an `assert` statement: asserts are
    # removed under `python -O`, which would silently overwrite an existing
    # file. AssertionError is kept so the exception type does not change.
    if os.path.exists(outname):
        raise AssertionError(
            "Coordinate file %s exists! Please remove or run again with --coord parameter." % outname)
    print("Generating Coordinate file %s from fasta file %s" % (outname, fasta_filename))
    make_coordinates(fasta_filename).write(outname)
    return outname

def make_emperor(fasta_fn, outdir, mapfile, coordfile):
    '''
    Run ``make_emperor.py`` for a fasta alignment.

    A missing (falsy) ``mapfile`` is replaced by one generated with
    ``make_simple_mapping``; a missing (falsy) ``coordfile`` is replaced by
    one generated with ``write_coordiates``. The mapping file is resolved
    before the coordinate file.

    :param fasta_fn: path to the fasta alignment file
    :param outdir: output directory, passed to ``make_emperor.py`` as ``o``
    :param mapfile: mapping file path, or a falsy value to auto-generate one
    :param coordfile: coordinate file path, or a falsy value to auto-generate one
    :return: whatever the ``run_emperor`` command call returns
    '''
    # TODO: do try/except for sh call to make_emperor
    if not mapfile:
        mapfile = make_simple_mapping(fasta_fn)
    if coordfile:
        coordinate_file = coordfile
    else:
        coordinate_file = write_coordiates(fasta_fn)
    return run_emperor(i=coordinate_file, m=mapfile, o=outdir)

def make_simple_mapping(fasta_fn):
    '''
    Write a minimal mapping file listing the sequence headers of a fasta file.

    Every line of the fasta file that starts with ``>`` contributes one line
    to the mapping file: the header line with the leading ``>`` removed.
    The mapping file is written to ``<fasta_fn>.map`` and begins with a
    ``#SampleID`` header line.

    :param fasta_fn: path to the fasta file
    :return: path of the mapping file that was written
    :raises AssertionError: if the mapping file already exists
    '''
    # Read the headers inside a `with` block so the fasta handle is closed
    # promptly instead of being left for the garbage collector.
    with open(fasta_fn) as fasta:
        ids = [line[1:] for line in fasta if line.startswith('>')]
    header = '#SampleID\n'
    mapfile_fn = '%s.map' % fasta_fn
    # Raised explicitly rather than through an `assert` statement: asserts are
    # removed under `python -O`, which would silently overwrite an existing
    # file. AssertionError is kept so the exception type does not change.
    if os.path.exists(mapfile_fn):
        raise AssertionError(
            "Mapping file %s exists! Please remove, or run again with --map parameter." % mapfile_fn)
    print("Auto-generating map file %s from fasta file %s" % (mapfile_fn, fasta_fn))
    with open(mapfile_fn, 'w') as mapfile:
        mapfile.write(header)
        mapfile.writelines(ids)
    return mapfile_fn

#NOTE: Currently unused
'''
def make_undescore_metadata_mapping(fasta):
import re
reg = re.compile(r'^[^_]+_([^_]+)_')
with open('%s.map' % fasta, 'w') as mapfile:
ids = map(X[1:], filter(X[0] == '>', open(fasta)))
#groups = groupby(ids, lambda x: x[:x.find('_')])
groups = groupby(ids, lambda x: reg.match(x).groups()[0])
header = '#Group\tSampleID\n'
mapfile.write(header)
for k, group in groups:
mapfile.writelines(map(('%s\t' % k + '{0}').format, group))
return mapfile.name
'''
def main():
    '''
    Command-line entry point.

    Parses the arguments described in the module docstring with docopt,
    validates them with a Schema, and passes them to ``make_emperor``.
    '''
    def empty_to_none(value):
        # Any falsy option value is normalised to None.
        return value or None

    validator = Schema({
        '<fasta>': os.path.isfile,
        Optional('--map'): Use(empty_to_none),
        Optional('--coord'): Use(empty_to_none),
        '--outdir': str,
    })
    options = validator.validate(docopt(__doc__, version='Version 1.0'))
    make_emperor(
        options['<fasta>'],
        options['--outdir'],
        options['--map'],
        options['--coord'],
    )


if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions docs/scripts/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Scripts
beast_checkpoint
beast_wrapper
group_references
make_pcoa
58 changes: 58 additions & 0 deletions docs/scripts/make_pcoa.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
make_pcoa
=========

make_pcoa builds an interactive 3D plot from a Principal Coordinate Analysis (PCoA) of a given
fasta alignment file. The file produced can be opened in a browser for interactive viewing.

make_pcoa uses the scikit-bio package to compute the PCoA and the emperor program to create the plot.

Usage
+++++++++++++++++++++++++++


Usage:
make_pcoa.py <fasta> [--map <mapfile>] [--outdir <DIR>] [--coord <coordfile>]

Options:
--outdir=<DIR>,-o=<DIR> Directory to put html file in. [Default: pcoa]
--map=<mapfile>,-m=<mapfile> TSV file which maps FASTA IDs to metadata. If not supplied one is generated using the FASTA IDs only.
--coord=<coordfile>,-c=<coordfile> Coordinate file including distance matrix, defined by Qiime pipeline

Examples:

.. code-block:: bash

$> make_pcoa tests/testinput/aln1.fasta --map mymap.map --outdir results

$> make_pcoa tests/testinput/aln1.fasta --outdir results

$> make_pcoa tests/testinput/aln1.fasta --map mymap.map --coord mycoord.coord


Produces a folder named "results" (or "pcoa" if --outdir is not provided) containing an index.html file. The plot can be viewed by opening that file in a browser:

.. code-block:: bash

$> firefox pcoa/index.html



Mapping File
+++++++++++++

Mapping files are used to customize the plot (e.g., color grouping) by defining categories for the provided sequences. Mapping files are simple tab-separated (TSV) files which
map FASTA IDs (from the input file, e.g. aln1.fasta) to arbitrary categories, e.g. geographic region of sequence, sequencing platform, etc.

Information about mapping files can be found here: http://qiime.org/documentation/file_formats.html#metadata-mapping-files

An example is located here: http://qiime.org/_static/Examples/File_Formats/Example_Mapping_File.txt

In general, a mapping file is suggested to assist in interpreting the data, but if one is not provided, make_pcoa will create one automatically. This auto-generated mapping file will have no categories besides FASTA ID.


Coordinate File
+++++++++++++++

Coordinate files include the data generated by PCoA. In general, make_pcoa should create this file automatically; but the --coord argument can be
supplied to avoid re-creating a new coordinate file each time (if, for example, a new mapping file is provided with the same alignment).

NOTE: If a coordinate file is provided, the fasta file is ignored: the coordinate file provides the information to create the plot (unless the fasta file is used to auto-generate the mapping file, which will affect metadata only).
2 changes: 2 additions & 0 deletions pcoa-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
emperor
scikit-bio
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
'beast_wrapper = bio_pieces.beast_wrapper:beast_wrapper',
'beast_est_time = bio_pieces.beast_wrapper:beast_est_time',
'version = bio_pieces.version:main',
'make_pcoa = bio_pieces.make_pcoa:main'
#'sequence_concat = bio_pieces.sequence_concat:main',
#'sequence_files_concat = bio_pieces.sequence_files_concat:main',
#'sequence_split = bio_pieces_old.sequence_split:main',
Expand Down
26 changes: 0 additions & 26 deletions test

This file was deleted.

17 changes: 17 additions & 0 deletions tests/make_pcoa.robot.dontrun.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
*** Settings ***
Library Process
Library OperatingSystem
Library Collections
Suite Teardown Terminate All Processes

*** Variables ***
${fasta} = tests/testinput/short.aln1.fasta

*** Test Cases ***
TestMakePCAReturnCodeIsZero
    # The console script installed by setup.py is named make_pcoa, not make_pca.
    ${process_result} =    Run Process    make_pcoa    ${fasta}
    Log To Console    ${process_result.stdout}
    Log To Console    ${process_result.stderr}
    Should Be Equal As Integers    ${process_result.rc}    0
    # Check output
    Should Not Contain    ${process_result.stdout}    Error
Loading