Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
251 commits
Select commit Hold shift + click to select a range
9782bec
bug fix runSingle.py
HannahBioI Apr 1, 2021
e56d87a
cleaning output
HannahBioI Apr 8, 2021
766c89d
testing
HannahBioI Apr 8, 2021
48e4154
testing
HannahBioI Apr 8, 2021
47f45d6
testing
HannahBioI Apr 8, 2021
fe44e0b
testing
HannahBioI Apr 8, 2021
34e87ca
testing
HannahBioI Apr 8, 2021
32bce0e
testing
HannahBioI Apr 8, 2021
a8362e3
testing
HannahBioI Apr 8, 2021
0458c25
testing
HannahBioI Apr 8, 2021
1b07c90
testing
HannahBioI Apr 8, 2021
2d3f8dd
testing
HannahBioI Apr 8, 2021
afec218
testing
HannahBioI Apr 8, 2021
9983e00
testing
HannahBioI Apr 8, 2021
4cca757
bug fix if augustus can't identify a gene at a candidate region
HannahBioI Apr 9, 2021
d9bb72d
testing
HannahBioI Apr 9, 2021
ddec3f0
bug fix
HannahBioI Apr 9, 2021
13aea2d
bug fix
HannahBioI Apr 9, 2021
89a8843
cleaning up
HannahBioI Apr 9, 2021
116acad
testing
HannahBioI Apr 12, 2021
0078ee4
testing
HannahBioI Apr 12, 2021
c03e59d
testing
HannahBioI Apr 12, 2021
366a4ab
testing
HannahBioI Apr 12, 2021
79f2b67
bug fix in merge function, regions in minus strand were not merged co…
HannahBioI Apr 12, 2021
5425cd1
testing
HannahBioI Apr 12, 2021
174cc0c
testing
HannahBioI Apr 12, 2021
ccc3e4e
testing
HannahBioI Apr 12, 2021
e2cb392
testing
HannahBioI Apr 12, 2021
6c9b258
testing
HannahBioI Apr 12, 2021
b9c055e
bug fix
HannahBioI Apr 12, 2021
79df315
testing
HannahBioI Apr 12, 2021
0bc70a0
testing
HannahBioI Apr 12, 2021
a31d5e9
testing
HannahBioI Apr 12, 2021
55137f4
testing
HannahBioI Apr 12, 2021
ab85180
testing
HannahBioI Apr 12, 2021
f66f72c
clean up
HannahBioI Apr 13, 2021
f573dc4
testing
HannahBioI Apr 15, 2021
4dad886
testing
HannahBioI Apr 15, 2021
ef9c17f
testing
HannahBioI Apr 15, 2021
e5b06e1
testing
HannahBioI Apr 15, 2021
7e0377d
bug fix
HannahBioI Apr 15, 2021
721cfff
testing new tblastn call
HannahBioI Apr 15, 2021
496bb1f
testing
HannahBioI Apr 15, 2021
2cdc82d
testing
HannahBioI Apr 16, 2021
3898d4e
testing
HannahBioI Apr 16, 2021
e1fec1a
testing
HannahBioI Apr 16, 2021
65c1e1e
testing
HannahBioI Apr 18, 2021
34b2ee5
code clean up
HannahBioI Apr 20, 2021
6546b53
clean up code
HannahBioI Apr 21, 2021
5835365
clean up
HannahBioI Apr 21, 2021
421580d
clean up
HannahBioI Apr 21, 2021
89dfaf0
reduce output
HannahBioI Apr 21, 2021
ecf29ed
clean up code
HannahBioI Apr 22, 2021
55a9e6c
check augustus
HannahBioI Apr 24, 2021
d2492d0
testing
HannahBioI Apr 24, 2021
2c0d152
adding option to recognize if co-ortholog or not in header of the ext…
HannahBioI Apr 25, 2021
4b19832
testing
HannahBioI Apr 25, 2021
db4c6a5
testing
HannahBioI Apr 25, 2021
f487145
testing
HannahBioI Apr 25, 2021
43b73b0
testing
HannahBioI Apr 25, 2021
620d5fa
testing
HannahBioI Apr 25, 2021
1f265f3
Merge branch 'fdog_goes_assembly' of https://github.com/mueli94/fDOG …
HannahBioI Apr 25, 2021
ac34773
testing
HannahBioI Apr 25, 2021
86337fc
testing
HannahBioI Apr 25, 2021
5072380
testing
HannahBioI Apr 25, 2021
df6d324
added function starting_subprocess() to handle call of extern tools m…
HannahBioI Apr 28, 2021
7187972
added augustus to dependencies
HannahBioI Apr 28, 2021
721bcdb
testing
HannahBioI Apr 28, 2021
9a2e4d0
bug fix
HannahBioI Apr 28, 2021
1e5893b
testing
HannahBioI Apr 28, 2021
e8440c8
testing
HannahBioI Apr 28, 2021
6362e47
testing
HannahBioI Apr 28, 2021
02ad76c
testing
HannahBioI Apr 28, 2021
ac929b7
testing
HannahBioI Apr 28, 2021
060b4bb
testing
HannahBioI Apr 28, 2021
c996ca6
testing
HannahBioI Apr 28, 2021
3f46b83
testing
HannahBioI Apr 28, 2021
b5924a8
testing
HannahBioI Apr 28, 2021
490f43c
added function to clean up .domain files
HannahBioI Apr 28, 2021
07c693d
testing
HannahBioI Apr 28, 2021
3d80422
testing
HannahBioI Apr 28, 2021
acdb6fe
testing
HannahBioI Apr 28, 2021
38aca29
testing
HannahBioI Apr 28, 2021
f46cdc0
improve user output
HannahBioI May 11, 2021
b662346
fdog.assembly started with fDOG is always silent
HannahBioI May 31, 2021
a751205
testing
HannahBioI May 31, 2021
eb9f585
testing output
HannahBioI May 31, 2021
bb3c148
testing
HannahBioI May 31, 2021
be2b9d4
testing
HannahBioI May 31, 2021
6fbd5aa
testing
HannahBioI May 31, 2021
34d683c
testing
HannahBioI May 31, 2021
f950474
testing
HannahBioI May 31, 2021
0b129a2
removing automatically .tsv files if existing
HannahBioI May 31, 2021
6c6b125
Fdog goes assembly (#8)
HannahBioI May 31, 2021
f8ccac5
measure computational time
HannahBioI Jun 30, 2021
1cf64f1
measure computational time
HannahBioI Jun 30, 2021
6e163ba
bug fix
HannahBioI Jun 30, 2021
1d1c47a
testing
HannahBioI Jul 1, 2021
6e0ce72
computational time output
HannahBioI Jul 1, 2021
a1cb75d
corrected computational time output
HannahBioI Jul 1, 2021
328f26d
automatic augustus installation during setup
HannahBioI Jul 20, 2021
594715d
added tblastn version check
HannahBioI Jul 21, 2021
be91b3b
bug fix
HannahBioI Jul 21, 2021
4b5fb49
bug fix
HannahBioI Jul 21, 2021
c630d75
testing BLAST version check
HannahBioI Jul 21, 2021
f31cebf
tblastn version check during fdog.setup --conda
HannahBioI Jul 21, 2021
7ef3a05
Merge branch 'fdog_goes_assembly' into fdog_goes_assembly
HannahBioI Aug 2, 2021
6edf7a0
Fdog goes assembly (#10)
HannahBioI Aug 2, 2021
1b4232e
Added link to fDOG-Assembly poster for QfO 6.5
HannahBioI Aug 2, 2021
4798b8f
Update README.md
HannahBioI Aug 2, 2021
97dcf81
Merge branch 'fdog_goes_assembly' into fdog_assembly
HannahBioI Sep 9, 2021
087cae2
Merge pull request #6 from BIONF/fdog_assembly
HannahBioI Sep 9, 2021
d64177c
added option checkOff
HannahBioI Sep 10, 2021
ef6b0dc
bug fix
HannahBioI Sep 10, 2021
d4bf11f
bug fix
HannahBioI Sep 10, 2021
62badce
testing
HannahBioI Sep 10, 2021
a51b8f4
testing
HannahBioI Sep 10, 2021
43070d9
Merge branch 'fdog_goes_assembly' of https://github.com/mueli94/fDOG …
HannahBioI Sep 10, 2021
147bbc9
fixed
HannahBioI Sep 10, 2021
a992e32
fixed FAS call
HannahBioI Sep 10, 2021
abea098
changed FAS call
HannahBioI Sep 10, 2021
d56b83e
new function that checks if input path exist and new function that ch…
HannahBioI Sep 14, 2021
3431992
improved user output
HannahBioI Sep 15, 2021
a843bfe
added timeout for tblastn search, fixed bug during deletion of tmp fo…
HannahBioI Sep 17, 2021
36fc207
added options force and append
HannahBioI Sep 20, 2021
2e17db1
tested --force and --append, only the 10 best candidate regions (regar…
HannahBioI Sep 23, 2021
8056287
create a function that performs the ortholog search and returns the h…
HannahBioI Sep 28, 2021
ee36364
added parallelization with bib multiprocessing
HannahBioI Sep 29, 2021
da8cdcc
added output for computational time
HannahBioI Oct 1, 2021
ba752aa
updated fDOG-Assembly structure. fDOG-Assembly is now a separate scri…
HannahBioI Oct 11, 2021
49a430b
testing addSeq function
HannahBioI Oct 11, 2021
e18872b
bug fix in addSeq function
HannahBioI Oct 11, 2021
b4d1e0c
bug fix in ortholog search function
HannahBioI Oct 16, 2021
e85fd1c
bug fix in ortholog search if tblastn takes too long
HannahBioI Oct 17, 2021
1f9f736
updated input options
HannahBioI Oct 18, 2021
2112856
Merge branch 'fdog_goes_assembly' into fdog_goes_assembly
HannahBioI Oct 19, 2021
42e4ba1
Update fDOG goes assembly to version 0.1.2 (#12)
HannahBioI Oct 19, 2021
6d7df01
updated help function
HannahBioI Oct 19, 2021
2750a78
Merge branch 'fdog_goes_assembly' of https://github.com/mueli94/fDOG …
HannahBioI Oct 19, 2021
ac2652b
updated help function
HannahBioI Oct 19, 2021
688b21e
rm filter option
HannahBioI Oct 19, 2021
0756168
error handling of ValueError in function get_distance_biopython
HannahBioI Oct 19, 2021
f9d4623
test
HannahBioI Oct 19, 2021
134f94d
test
HannahBioI Oct 19, 2021
81af9ad
test
HannahBioI Oct 19, 2021
1c54841
test
HannahBioI Oct 19, 2021
8eb12a5
fixed item not found error in distance function
HannahBioI Oct 19, 2021
326ff42
cleaning up output
HannahBioI Oct 20, 2021
0016fa5
Merge branch 'fdog_goes_assembly' into fdog_goes_assembly
HannahBioI Oct 20, 2021
97750b6
Fdog goes assembly (#13)
HannahBioI Oct 20, 2021
a7f9e19
bug fix in function checkCoOrthologs
HannahBioI Oct 20, 2021
7b8745b
bug fix
HannahBioI Oct 20, 2021
c21a3f5
enabled output during parallel computation
HannahBioI Oct 22, 2021
97a3dc2
Merge branch 'fdog_goes_assembly' of https://github.com/mueli94/fDOG …
HannahBioI Oct 22, 2021
d437423
enabled output during parallel computation
HannahBioI Oct 22, 2021
7a37abc
bug fix
HannahBioI Oct 22, 2021
02f0046
improved output
HannahBioI Oct 22, 2021
52feba3
improved output
HannahBioI Oct 22, 2021
67ce089
Merge branch 'fdog_goes_assembly' into fdog_goes_assembly
HannahBioI Oct 25, 2021
254034d
Fdog goes assembly (#14)
HannahBioI Oct 25, 2021
9c228b2
a file can be used as input for --searchTaxa
HannahBioI Oct 25, 2021
84c4c09
Merge branch 'fdog_goes_assembly' of https://github.com/mueli94/fDOG …
HannahBioI Oct 25, 2021
fdb3073
bug fix
HannahBioI Oct 25, 2021
42f7def
Fdog goes assembly (#15)
HannahBioI Oct 25, 2021
f43820e
fixed bug in searching_for_db
HannahBioI Oct 26, 2021
7d12ffa
fixed bug in function searching_for_db
HannahBioI Oct 26, 2021
110073f
bug fix searching_for_db function
HannahBioI Oct 26, 2021
afd28c6
testing
HannahBioI Oct 26, 2021
6076c5d
testing
HannahBioI Oct 26, 2021
2f38455
reorganization of code to enable the use of metaeuk as an alternative…
HannahBioI Feb 9, 2022
e088dff
included metaeuk
HannahBioI Feb 9, 2022
5cb0f2b
bug fix
HannahBioI Feb 10, 2022
cb085c7
bug fix
HannahBioI Feb 10, 2022
0d2d26d
bug fix
HannahBioI Feb 10, 2022
8d9ce60
added preparation steps for metaeuk (tblastn search preparation)
HannahBioI Feb 10, 2022
65c8835
bug fix
HannahBioI Feb 10, 2022
8327592
bug fix
HannahBioI Feb 10, 2022
fb62700
bug fix
HannahBioI Feb 10, 2022
f5e25db
bug fix
HannahBioI Feb 10, 2022
e59ae53
bug fix
HannahBioI Feb 10, 2022
188ae4b
bug fix
HannahBioI Feb 10, 2022
93e79fe
bug fix
HannahBioI Feb 10, 2022
90eb408
testing other parameters for metaeuk
HannahBioI Feb 10, 2022
ca4133a
testing new parameters
HannahBioI Feb 10, 2022
6be7252
testing
HannahBioI Feb 10, 2022
926963f
testing
HannahBioI Feb 10, 2022
062eefc
testing
HannahBioI Feb 10, 2022
49c080e
testing
HannahBioI Feb 10, 2022
fb8e97a
testing
HannahBioI Feb 10, 2022
be1b56a
metaeuk is included and running in fdog_assembly workflow
HannahBioI Feb 10, 2022
cb9a5fd
testing other metaeuk parameters
HannahBioI Feb 10, 2022
79791e8
using complete contigs for metaeuk
HannahBioI Feb 10, 2022
f6f72f7
testing
HannahBioI Feb 10, 2022
61a1ee5
added parameter for own metaeuk db
HannahBioI Feb 11, 2022
81ec9a5
bugfix
HannahBioI Feb 11, 2022
17a546a
for debugging function get_distance_biopython
HannahBioI Feb 14, 2022
c260ce4
testing
HannahBioI Feb 14, 2022
0ec7678
bug fix, testing
HannahBioI Feb 23, 2022
76e5038
bug fix
HannahBioI Feb 24, 2022
ad12f0a
gff file positions were corrected during fDOG-Assembly run
HannahBioI Mar 1, 2022
6b15f26
bug fix
HannahBioI Mar 1, 2022
7d7504f
bug fix
HannahBioI Mar 1, 2022
826d676
bug fix
HannahBioI Mar 1, 2022
8a832fc
bug fix
HannahBioI Mar 1, 2022
14c852c
bug fix
HannahBioI Mar 1, 2022
9146028
Merge pull request #8 from BIONF/master
HannahBioI Mar 10, 2022
3bb9075
Merge branch 'fdog_goes_assembly' into master
HannahBioI Mar 10, 2022
9856606
Merge pull request #9 from mueli94/master
HannahBioI Mar 10, 2022
5b031aa
Merge pull request #10 from BIONF/master
HannahBioI Mar 10, 2022
33b103f
Merge pull request #11 from mueli94/master
HannahBioI Mar 10, 2022
1b0a84d
Merge branch 'fdog_goes_assembly' into fdog_goes_assembly
HannahBioI Apr 12, 2022
b3663fc
Merge pull request #22 from mueli94/fdog_goes_assembly
trvinh Apr 12, 2022
b1a25aa
bug fix blast DB computation (#23)
HannahBioI Jun 15, 2022
7f666b7
Squashed commit of the following:
HannahBioI Oct 24, 2023
87c66f1
Fdog assembly v0.1.4 (#35)
HannahBioI Oct 24, 2023
d429ec9
bug fix parallel computation
HannahBioI Nov 17, 2023
8b1a58f
Fdog assembly (#37)
HannahBioI Nov 20, 2023
64a0022
renamed ortholog group fasta file because of signalp bug
HannahBioI Nov 20, 2023
90a4f20
renamed .extended.fa to _og.fa
HannahBioI Nov 20, 2023
6dbebfd
Merge remote-tracking branch 'origin/fdog_goes_assembly' into fdog_go…
HannahBioI Nov 20, 2023
172ee24
bug fix run time error with --fasoff
HannahBioI Dec 11, 2023
46b8c05
bugfix block profile computation
HannahBioI Feb 5, 2024
9983e30
update block profile function
HannahBioI Feb 6, 2024
844c02d
Fdog assembly v0.1.5 (#39)
HannahBioI Feb 9, 2024
05283e0
Fdog assembly (#40)
HannahBioI Feb 12, 2024
08510c0
Fdog assembly (#41)
HannahBioI Mar 4, 2024
60ae899
Fdog assembly (#42)
HannahBioI Mar 4, 2024
c38687e
Update of some functions to improve I/O operations (#44)
HannahBioI Jun 13, 2024
dddd57a
v 0.1.5.1 (#45)
HannahBioI Jun 13, 2024
3a3081e
Improvements fdog.addAssembly script (#47)
HannahBioI Jun 18, 2024
01c94ba
bugfix using --strict (#48)
HannahBioI Jun 20, 2024
430b7bb
Merge branch 'master' into fdog_goes_assembly
HannahBioI Jan 22, 2025
d79e253
Improved FAS error output
HannahBioI Mar 24, 2025
7b656af
Merge remote-tracking branch 'upstream/master' into fdog_goes_assembly
HannahBioI Sep 10, 2025
5414d1b
update
HannahBioI Sep 10, 2025
5b458ea
Update README.md
HannahBioI Sep 10, 2025
ebe8e8a
Revert "Merge remote-tracking branch 'upstream/master' into fdog_goes…
HannahBioI Sep 10, 2025
3c49728
Revert "update"
HannahBioI Sep 10, 2025
26ffa54
Revert "Update README.md"
HannahBioI Sep 10, 2025
ec1eec6
fDOG update (#55)
HannahBioI Sep 14, 2025
5831add
added test Assembly for fDOG-Assembly
HannahBioI Sep 14, 2025
62de82b
fDOG update (#56)
HannahBioI Sep 14, 2025
23cc9bd
Fdog assembly update to 0.1.5.2 (#58)
HannahBioI Sep 14, 2025
f13becc
creating assembly_dir in setup; auto detect datapath in fdog-assembly
trvinh Oct 4, 2025
45e3029
Merge branch 'master' into fdog_goes_assembly
trvinh Oct 6, 2025
34df42a
fixed undefined var
trvinh Oct 6, 2025
b0ae1b3
removed hmmemit from dependencies.txt
trvinh Oct 6, 2025
af6e400
removed tblastn from dependencies.txt; added check fdog.assembly to bu…
trvinh Oct 6, 2025
fa9031d
updated github_build
trvinh Oct 6, 2025
39075f6
updated setup.py and manifest.in
trvinh Oct 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/github_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ jobs:
pwd
pip install .
path=$(fdog.setup -d ./ --getSourcepath); for i in $(less $path/data/dependencies.txt); do sudo apt-get install -y -qq $i; done
path=$(fdog.setup -d ./ --getSourcepath); ls $path/data/
echo "#########################################"
pwd
ls
echo "#########################################"
echo "TEST fdog.setup"
fdog.setup -d /home/runner/work/fDOG/fDOG/dt --woFAS
echo "TEST fdog.checkData"
Expand All @@ -49,6 +54,8 @@ jobs:
fdog.showTaxa
echo "TEST fdog.run"
fdog.run --seqFile infile.fa --jobName test --refspec HUMAN@9606@qfo24_02 --fasOff --group mammalia
echo "TEST fdog.assembly"
fdog.assembly --gene test --refSpec HUMAN@9606@qfo24_02 --augustus --augustusRefSpec human --coregroupPath core_orthologs/ --out test_assembly --fasoff
mkdir seeds
path=$(fdog.setup -d ./ --getSourcepath); a="1 2 3"; for i in ${a[@]}; do cp $path/data/infile.fa seeds/$i.fa; done
echo "TEST fdogs.run"
Expand Down
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,16 @@ dmypy.json
# Pyre type checker
.pyre/

# DS_store
**/.DS_Store
/fdog/.DS_Store
/fdog/data/.DS_Store
/fdog/bin/.DS_Store
/fdog/setup/.DS_Store

#Hannah
/fdog/data/core_orthologs/
/fdog/data/assembly_dir/
/fdog/fdog_goes_assembly/tmp/
taxdump*
.DS_Store
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions .idea/fDOG-Assembly.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
recursive-include fDOG *
recursive-include fdog/data *
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
![Github Build](https://github.com/BIONF/fDOG/workflows/build/badge.svg)

# Poster fDOG - Assembly
https://github.com/BIONF/fDOG/blob/gh-pages/www/Poster_fDOG_Assembly.pdf
# Table of Contents
* [How to install](#how-to-install)
* [Install the fDOG package](#install-the-fdog-package)
Expand Down
143 changes: 143 additions & 0 deletions fdog/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# DS_store
**/.DS_Store
/fdog/.DS_Store
/fdog/data/.DS_Store
/fdog/bin/.DS_Store
/fdog/setup/.DS_Store

#Hannah
/fdog/data/core_orthologs/
/fdog/data/assembly_dir/
/fdog/fdog_goes_assembly/tmp/
taxdump*
/fdog/fDOGassembly.py
124 changes: 124 additions & 0 deletions fdog/addAssembly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import fdog.libs.addtaxon as addTaxon_fn
import fdog.libs.tree as tree_fn
import fdog.libs.zzz as general_fn
import sys
import os
import argparse

def check_fasta(file):
    """Run two line-count checks on a fasta file and return 1 or 0.

    Returns 1 when the two ``count_line`` results for '>' (flag True vs.
    flag False) differ, or when the ``count_line`` result for '|' (flag
    True) is greater than zero. Returns 0 otherwise.

    NOTE(review): ``general_fn.count_line`` is defined outside this file;
    presumably the flag selects lines that do / do not contain the pattern,
    i.e. header lines vs. sequence lines -- confirm against fdog.libs.zzz.
    """
    header_lines = general_fn.count_line(file, '>', True)
    sequence_lines = general_fn.count_line(file, '>', False)
    if header_lines != sequence_lines:
        return 1
    # The '|' count is only computed once the first check has passed.
    pipe_lines = general_fn.count_line(file, '|', True)
    return 1 if pipe_lines > 0 else 0

def check_path(path):
    """Classify a filesystem path.

    Returns the string "File" for a regular file, the string "Path" for
    anything else that exists, and the boolean False when nothing exists at
    ``path``.
    """
    if os.path.isfile(path):
        return "File"
    if os.path.exists(path):
        return "Path"
    return False

def parse_file(path):
    """Read a two-column, tab-separated mapping file into a dictionary.

    The first column of every line becomes a key and the second column its
    value (main() passes the file given by --ncbi, whose help text describes
    the columns as NCBI ID and fasta file name). Blank lines are skipped, so
    a trailing empty line no longer aborts parsing. If a key occurs more than
    once, the last occurrence wins.

    Args:
        path: Path of the mapping file.

    Returns:
        dict mapping first-column strings to second-column strings.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            tab-separated fields.
        OSError: If the file cannot be opened.
    """
    id_dict = {}
    # The context manager closes the file even when a malformed line raises.
    with open(path, "r") as handle:
        for line_number, line in enumerate(handle, start=1):
            line = line.rstrip()
            if not line:
                continue
            fields = line.split("\t")
            if len(fields) != 2:
                raise ValueError(
                    "%s, line %d: expected 2 tab-separated fields, found %d"
                    % (path, line_number, len(fields))
                )
            ncbi, name = fields
            id_dict[ncbi] = name
    return id_dict


def _place_fasta(source, assembly_folder, name, link=None):
    """Create ``assembly_folder`` and put ``source`` into it as ``<name>.fa``.

    Args:
        source: Path of the fasta file to add.
        assembly_folder: Folder to create (including missing parents).
        name: Base name of the target file, without the ``.fa`` suffix.
        link: None to copy the file, "hard" to create a hard link,
            "symbolic" to create a symbolic link.

    Raises:
        OSError: If the folder or the target file cannot be created.
    """
    import shutil  # local import: only this helper needs it

    os.makedirs(assembly_folder, exist_ok=True)
    target = os.path.join(assembly_folder, name + ".fa")
    if link is None:
        shutil.copy(source, target)
    elif link == "hard":
        os.link(source, target)
    else:
        os.symlink(source, target)


def main():
    """Command-line entry point of fdog.addAssembly.

    Parses --fasta, --out, --ncbi, --ver and --link, then copies (or links)
    each accepted fasta file to ``<out>/<name>/<name>.fa``, where ``name`` is
    produced by ``addTaxon_fn.generate_spec_name``.

    Two input modes are supported:
      * --fasta is a single file and --ncbi is a number;
      * --fasta is a folder and --ncbi is a mapping file read by
        ``parse_file`` (key -> file name inside the folder).

    Invalid arguments print a message and terminate with exit status 1.
    Problems with an individual fasta file are reported and the remaining
    files are still processed.
    """
    print("#################################")
    #################### handle user input #####################################
    version = '0.0.3'
    ################### initialize parser ######################################
    parser = argparse.ArgumentParser(description='You are running fdog.addAssembly version ' + str(version) + '.')
    ################## required arguments ######################################
    required = parser.add_argument_group('Required arguments')
    required.add_argument('--fasta', help='Path to fasta file or folder', action='store', default='', required=True)
    required.add_argument('--out', help='Path to output folder.', action='store', default='', required=True)
    required.add_argument('--ncbi', help='NCBI ID of species or a mapping file (tab separated) containing the NCBI ID and the corresponding file name placed in the folder given by --fasta. ', action='store', default='', required=True)
    required.add_argument('--ver', help='Version', action='store', default='', required=True)
    optional = parser.add_argument_group('Optional arguments')
    optional.add_argument('--link', help='links fasta files instead of copying them', action='store_true', default = False)

    args = parser.parse_args()
    fasta = args.fasta
    ncbi = args.ncbi
    ver = args.ver

    # check_path() yields False, "File" or "Path"; query each argument once.
    input_kind = check_path(fasta)
    if not input_kind:
        print("%s does not exists. Exiting ..."%(fasta))
        sys.exit(1)
    single_file = input_kind == "File"

    out_folder = os.path.abspath(args.out) + '/'
    try:
        os.makedirs(out_folder, exist_ok=True)
    except OSError as err:
        print("Could not create output folder %s: %s. Exiting ..." % (out_folder, err))
        sys.exit(1)

    ncbi_kind = check_path(ncbi)
    if ncbi_kind == "File":
        if single_file:
            # A mapping file names files inside a folder, so it cannot be
            # combined with a single fasta file (this used to end in a
            # KeyError because the mapping was looked up by its own path).
            print("%s is a mapping file, which requires --fasta to be a folder, but %s is a file. Exiting ..." % (ncbi, fasta))
            sys.exit(1)
        print("Parsing mapping file ...")
        id_dict = parse_file(ncbi)
        print("... done")
    elif not ncbi_kind and ncbi.isdigit() and single_file:
        id_dict = {ncbi: fasta}
    else:
        print("%s is no file or digit. Exiting ..."%(ncbi))
        sys.exit(1)

    # NOTE(review): --link creates a hard link for a single fasta file but a
    # symbolic link for files taken from a folder. This mirrors the previous
    # "ln" / "ln -s" calls; confirm whether the difference is intended.
    if not args.link:
        link_kind = None
    elif single_file:
        link_kind = "hard"
    else:
        link_kind = "symbolic"

    fasta_dir = os.path.abspath(fasta)
    for sp, fa in id_dict.items():
        if single_file:
            fasta_path = fa
        else:
            print("Adding species %s"%(sp))
            fasta_path = os.path.join(fasta_dir, fa)

        if not os.path.isfile(fasta_path):
            print("%s does not exist or is not a file. Skipping ..." % (fasta_path))
            continue

        # NOTE(review): check_fasta() returns 1 when the header/sequence line
        # counts differ or a '|' is found, and 0 otherwise, so the file is
        # added on a return of 1 and the "not valid" message is printed on 0.
        # That looks inverted relative to the message; behaviour is kept
        # unchanged here pending confirmation of the intended contract.
        if check_fasta(fasta_path):
            name = addTaxon_fn.generate_spec_name(sp, "", ver)
            assembly_folder = os.path.join(out_folder, name)
            try:
                _place_fasta(fasta_path, assembly_folder, name, link_kind)
            except OSError as err:
                # Report and carry on so one bad file does not stop the rest.
                print("Could not add %s: %s" % (fasta_path, err))
        else:
            print("%s Fasta format not valid or header includes |"%(fasta_path))

    print("DONE, files can be found: %s"%(out_folder))

# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
Loading