Skip to content

Commit f2dacc8

Browse files
committed
Get the foldseek path from a parameter; it defaults to the same directory as the spacedust binary
1 parent f09db40 commit f2dacc8

File tree

7 files changed

+47
-5
lines changed

7 files changed

+47
-5
lines changed

Spacedust.ipynb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,8 @@
354354
" wget -q wget https://mmseqs.com/foldseek/foldseek-linux-avx2.tar.gz\n",
355355
" tar -xzf foldseek-linux-avx2.tar.gz\n",
356356
" rm -f foldseek-linux-avx2.tar.gz\n",
357+
" mv foldseek/bin/foldseek spacedust/bin\n",
358+
" rm -rf foldseek\n",
357359
" touch FOLDSEEK_READY\n",
358360
"fi\n",
359361
"\n",

data/clusterdb.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ notExists() {
99

1010
IN="$1"
1111
TMP_PATH="$2"
12-
FOLDSEEK="$(pwd)"/foldseek/bin/foldseek
1312

1413
[ ! -f "${IN}.dbtype" ] && echo "${IN}.dbtype not found!" && exit 1;
1514
if [ -n "${USE_FOLDSEEK}" ]; then

data/clustersearch.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ QUERY="$1"
2121
TARGET="$2"
2222
OUTPUT="$3"
2323
TMP_PATH="$4"
24-
FOLDSEEK="$(pwd)"/foldseek/bin/foldseek
2524

2625
if [ -n "${USE_FOLDSEEK}" ]; then
2726
[ -n "${USE_PROFILE}" ] && [ ! -f "${TARGET}_foldseek_clu_seq.dbtype" ] && echo "${TARGET}_foldseek_clu_seq.dbtype not found! Please make sure the ${TARGET}_foldseek is clustered with clusterdb ${TARGET}_foldseek tmp --search-mode 1" && exit 1;

examples/Spacedust.ipynb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@
132132
" wget -q wget https://mmseqs.com/foldseek/foldseek-linux-avx2.tar.gz\n",
133133
" tar -xzf foldseek-linux-avx2.tar.gz\n",
134134
" rm -f foldseek-linux-avx2.tar.gz\n",
135+
" mv foldseek/bin/foldseek spacedust/bin\n",
136+
" rm -rf foldseek\n",
135137
" touch FOLDSEEK_READY\n",
136138
"fi\n",
137139
"\n",

src/commons/LocalParameters.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class LocalParameters : public Parameters {
4040
PARAMETER(PARAM_FILE_INCLUDE)
4141
PARAMETER(PARAM_FILE_EXCLUDE)
4242
PARAMETER(PARAM_GFF_DIR)
43+
PARAMETER(PARAM_FOLDSEEK_PATH)
4344

4445
int clusterSearchMode;
4546
float pMHThr;
@@ -53,6 +54,7 @@ class LocalParameters : public Parameters {
5354
int suboptHitsFactor;
5455
std::string fileInclude;
5556
std::string fileExclude;
57+
std::string foldseekPath;
5658

5759
private:
5860
LocalParameters() :
@@ -68,7 +70,8 @@ class LocalParameters : public Parameters {
6870
PARAM_PROFILE_CLUSTER_SEARCH(PARAM_PROFILE_CLUSTER_SEARCH_ID, "--profile-cluster-search", "Cluster search against profiles", "Perform profile(target)-sequence searches in clustersearch", typeid(bool), (void *) &profileClusterSearch, ""),
6971
PARAM_FILE_INCLUDE(PARAM_FILE_INCLUDE_ID, "--file-include", "File Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &fileInclude, "^.*$"),
7072
PARAM_FILE_EXCLUDE(PARAM_FILE_EXCLUDE_ID, "--file-exclude", "File Exclusion Regex", "Exclude file names based on this regex", typeid(std::string), (void *) &fileExclude, "^.*$"),
71-
PARAM_GFF_DIR(PARAM_GFF_DIR_ID, "--gff-dir", "gff dir file", "Path to gff dir file", typeid(std::string), (void *) &gffDir, "")
73+
PARAM_GFF_DIR(PARAM_GFF_DIR_ID, "--gff-dir", "gff dir file", "Path to gff dir file", typeid(std::string), (void *) &gffDir, ""),
74+
PARAM_FOLDSEEK_PATH(PARAM_FOLDSEEK_PATH_ID, "--foldseek-path", "Path to Foldseek", "Path to Foldseek binary", typeid(std::string), (void *) &foldseekPath, "")
7275
{
7376

7477
// clusterhits
@@ -132,6 +135,7 @@ class LocalParameters : public Parameters {
132135
clustersearchworkflow = combineList(clustersearchworkflow, clusterhits);
133136
clustersearchworkflow.push_back(&PARAM_PROFILE_CLUSTER_SEARCH);
134137
clustersearchworkflow.push_back(&PARAM_CLUSTERSEARCH_MODE);
138+
clustersearchworkflow.push_back(&PARAM_FOLDSEEK_PATH);
135139

136140
//aa2foldseek
137141
aa2foldseek = combineList(prefilter, align);
@@ -141,6 +145,7 @@ class LocalParameters : public Parameters {
141145
//clusterdb
142146
clusterdb = combineList(clusterworkflow, profile2seq);
143147
clusterdb.push_back(&PARAM_CLUSTERSEARCH_MODE);
148+
clusterdb.push_back(&PARAM_FOLDSEEK_PATH);
144149

145150
clusterSearchMode = 0;
146151
suboptHitsFactor = 0;
@@ -154,6 +159,7 @@ class LocalParameters : public Parameters {
154159
fileInclude = ".*";
155160
fileExclude = "^$";
156161
gffDir = "";
162+
foldseekPath = "foldseek";
157163

158164
//TODO: add citations (foldseek & mmseqs & clustersearch)
159165
citations.emplace(CITATION_SPACEDUST, "");

src/workflow/clusterdb.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ void setclusterDbDefaults(LocalParameters *p) {
1515
int clusterdb(int argc, const char **argv, const Command &command) {
1616
LocalParameters &par = LocalParameters::getLocalInstance();
1717
setclusterDbDefaults(&par);
18+
par.foldseekPath = FileUtil::dirName(*(argv - 2)) + "/foldseek";
1819
par.parseParameters(argc, argv, command, true, 0, 0);
1920

2021
if (FileUtil::directoryExists(par.db2.c_str()) == false) {
@@ -42,7 +43,23 @@ int clusterdb(int argc, const char **argv, const Command &command) {
4243
if (par.removeTmpFiles) {
4344
cmd.addVariable("REMOVE_TMP", "TRUE");
4445
}
45-
cmd.addVariable("USE_FOLDSEEK", par.clusterSearchMode == 1 ? "TRUE" : NULL);
46+
47+
bool useFoldseek = false;
48+
if (par.clusterSearchMode == 1) {
49+
useFoldseek = true;
50+
struct stat st;
51+
if (stat(par.foldseekPath.c_str(), &st) != 0) {
52+
Debug(Debug::ERROR) << "Cannot find foldseek binary " << par.foldseekPath << ".\n";
53+
EXIT(EXIT_FAILURE);
54+
}
55+
bool isExecutable = (st.st_mode & S_IXUSR) || (st.st_mode & S_IXGRP) || (st.st_mode & S_IXOTH);
56+
if (isExecutable == false) {
57+
Debug(Debug::ERROR) << "Cannot execute foldseek binary " << par.foldseekPath << ".\n";
58+
EXIT(EXIT_FAILURE);
59+
}
60+
}
61+
cmd.addVariable("FOLDSEEK", par.foldseekPath.c_str());
62+
cmd.addVariable("USE_FOLDSEEK", useFoldseek ? "TRUE" : NULL);
4663
cmd.addVariable("CLUSTER_PAR", par.createParameterString(par.clusterworkflow).c_str());
4764
cmd.addVariable("CONSENSUS_PAR", par.createParameterString(par.profile2seq).c_str());
4865
cmd.addVariable("THREADS_PAR", par.createParameterString(par.onlythreads).c_str());

src/workflow/clustersearch.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ void setClusterSearchWorkflowDefaults(LocalParameters *p) {
3939
int clustersearch(int argc, const char **argv, const Command &command) {
4040
LocalParameters &par = LocalParameters::getLocalInstance();
4141
setClusterSearchWorkflowDefaults(&par);
42+
par.foldseekPath = FileUtil::dirName(*(argv - 2)) + "/foldseek";
4243

4344
par.PARAM_MAX_REJECTED.addCategory(MMseqsParameter::COMMAND_EXPERT);
4445
par.PARAM_DB_OUTPUT.addCategory(MMseqsParameter::COMMAND_EXPERT);
@@ -86,8 +87,24 @@ int clustersearch(int argc, const char **argv, const Command &command) {
8687
if (par.removeTmpFiles) {
8788
cmd.addVariable("REMOVE_TMP", "TRUE");
8889
}
90+
91+
bool useFoldseek = false;
92+
if (par.clusterSearchMode == 1) {
93+
useFoldseek = true;
94+
struct stat st;
95+
if (stat(par.foldseekPath.c_str(), &st) != 0) {
96+
Debug(Debug::ERROR) << "Cannot find foldseek binary " << par.foldseekPath << ".\n";
97+
EXIT(EXIT_FAILURE);
98+
}
99+
bool isExecutable = (st.st_mode & S_IXUSR) || (st.st_mode & S_IXGRP) || (st.st_mode & S_IXOTH);
100+
if (isExecutable == false) {
101+
Debug(Debug::ERROR) << "Cannot execute foldseek binary " << par.foldseekPath << ".\n";
102+
EXIT(EXIT_FAILURE);
103+
}
104+
}
89105
cmd.addVariable("USE_PROFILE", par.profileClusterSearch == 1 ? "TRUE" : NULL);
90-
cmd.addVariable("USE_FOLDSEEK", par.clusterSearchMode == 1 ? "TRUE" : NULL);
106+
cmd.addVariable("FOLDSEEK", par.foldseekPath.c_str());
107+
cmd.addVariable("USE_FOLDSEEK", useFoldseek ? "TRUE" : NULL);
91108
cmd.addVariable("CLUSTER_PAR", par.createParameterString(par.clusterworkflow).c_str());
92109
if(par.numIterations <= 1){
93110
std::vector<MMseqsParameter*> searchwithoutnumiter;

0 commit comments

Comments
 (0)