From 58d5f24ea66f168345520cf2b4450d6b76bf0f81 Mon Sep 17 00:00:00 2001 From: username Date: Tue, 23 Aug 2022 11:40:09 +0900 Subject: [PATCH 1/3] 1) Add -x option to set max_node_dist. 2) Remove what appears to be an error. --- dprog.c | 8 ++++---- dprog.h | 4 ++-- main.c | 20 ++++++++++++++------ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/dprog.c b/dprog.c index e30598a..435c3cd 100644 --- a/dprog.c +++ b/dprog.c @@ -28,7 +28,7 @@ coding, RBS scores, etc. *******************************************************************************/ -int dprog(struct _node *nod, int nn, struct _training *tinf, int flag) { +int dprog(struct _node *nod, int nn, struct _training *tinf, int flag, int max_node_dist) { int i, j, min, max_ndx = -1, path, nxt, tmp; double max_sc = -1.0; @@ -42,15 +42,15 @@ int dprog(struct _node *nod, int nn, struct _training *tinf, int flag) { /* Set up distance constraints for making connections, */ /* but make exceptions for giant ORFS. */ - if(i < MAX_NODE_DIST) min = 0; else min = i-MAX_NODE_DIST; + if(i < max_node_dist) min = 0; else min = i-max_node_dist; if(nod[i].strand == -1 && nod[i].type != STOP && nod[min].ndx >= nod[i].stop_val) while(min >= 0 && nod[i].ndx != nod[i].stop_val) min--; if(nod[i].strand == 1 && nod[i].type == STOP && nod[min].ndx >= nod[i].stop_val) while(min >= 0 && nod[i].ndx != nod[i].stop_val) min--; - if(min < MAX_NODE_DIST) min = 0; - else min = min-MAX_NODE_DIST; +// if(min < max_node_dist) min = 0; +// else min = min-max_node_dist; for(j = min; j < i; j++) { score_connection(nod, j, i, tinf, flag); } diff --git a/dprog.h b/dprog.h index d729f4c..19f5817 100644 --- a/dprog.h +++ b/dprog.h @@ -28,9 +28,9 @@ #define MAX_SAM_OVLP 60 #define MAX_OPP_OVLP 200 -#define MAX_NODE_DIST 500 +// #define MAX_NODE_DIST 500 -int dprog(struct _node *, int, struct _training *, int); +int dprog(struct _node *, int, struct _training *, int, int); void score_connection(struct _node *, int, int, struct _training *, int); void 
eliminate_bad_genes(struct _node *, int, struct _training *); diff --git a/main.c b/main.c index 0834a07..201e3d6 100644 --- a/main.c +++ b/main.c @@ -43,7 +43,7 @@ int copy_standard_input_to_file(char *, int); int main(int argc, char *argv[]) { int rv, slen, nn, ng, i, ipath, *gc_frame, do_training, output, max_phase; - int closed, do_mask, nmask, force_nonsd, user_tt, is_meta, num_seq, quiet; + int closed, do_mask, nmask, force_nonsd, user_tt, is_meta, num_seq, quiet, max_node_dist; int piped, max_slen, fnum; double max_score, gc, low, high; unsigned char *seq, *rseq, *useq; @@ -87,7 +87,7 @@ int main(int argc, char *argv[]) { memset(meta[i].tinf, 0, sizeof(struct _training)); } nn = 0; slen = 0; ipath = 0; ng = 0; nmask = 0; - user_tt = 0; is_meta = 0; num_seq = 0; quiet = 0; + user_tt = 0; is_meta = 0; num_seq = 0; quiet = 0; max_node_dist = 500; max_phase = 0; max_score = -100.0; train_file = NULL; do_training = 0; start_file = NULL; trans_file = NULL; nuc_file = NULL; @@ -113,15 +113,17 @@ int main(int argc, char *argv[]) { /* Parse the command line arguments */ for(i = 1; i < argc; i++) { - if(i == argc-1 && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "-T") == 0 - || strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "-A") == 0 || + if(i == argc-1 && + (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "-T") == 0 || + strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "-A") == 0 || strcmp(argv[i], "-g") == 0 || strcmp(argv[i], "-g") == 0 || strcmp(argv[i], "-f") == 0 || strcmp(argv[i], "-F") == 0 || strcmp(argv[i], "-s") == 0 || strcmp(argv[i], "-S") == 0 || strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "-I") == 0 || strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "-O") == 0 || - strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "-P") == 0)) - usage("-a/-f/-g/-i/-o/-p/-s options require parameters."); + strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "-P") == 0 || + strcmp(argv[i], "-x") == 0 || strcmp(argv[i], "-X") == 0)) + usage("-a/-f/-g/-i/-o/-p/-s/-x options require 
parameters."); else if(strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "-C") == 0) closed = 1; else if(strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "-Q") == 0) @@ -189,6 +191,10 @@ int main(int argc, char *argv[]) { else usage("Invalid output format specified."); i++; } + else if(strcmp(argv[i], "-x") == 0 || strcmp(argv[i], "-X") == 0) { + max_node_dist = atoi(argv[i+1]); if(max_node_dist < 1) usage("Invalid maximum node distance specified."); + i++; + } else usage("Unknown option."); } @@ -684,6 +690,8 @@ void help() { fprintf(stderr, " -t: Write a training file (if none exists); "); fprintf(stderr, "otherwise, read and use\n"); fprintf(stderr, " the specified training file.\n"); + fprintf(stderr, " -x: Specify the number of neighbor nodes for connection scoring.\n"); + fprintf(stderr, " Default is 500.\n"); fprintf(stderr, " -v: Print version number and exit.\n\n"); exit(0); } From 74514ca21d89ca64f6885b2357a98609e2d890e7 Mon Sep 17 00:00:00 2001 From: username Date: Tue, 23 Aug 2022 14:13:35 +0900 Subject: [PATCH 2/3] Add rapid mode (-r) --- dprog.c | 13 ++++++++++--- dprog.h | 2 +- main.c | 18 +++++++++++------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/dprog.c b/dprog.c index 435c3cd..a718e8c 100644 --- a/dprog.c +++ b/dprog.c @@ -28,7 +28,7 @@ coding, RBS scores, etc. *******************************************************************************/ -int dprog(struct _node *nod, int nn, struct _training *tinf, int flag, int max_node_dist) { +int dprog(struct _node *nod, int nn, struct _training *tinf, int flag, int max_node_dist, int rapid) { int i, j, min, max_ndx = -1, path, nxt, tmp; double max_sc = -1.0; @@ -49,8 +49,15 @@ int dprog(struct _node *nod, int nn, struct _training *tinf, int flag, int max_n if(nod[i].strand == 1 && nod[i].type == STOP && nod[min].ndx >= nod[i].stop_val) while(min >= 0 && nod[i].ndx != nod[i].stop_val) min--; -// if(min < max_node_dist) min = 0; -// else min = min-max_node_dist; + + /* Rapid mode is 50% faster producing the same result, */ + /* when tested with E. 
coli genome (GCF_000008865.2). */ + if(rapid){ + if(min < 0) min = 0; + } else { + if(min < max_node_dist) min = 0; else min = min-max_node_dist; + } + for(j = min; j < i; j++) { score_connection(nod, j, i, tinf, flag); } diff --git a/dprog.h b/dprog.h index 19f5817..6235108 100644 --- a/dprog.h +++ b/dprog.h @@ -30,7 +30,7 @@ #define MAX_OPP_OVLP 200 // #define MAX_NODE_DIST 500 -int dprog(struct _node *, int, struct _training *, int, int); +int dprog(struct _node *, int, struct _training *, int, int, int); void score_connection(struct _node *, int, int, struct _training *, int); void eliminate_bad_genes(struct _node *, int, struct _training *); diff --git a/main.c b/main.c index 201e3d6..ad39522 100644 --- a/main.c +++ b/main.c @@ -43,7 +43,7 @@ int copy_standard_input_to_file(char *, int); int main(int argc, char *argv[]) { int rv, slen, nn, ng, i, ipath, *gc_frame, do_training, output, max_phase; - int closed, do_mask, nmask, force_nonsd, user_tt, is_meta, num_seq, quiet, max_node_dist; + int closed, do_mask, nmask, force_nonsd, user_tt, is_meta, num_seq, quiet, max_node_dist, rapid; int piped, max_slen, fnum; double max_score, gc, low, high; unsigned char *seq, *rseq, *useq; @@ -87,7 +87,7 @@ int main(int argc, char *argv[]) { memset(meta[i].tinf, 0, sizeof(struct _training)); } nn = 0; slen = 0; ipath = 0; ng = 0; nmask = 0; - user_tt = 0; is_meta = 0; num_seq = 0; quiet = 0; max_node_dist = 500; + user_tt = 0; is_meta = 0; num_seq = 0; quiet = 0; max_node_dist = 500; rapid = 0; max_phase = 0; max_score = -100.0; train_file = NULL; do_training = 0; start_file = NULL; trans_file = NULL; nuc_file = NULL; @@ -122,7 +122,7 @@ int main(int argc, char *argv[]) { strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "-I") == 0 || strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "-O") == 0 || strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "-P") == 0 || - strcmp(argv[i], "-x") == 0 || strcmp(argv[i], "-X") == 0)) + strcmp(argv[i], "-x") == 0 || strcmp(argv[i], "-X") == 0 )) 
usage("-a/-f/-g/-i/-o/-p/-s/-x options require parameters."); else if(strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "-C") == 0) closed = 1; @@ -195,6 +195,8 @@ int main(int argc, char *argv[]) { max_node_dist = atoi(argv[i+1]); if(max_node_dist < 1) usage("Invalid maximum node distance specified."); i++; } + else if(strcmp(argv[i], "-r") == 0 || strcmp(argv[i], "-R") == 0) + rapid = 1; else usage("Unknown option."); } @@ -388,7 +390,7 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Building initial set of genes to train from..."); } record_overlapping_starts(nodes, nn, &tinf, 0); - ipath = dprog(nodes, nn, &tinf, 0); + ipath = dprog(nodes, nn, &tinf, 0, max_node_dist, rapid); if(quiet == 0) { fprintf(stderr, "done!\n"); } @@ -520,7 +522,7 @@ int main(int argc, char *argv[]) { write_start_file(start_ptr, nodes, nn, &tinf, num_seq, slen, 0, NULL, VERSION, cur_header); record_overlapping_starts(nodes, nn, &tinf, 1); - ipath = dprog(nodes, nn, &tinf, 1); + ipath = dprog(nodes, nn, &tinf, 1, max_node_dist, rapid); eliminate_bad_genes(nodes, ipath, &tinf); ng = add_genes(genes, nodes, ipath); tweak_final_starts(genes, ng, nodes, nn, &tinf); @@ -561,7 +563,7 @@ int main(int argc, char *argv[]) { reset_node_scores(nodes, nn); score_nodes(seq, rseq, slen, nodes, nn, meta[i].tinf, closed, is_meta); record_overlapping_starts(nodes, nn, meta[i].tinf, 1); - ipath = dprog(nodes, nn, meta[i].tinf, 1); + ipath = dprog(nodes, nn, meta[i].tinf, 1, max_node_dist, rapid); if(nodes[ipath].score > max_score) { max_phase = i; max_score = nodes[ipath].score; @@ -690,8 +692,10 @@ void help() { fprintf(stderr, " -t: Write a training file (if none exists); "); fprintf(stderr, "otherwise, read and use\n"); fprintf(stderr, " the specified training file.\n"); - fprintf(stderr, " -x: Specify the number of neighbor nodes for connection scoring.\n"); + fprintf(stderr, " -x: Specify the number of neighbor nodes to score connection.\n"); fprintf(stderr, " Default is 500.\n"); + fprintf(stderr, " -r: Rapid mode. When tested with the E. 
coli genome, "); + fprintf(stderr, "the same result was obtained in 2/3 of the original time.\n"); fprintf(stderr, " -v: Print version number and exit.\n\n"); exit(0); } From 0920435fe19a331365c9e8450c63efaa159e6b49 Mon Sep 17 00:00:00 2001 From: username Date: Tue, 23 Aug 2022 15:10:45 +0900 Subject: [PATCH 3/3] Edit usage to list -x and -r --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index ad39522..95d6d2f 100644 --- a/main.c +++ b/main.c @@ -653,7 +653,7 @@ void usage(char *msg) { fprintf(stderr, " [-g tr_table] [-h] [-i input_file] [-m]"); fprintf(stderr, " [-n] [-o output_file]\n"); fprintf(stderr, " [-p mode] [-q] [-s start_file]"); - fprintf(stderr, " [-t training_file] [-v]\n"); + fprintf(stderr, " [-t training_file] [-x max_node_dist] [-r] [-v]\n"); fprintf(stderr, "\nDo 'prodigal -h' for more information.\n\n"); exit(15); }