BackofenLab · Necopy-byte · May 28, 2025 · May 28, 2025 · Jun 3, 2025 · Jun 3, 2025
diff --git a/components/components_evaluation.py b/components/components_evaluation.py
@@ -300,7 +300,7 @@ def _extract_orf_scores_and_proteins(self):
     def _run_hmm_search(self):
         #cmd = 'tools/hmm_search/hmmsearch --tblout result_hmm.out tools/hmm_search/models_tandem.hmm protein_results.fa'
 
-        no_binary_cmd = "hmmsearch --tblout result_hmm.out tools/hmm_search/models_tandem.hmm protein_results.fa"
+        no_binary_cmd = "hmmsearch --tblout result_hmm.out ../tools/hmm_search/models_tandem.hmm protein_results.fa"
 
         process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
         process.communicate()
@@ -403,7 +403,7 @@ def _extract_all_blast_scores(self):
         #cmd += ' -word_size 6'
         #cmd += ' -outfmt 6 -out output_fasta_bulk_extraction1'
 
-        no_binary_cmd = f"blastn -query file_with_all_consensus.fa -db tools/blasting/{db_file} -word_size 6  -outfmt 6 -out output_fasta_bulk_extraction1"
+        no_binary_cmd = f"blastn -query file_with_all_consensus.fa -db ../tools/blasting/{db_file} -word_size 6  -outfmt 6 -out output_fasta_bulk_extraction1"
 
         process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
         process.communicate()
@@ -416,7 +416,7 @@ def _extract_all_blast_scores(self):
         #cmd += ' -word_size 6'
         #cmd += ' -outfmt 6 -out output_fasta_bulk_extraction2'
 
-        no_binary_cmd = f"blastn -query file_with_all_consensus.fa -db tools/blasting/{db_file} -word_size 6  -outfmt 6 -out output_fasta_bulk_extraction2"
+        no_binary_cmd = f"blastn -query file_with_all_consensus.fa -db ../tools/blasting/{db_file} -word_size 6  -outfmt 6 -out output_fasta_bulk_extraction2"
 
         process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
         process.communicate()
@@ -869,7 +869,7 @@ def __init__(self, index, list_repeats, list_spacers):
         self._create_crispr_seq()
         self._create_file()
         self._call_prodigal()
-        self._run_hmm_search('tools/hmm_search/models_tandem.hmm')
+        self._run_hmm_search('../tools/hmm_search/models_tandem.hmm')
         self._get_best_score_from_hmm()
         self._clean_up()
 
@@ -897,7 +897,7 @@ def _run_hmm_search(self, hmm_model):
             #                                                            hmm_model,
             #                                                            'protein_{}.fa'.format(self.index))
 
-            no_binary_cmd = f"tools/hmm_search/hmmsearch --tblout result_hmm_{self.index}.out hmm_model protein_{self.index}.fa"
+            no_binary_cmd = f"hmmsearch --tblout result_hmm_{self.index}.out {hmm_model} protein_{self.index}.fa"
 
             process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
             process.communicate()

diff --git a/components/components_output_maker.py b/components/components_output_maker.py
@@ -408,7 +408,7 @@ def _write_fasta_summary(self):
         fasta_path = f"{self.result_path}/Spacers.fasta"
 
         with open(fasta_path, "w") as fasta_file:
-            for category_index, category_name in zip([0, 2], ["Bona-fide", "Possible"]):
+            for category_index, category_name in zip([0], ["Bona-fide"]):
                 for key, crisprs in self.categories[category_index].items():
                     for crispr in crisprs:
                         consensus = crispr[1].consensus
@@ -550,7 +550,7 @@ def crispr_candidate_to_dictionary(crispr_candidate):
 
 
 class GFFOutputMaker:
-    def __init__(self, result_path, categories, non_array_data, header, list_feature_names):
+    def __init__(self, result_path, categories, non_array_data, list_feature_names):
         self.result_path = result_path
         self.categories = categories
         self.non_array_data = non_array_data
@@ -623,7 +623,7 @@ def _create_gff_bona_fide(self):
                 for index, array_index, array, score, feature_info in zip(range(len(arrays)), array_indexes,
                                                                           arrays, scores, features):
                     if "Bona-fide" in self.non_array_data["Strand"]:
-                        strand = self.non_array_data["Strand"]["Bona_fide"][index]
+                        strand = self.non_array_data["Strand"]["Bona-fide"][index]
                     else:
                         strand = "Forward (Orientation was not computed)"
 
@@ -633,7 +633,7 @@ def _create_gff_bona_fide(self):
 
                     crispr_stats = crispr.compute_stats()
                     crispr_start = crispr_stats["start"]
-                    crispr_end = crispr_stats["end"] + 1
+                    crispr_end = crispr_stats["end"]
                     crispr_length = crispr_end - crispr_start + 1
                     consensus = crispr.consensus
                     line_crispr = f"{self.acc_num}\tCRISPRidentify\tbona-fide_array_region\t{crispr_start}\t{crispr_end}\t{crispr_length}\t{strand_sign}\t.\tID=CRISPR{array_index}_{crispr_start}_{crispr_end};Note={consensus};Dbxref=SO:0001459;Ontology_term=CRISPR;Array_quality_score={score}\n"
@@ -644,10 +644,10 @@ def _create_gff_bona_fide(self):
                     repeat_starts = crispr.list_repeat_starts
 
                     repeat_starts = [1 + r_s for r_s in repeat_starts]
-                    repeat_ends = [rs + len(repeat) for rs, repeat in zip(repeat_starts, repeats)]
+                    repeat_ends = [rs + (len(repeat) - 1) for rs, repeat in zip(repeat_starts, repeats)]
 
-                    spacers_starts = [r_e + 1 for r_e in repeat_ends[:-1]]
-                    spacers_ends = [s_s + len(s) for s_s, s in zip(spacers_starts, spacers)]
+                    spacers_starts = [1 + r_e for r_e in repeat_ends[:-1]]
+                    spacers_ends = [s_s + (len(s) - 1) for s_s, s in zip(spacers_starts, spacers)]
 
                     repeat_indexes = list(range(1, len(repeats) + 1))
                     spacer_indexes = list(range(1, len(spacers) + 1))
@@ -800,7 +800,7 @@ def _create_gff_possible(self):
                     f.write(line_repeat)
 
     def _create_gff_complete(self):
-        with open(join(self.gff_folder, "combined.gff"), "w") as f:
+        with open(join(self.result_path, "combined.gff"), "w") as f:
 
             for category_index, category_name in zip([0, 1, 2], ["Bona-fide", "Alternative", "Possible"]):
 

diff --git a/components/module_output_maker.py b/components/module_output_maker.py
@@ -5,7 +5,7 @@
 from components.components_output_maker import CasSummaryMaker
 from components.components_output_maker import FastaOutputArrayMaker
 from components.components_output_maker import JsonOutputMaker
-
+from components.components_output_maker import GFFOutputMaker
 from components.components_output_maker import CompleteFastaOutputMaker
 from components.components_output_maker import CompleteFolderSummaryMaker
 from components.components_output_maker import CompleteCasSummaryFolderMaker
@@ -49,6 +49,8 @@ def _make_output(self):
                                  categories=self.categories,
                                  non_array_data=self.non_array_data)
 
+        gffom = GFFOutputMaker(result_path=self.result_path, categories=self.categories, non_array_data=self.non_array_data,
+                                list_feature_names=self.list_features)
         if self.flags["flag_cas"] is True:
             sm_cas = CasSummaryMaker(result_path=self.result_path,
                                      non_array_data=self.non_array_data)