daler · daler · Jun 5, 2022 · Jun 5, 2022 · Jun 5, 2022 · Jun 5, 2022
diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
@@ -9,9 +9,11 @@ Changes in v0.11
 This is largely a bugfix release, many thanks to contributors Rory Kirchner,
 Stefano Rivera, Daniel Lowengrub, Nolan Woods, Stefen Moeller,  and Husen Umer.
 
-- Avoid deadlocks in tests under Python 3.8 (`#155 <https://github.com/daler/gffutils/pull/155>`_, thanks Stefano Rivera)
+- Avoid deadlocks in tests under Python 3.8 (`#155
+  <https://github.com/daler/gffutils/pull/155>`_, thanks Stefano Rivera)
 - Fix deprecation warning for invalid escape sequence (`#168
-  <https://github.com/daler/gffutils/pull/168>`_, Stefen Moeller, and `#165 <https://github.com/daler/gffutils/pull/165>`_, thanks Rory Kirchner)
+  <https://github.com/daler/gffutils/pull/168>`_, Stefen Moeller, and `#165
+  <https://github.com/daler/gffutils/pull/165>`_, thanks Rory Kirchner)
 - Fix ResourceWarning about unclosed file (`#169
   <https://github.com/daler/gffutils/pull/169>`_, thanks Daniel Lowengrub)
 - Allow database creation when there is an empty string in the transcript ID
@@ -32,6 +34,9 @@ Stefano Rivera, Daniel Lowengrub, Nolan Woods, Stefen Moeller,  and Husen Umer.
   attributes. This solves things like `#128
   <https://github.com/daler/gffutils/issues/128/>`_ where some dialect
   components are otherwise ambiguous.
+- Fix bug in :meth:`FeatureDB.children_bp`, `#157
+  <https://github.com/daler/gffutils/issues/157>`_, where the `ignore_strand`
+  argument is deprecated.
 
 Changes in v0.10.1
 ------------------

diff --git a/gffutils/helpers.py b/gffutils/helpers.py
@@ -485,9 +485,14 @@ def to_unicode(obj, encoding="utf-8"):
 
 
 def canonical_transcripts(db, fasta_filename):
+    """
+    WARNING: this function is currently not well ttested and will likely be
+    replaced with a more modular approach.
+    """
     import pyfaidx
 
-    fasta = pyfaidx.Fasta(fasta_filename, as_raw=True)
+
+    fasta = pyfaidx.Fasta(fasta_filename, as_raw=False)
     for gene in db.features_of_type("gene"):
 
         # exons_list will contain (CDS_length, total_length, transcript, [exons]) tuples.
@@ -502,20 +507,20 @@ def canonical_transcripts(db, fasta_filename):
                     cds_len += exon_length
                 total_len += exon_length
 
-            exon_list.append((cds_len, total_len, transcript, exons))
+            exon_list.append((cds_len, total_len, transcript, exons if cds_len == 0 else [e for e in exons if e.featuretype in ['CDS', 'five_prime_UTR', 'three_prime_UTR']]))
 
         # If we have CDS, then use the longest coding transcript
         if max(i[0] for i in exon_list) > 0:
-            best = sorted(exon_list)[0]
+            best = sorted(exon_list, key=lambda x: x[0], reverse=True)[0]
         # Otherwise, just choose the longest
         else:
-            best = sorted(exon_list, lambda x: x[1])[0]
+            best = sorted(exon_list, key=lambda x: x[1])[0]
 
         print(best)
 
         canonical_exons = best[-1]
         transcript = best[-2]
-        seqs = [i.sequence(fasta) for i in canonical_exons]
+        seqs = [i.sequence(fasta) for i in sorted(canonical_exons, key=lambda x: x.start, reverse=transcript.strand != '+')]
         yield transcript, "".join(seqs)
 
 

diff --git a/gffutils/test/test_issues.py b/gffutils/test/test_issues.py
@@ -330,3 +330,10 @@ def test_issue_157():
     # The way to do it now is the following (we can omit the mc.feature_type
     # since we're preselecting for exons anyway):
     db.children_bp(gene, child_featuretype='exon', merge=True, merge_criteria=(mc.overlap_end_inclusive))
+
+
+def test_issue_159():
+    db = gffutils.create_db(gffutils.example_filename('FBgn0031208.gff'), ":memory:")
+    fasta = gffutils.example_filename('dm6-chr2L.fa')
+    for transcript, seq in gffutils.helpers.canonical_transcripts(db, fasta):
+        pass