Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ Changes in v0.11
This is largely a bugfix release, many thanks to contributors Rory Kirchner,
Stefano Rivera, Daniel Lowengrub, Nolan Woods, Stefen Moeller, and Husen Umer.

- Avoid deadlocks in tests under Python 3.8 (`#155 <https://github.com/daler/gffutils/pull/155>`_, thanks Stefano Rivera)
- Avoid deadlocks in tests under Python 3.8 (`#155
<https://github.com/daler/gffutils/pull/155>`_, thanks Stefano Rivera)
- Fix deprecation warning for invalid escape sequence (`#168
<https://github.com/daler/gffutils/pull/168>`_, Stefen Moeller, and `#165 <https://github.com/daler/gffutils/pull/165>`_, thanks Rory Kirchner)
<https://github.com/daler/gffutils/pull/168>`_, Stefen Moeller, and `#165
<https://github.com/daler/gffutils/pull/165>`_, thanks Rory Kirchner)
- Fix ResourceWarning about unclosed file (`#169
<https://github.com/daler/gffutils/pull/169>`_, thanks Daniel Lowengrub)
- Allow database creation when there is an empty string in the transcript ID
Expand All @@ -32,6 +34,9 @@ Stefano Rivera, Daniel Lowengrub, Nolan Woods, Stefen Moeller, and Husen Umer.
attributes. This solves things like `#128
<https://github.com/daler/gffutils/issues/128/>`_ where some dialect
components are otherwise ambiguous.
- Fix bug in :meth:`FeatureDB.children_bp` (`#157
<https://github.com/daler/gffutils/issues/157>`_), where the ``ignore_strand``
argument is deprecated.

Changes in v0.10.1
------------------
Expand Down
15 changes: 10 additions & 5 deletions gffutils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,14 @@ def to_unicode(obj, encoding="utf-8"):


def canonical_transcripts(db, fasta_filename):
"""
WARNING: this function is currently not well tested and will likely be
replaced with a more modular approach.
"""
import pyfaidx

fasta = pyfaidx.Fasta(fasta_filename, as_raw=True)

fasta = pyfaidx.Fasta(fasta_filename, as_raw=False)
for gene in db.features_of_type("gene"):

# exon_list will contain (CDS_length, total_length, transcript, [exons]) tuples.
Expand All @@ -502,20 +507,20 @@ def canonical_transcripts(db, fasta_filename):
cds_len += exon_length
total_len += exon_length

exon_list.append((cds_len, total_len, transcript, exons))
exon_list.append((cds_len, total_len, transcript, exons if cds_len == 0 else [e for e in exons if e.featuretype in ['CDS', 'five_prime_UTR', 'three_prime_UTR']]))

# If we have CDS, then use the longest coding transcript
if max(i[0] for i in exon_list) > 0:
best = sorted(exon_list)[0]
best = sorted(exon_list, key=lambda x: x[0], reverse=True)[0]
# Otherwise, just choose the longest
else:
best = sorted(exon_list, lambda x: x[1])[0]
best = sorted(exon_list, key=lambda x: x[1])[0]

print(best)

canonical_exons = best[-1]
transcript = best[-2]
seqs = [i.sequence(fasta) for i in canonical_exons]
seqs = [i.sequence(fasta) for i in sorted(canonical_exons, key=lambda x: x.start, reverse=transcript.strand != '+')]
yield transcript, "".join(seqs)


Expand Down
7 changes: 7 additions & 0 deletions gffutils/test/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,10 @@ def test_issue_157():
# The way to do it now is the following (we can omit the mc.feature_type
# since we're preselecting for exons anyway):
db.children_bp(gene, child_featuretype='exon', merge=True, merge_criteria=(mc.overlap_end_inclusive))


def test_issue_159():
    """
    Smoke test for issue #159: build an in-memory database from the example
    GFF file and exhaust ``canonical_transcripts`` against the example FASTA.

    Nothing is asserted about the yielded values; the test passes as long as
    iterating the generator to completion raises no exception.
    """
    gff_path = gffutils.example_filename('FBgn0031208.gff')
    db = gffutils.create_db(gff_path, ":memory:")
    fasta = gffutils.example_filename('dm6-chr2L.fa')

    # Drain the generator; each item is a (transcript, sequence) pair.
    for _transcript, _sequence in gffutils.helpers.canonical_transcripts(db, fasta):
        pass