Skip to content

Commit 41f6b19

Browse files
committed
Simplify libctabix.pyx by using bgzf_getline() directly
Remove kstream_t, so <htslib/kseq.h> is used only by libcfaidx.pyx.
1 parent 0c5e147 commit 41f6b19

File tree

3 files changed: 18 additions and 45 deletions.

pysam/libctabix.pxd

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,16 +17,10 @@ cdef extern from "unistd.h" nogil:
1717
from pysam.libchtslib cimport hts_idx_t, hts_itr_t, htsFile, \
1818
tbx_t, kstring_t, BGZF, HTSFile
1919

20-
cdef extern from "htslib/kseq.h" nogil:
21-
"""
22-
__KS_TYPE(BGZF *)
23-
"""
24-
ctypedef struct kstream_t
25-
2620

2721
cdef class tabix_file_iterator:
2822
cdef BGZF * fh
29-
cdef kstream_t * kstream
23+
cdef void * unused
3024
cdef kstring_t buffer
3125
cdef size_t size
3226
cdef Parser parser
@@ -88,7 +82,7 @@ cdef class TabixIteratorParsed(TabixIterator):
8882
cdef class GZIterator:
8983
cdef object _filename
9084
cdef BGZF * gzipfile
91-
cdef kstream_t * kstream
85+
cdef void * unused
9286
cdef kstring_t buffer
9387
cdef int __cnext__(self)
9488
cdef encoding

pysam/libctabix.pyx

Lines changed: 7 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ from cpython cimport PyErr_SetString, PyBytes_Check, \
6969
cimport pysam.libctabixproxies as ctabixproxies
7070

7171
from pysam.libchtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\
72-
BGZF, bgzf_open, bgzf_dopen, bgzf_close, bgzf_write, \
72+
BGZF, bgzf_open, bgzf_dopen, bgzf_close, bgzf_getline, bgzf_write, \
7373
tbx_index_build2, tbx_index_load2, tbx_itr_queryi, tbx_itr_querys, \
7474
tbx_conf_t, tbx_seqnames, tbx_itr_next, tbx_itr_destroy, \
7575
tbx_destroy, hisremote, region_list, hts_getline, \
@@ -79,22 +79,6 @@ from pysam.libchtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\
7979
from pysam.libcutils cimport force_bytes, force_str, charptr_to_str
8080
from pysam.libcutils cimport encode_filename, from_string_and_size
8181

82-
cdef extern from "htslib/kseq.h" nogil:
83-
"""
84-
#undef __KS_TYPE
85-
#define __KS_TYPE(type_t)
86-
KSTREAM_INIT2(static, BGZF *, bgzf_read, 16384)
87-
"""
88-
kstream_t *ks_init(BGZF *)
89-
void ks_destroy(kstream_t *)
90-
91-
# Retrieve characters from stream until delimiter
92-
# is reached placing results in str.
93-
int ks_getuntil(kstream_t *,
94-
int delimiter,
95-
kstring_t * str,
96-
int * dret)
97-
9882

9983
cdef class Parser:
10084

@@ -749,7 +733,6 @@ cdef class GZIterator:
749733
with nogil:
750734
self.gzipfile = bgzf_open(cfilename, "r")
751735
self._filename = filename
752-
self.kstream = ks_init(self.gzipfile)
753736
self.encoding = encoding
754737

755738
self.buffer.l = 0
@@ -763,24 +746,15 @@ cdef class GZIterator:
763746
self.gzipfile = NULL
764747
if self.buffer.s != NULL:
765748
free(self.buffer.s)
766-
if self.kstream != NULL:
767-
ks_destroy(self.kstream)
768749

769750
def __iter__(self):
770751
return self
771752

772753
cdef int __cnext__(self):
773-
cdef int dret = 0
774-
cdef int retval = 0
775-
while 1:
776-
with nogil:
777-
retval = ks_getuntil(self.kstream, b'\n', &self.buffer, &dret)
778-
779-
if retval < 0:
780-
break
781-
782-
return dret
783-
return -1
754+
cdef int retval
755+
with nogil:
756+
retval = bgzf_getline(self.gzipfile, b'\n', &self.buffer)
757+
return retval
784758

785759
def __next__(self):
786760
"""python version of next().
@@ -1144,8 +1118,6 @@ cdef class tabix_file_iterator:
11441118
if self.fh == NULL:
11451119
raise IOError('%s' % strerror(errno))
11461120

1147-
self.kstream = ks_init(self.fh)
1148-
11491121
self.buffer.s = <char*>malloc(buffer_size)
11501122
#if self.buffer == NULL:
11511123
# raise MemoryError( "tabix_file_iterator: could not allocate %i bytes" % buffer_size)
@@ -1158,12 +1130,11 @@ cdef class tabix_file_iterator:
11581130
cdef __cnext__(self):
11591131

11601132
cdef char * b
1161-
cdef int dret = 0
11621133
cdef int retval = 0
11631134
while 1:
11641135
with nogil:
1165-
retval = ks_getuntil(self.kstream, b'\n', &self.buffer, &dret)
1166-
1136+
retval = bgzf_getline(self.fh, b'\n', &self.buffer)
1137+
11671138
if retval < 0:
11681139
break
11691140
#raise IOError('gzip error: %s' % buildGzipError( self.fh ))
@@ -1187,7 +1158,6 @@ cdef class tabix_file_iterator:
11871158

11881159
def __dealloc__(self):
11891160
free(self.buffer.s)
1190-
ks_destroy(self.kstream)
11911161
bgzf_close(self.fh)
11921162

11931163
def __next__(self):

tests/compile_test.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,15 @@ def test_alignments(self):
6363
assert hdr.__sizeof__() == 24
6464
assert aln.__sizeof__() == 72
6565

66+
def test_tabix(self):
67+
gzit = pysam.GZIterator(os.path.join(TABIX_DATADIR, "example.gtf.gz"))
68+
69+
with open(os.path.join(TABIX_DATADIR, "example.gtf.gz")) as fp:
70+
tfit = pysam.tabix_file_iterator(fp, pysam.asTuple())
71+
72+
assert gzit.__sizeof__() == 80
73+
assert tfit.__sizeof__() == 96
74+
6675
def test_variants(self):
6776
fp = pysam.VariantFile(os.path.join(CBCF_DATADIR, "example_vcf43.vcf"))
6877
hdr = pysam.VariantHeader()

0 commit comments

Comments
 (0)