Skip to content

Commit dc5ca68

Browse files
committed
Added VariantRecordSample documentation
1 parent 2f9d50d commit dc5ca68

File tree

2 files changed

+84
-28
lines changed

2 files changed

+84
-28
lines changed

doc/api.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,9 @@ VCF/BCF files
219219
.. autoclass:: pysam.VariantRecord
220220
:members:
221221

222+
.. autoclass:: pysam.VariantRecordSample
223+
:members:
224+
222225
HTSFile
223226
=======
224227

pysam/libcbcf.pyx

Lines changed: 81 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3432,23 +3432,48 @@ cdef VariantRecord makeVariantRecord(VariantHeader header, bcf1_t *r):
34323432
return record
34333433

34343434

3435-
########################################################################
3436-
########################################################################
3437-
## Variant Sampletype object
3438-
########################################################################
3439-
3440-
34413435
cdef class VariantRecordSample(object):
3442-
"""Data for a single sample from a :class:`VariantRecord` object.
3443-
Provides data accessors for genotypes and a mapping interface
3444-
from format name to values.
3436+
"""Data for a single sample from a :class:`VariantRecord` object. Provides data accessors for
3437+
genotypes and a mapping interface from FORMAT fields to values.
3438+
3439+
Notes:
3440+
The :class:`VariantRecordSample` object implements a mapping-like object for a specific
3441+
VCF/BCF row and sample column. The keys are FORMAT fields and the values are the values in
3442+
the VCF/BCF file. There is special handling for ``"GT"``, through the :attr:`alleles`,
3443+
:attr:`allele_indices` and :attr:`phased` attributes. There is also a :attr:`name` property
3444+
that provides the sample's name.
3445+
3446+
Examples:
3447+
Here is an example of accessing, modifying and printing the data in a :class:`VariantRecordSample`::
3448+
3449+
variant_file = pysam.VariantFile('/path/to/file.vcf.gz')
3450+
for variant_record in variant_file.fetch():
3451+
for sample_name in variant_record.samples:
3452+
variant_record_sample = variant_record.samples[sample_name]
3453+
variant_record_sample["GT"] = (0, 1)
3454+
print(dict(variant_record_sample))
3455+
3456+
The above code will print the following::
3457+
3458+
{
3459+
'AD': (0, 80),
3460+
'DP': 79,
3461+
'GQ': 32,
3462+
'GT': (0, 1),
3463+
'PL': (33, 34, 0),
3464+
'VAF': (1.0,),
3465+
'PS': None
3466+
}
3467+
3468+
Note:
3469+
The ``"GT"`` value must be provided as a tuple ``(0, 1)`` and not a string ``"0/1"``.
34453470
"""
34463471
def __init__(self, *args, **kwargs):
3447-
raise TypeError('this class cannot be instantiated from Python')
3472+
raise TypeError('This class cannot be instantiated from Python.')
34483473

34493474
@property
34503475
def name(self):
3451-
"""sample name"""
3476+
"""The sample name."""
34523477
cdef bcf_hdr_t *hdr = self.record.header.ptr
34533478
cdef bcf1_t *r = self.record.ptr
34543479
cdef int32_t n = r.n_sample
@@ -3460,7 +3485,7 @@ cdef class VariantRecordSample(object):
34603485

34613486
@property
34623487
def allele_indices(self):
3463-
"""allele indices for called genotype, if present. Otherwise None"""
3488+
"""Allele indices (e.g. ``(0, 1)``) for the called genotype (if present), otherwise None."""
34643489
return bcf_format_get_allele_indices(self)
34653490

34663491
@allele_indices.setter
@@ -3473,7 +3498,7 @@ cdef class VariantRecordSample(object):
34733498

34743499
@property
34753500
def alleles(self):
3476-
"""alleles for called genotype, if present. Otherwise None"""
3501+
"""Alleles (e.g. ``("CT", "C")``) for the called genotype (if present), otherwise None."""
34773502
return bcf_format_get_alleles(self)
34783503

34793504
@alleles.setter
@@ -3502,7 +3527,7 @@ cdef class VariantRecordSample(object):
35023527

35033528
@property
35043529
def phased(self):
3505-
"""False if genotype is missing or any allele is unphased. Otherwise True."""
3530+
"""``False`` if the genotype is missing or any allele is unphased, otherwise ``True``."""
35063531
return bcf_sample_get_phased(self)
35073532

35083533
@phased.setter
@@ -3545,7 +3570,7 @@ cdef class VariantRecordSample(object):
35453570
bcf_format_del_value(self, key)
35463571

35473572
def clear(self):
3548-
"""Clear all format data (including genotype) for this sample"""
3573+
"""Clear all FORMAT fields (including GT) for this sample."""
35493574
cdef bcf_hdr_t *hdr = self.record.header.ptr
35503575
cdef bcf1_t *r = self.record.ptr
35513576
cdef bcf_fmt_t *fmt
@@ -3568,7 +3593,16 @@ cdef class VariantRecordSample(object):
35683593
yield bcf_str_cache_get_charptr(bcf_hdr_int2id(hdr, BCF_DT_ID, fmt.id))
35693594

35703595
def get(self, key, default=None):
3571-
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
3596+
"""Retrieve sample data for FORMAT field ``key`` (e.g. ``"DP"``).
3597+
If ``key`` is not present, return ``default``.
3598+
3599+
Parameters:
3600+
key : str
3601+
FORMAT field to retrieve for the sample.
3602+
default
3603+
Data to return if ``key`` is not present in the FORMAT field for the sample.
3604+
Defaults to None.
3605+
"""
35723606
try:
35733607
return self[key]
35743608
except KeyError:
@@ -3582,35 +3616,37 @@ cdef class VariantRecordSample(object):
35823616
return fmt != NULL and fmt.p != NULL
35833617

35843618
def iterkeys(self):
3585-
"""D.iterkeys() -> an iterator over the keys of D"""
3619+
"""Return an iterator over all FORMAT field names for this record."""
35863620
return iter(self)
35873621

35883622
def itervalues(self):
3589-
"""D.itervalues() -> an iterator over the values of D"""
3623+
"""Return an iterator over all FORMAT field values for this sample."""
35903624
for key in self:
35913625
yield self[key]
35923626

35933627
def iteritems(self):
3594-
"""D.iteritems() -> an iterator over the (key, value) items of D"""
3628+
"""Return an iterator over all FORMAT field ``(name, value)`` tuples for this sample."""
35953629
for key in self:
35963630
yield (key, self[key])
35973631

35983632
def keys(self):
3599-
"""D.keys() -> list of D's keys"""
3633+
"""Return a list of all FORMAT field names for this record."""
36003634
return list(self)
36013635

3602-
def items(self):
3603-
"""D.items() -> list of D's (key, value) pairs, as 2-tuples"""
3604-
return list(self.iteritems())
3605-
36063636
def values(self):
3607-
"""D.values() -> list of D's values"""
3637+
"""Return a list of all FORMAT field values for this sample."""
36083638
return list(self.itervalues())
36093639

3640+
def items(self):
3641+
"""Return a list of all FORMAT field ``(name, value)`` tuples for this sample."""
3642+
return list(self.iteritems())
3643+
36103644
def update(self, items=None, **kwargs):
3611-
"""D.update([E, ]**F) -> None.
3645+
"""Update the FORMAT field values for this sample.
36123646
3613-
Update D from dict/iterable E and F.
3647+
Parameters:
3648+
items : dict | None
3649+
A dictionary or dictionary-like object used to update the FORMAT field names and values.
36143650
"""
36153651
for k, v in items.items():
36163652
self[k] = v
@@ -3620,6 +3656,23 @@ cdef class VariantRecordSample(object):
36203656
self[k] = v
36213657

36223658
def pop(self, key, default=_nothing):
3659+
"""Remove the FORMAT field ``key`` for this sample and return its value.
3660+
3661+
Parameters:
3662+
key : str
3663+
FORMAT field to remove for the sample.
3664+
default : Any
3665+
Data to return if ``key`` is not present.
3666+
3667+
Raises:
3668+
KeyError
3669+
When ``key`` is not present and ``default`` is unset.
3670+
3671+
Returns:
3672+
value
3673+
The value of the removed FORMAT field for this sample.
3674+
"""
3675+
36233676
try:
36243677
value = self[key]
36253678
del self[key]
@@ -3646,7 +3699,7 @@ cdef class VariantRecordSample(object):
36463699

36473700
cdef VariantRecordSample makeVariantRecordSample(VariantRecord record, int32_t sample_index):
36483701
if not record or sample_index < 0:
3649-
raise ValueError('cannot create VariantRecordSample')
3702+
raise ValueError("Cannot create a VariantRecordSample.")
36503703

36513704
cdef VariantRecordSample sample = VariantRecordSample.__new__(VariantRecordSample)
36523705
sample.record = record

0 commit comments

Comments
 (0)