@@ -3432,23 +3432,48 @@ cdef VariantRecord makeVariantRecord(VariantHeader header, bcf1_t *r):
3432
3432
return record
3433
3433
3434
3434
3435
- # #######################################################################
3436
- # #######################################################################
3437
- # # Variant Sampletype object
3438
- # #######################################################################
3439
-
3440
-
3441
3435
cdef class VariantRecordSample(object ):
3442
- """ Data for a single sample from a :class:`VariantRecord` object.
3443
- Provides data accessors for genotypes and a mapping interface
3444
- from format name to values.
3436
+ """ Data for a single sample from a :class:`VariantRecord` object. Provides data accessors for
3437
+ genotypes and a mapping interface from FORMAT fields to values.
3438
+
3439
+ Notes:
3440
+ The :class:`VariantRecordSample` object implements a mapping-like object for a specific
3441
+ VCF/BCF row and sample column. The keys are FORMAT fields and the values are the values in
3442
+ the VCF/BCF file. There is special handling for ``"GT"``, through the :attr:`alleles`,
3443
+ :attr:`allele_indices` and :attr:`phased` attributes. There is also a :attr:`name` property
3444
+ that provides the sample's name.
3445
+
3446
+ Examples:
3447
+ Here is an example of accessing and printing the data in a :class:`VariantRecordSample`::
3448
+
3449
+ variant_file = pysam.VariantFile('/path/to/file.vcf.gz')
3450
+ for variant_record in variant_file.fetch():
3451
+ for sample_name in variant_record.samples:
3452
+ variant_record_sample = variant_record.samples[sample_name]
3453
+ variant_record_sample["GT"] = (0, 1)
3454
+ print(dict(variant_record_sample))
3455
+
3456
+ The above code will print the following::
3457
+
3458
+ {
3459
+ 'AD': (0, 80),
3460
+ 'DP': 79,
3461
+ 'GQ': 32,
3462
+ 'GT': (0, 1),
3463
+ 'PL': (33, 34, 0),
3464
+ 'VAF': (1.0,),
3465
+ 'PS': None
3466
+ }
3467
+
3468
+ Note:
3469
+ The ``"GT"`` value must be provided as a tuple ``(0, 1)`` and not a string ``"0/1"``.
3445
3470
"""
3446
3471
def __init__ (self , *args , **kwargs ):
3447
- raise TypeError (' this class cannot be instantiated from Python' )
3472
+ raise TypeError (' This class cannot be instantiated from Python. ' )
3448
3473
3449
3474
@property
3450
3475
def name (self ):
3451
- """ sample name"""
3476
+ """ The sample name. """
3452
3477
cdef bcf_hdr_t * hdr = self .record.header.ptr
3453
3478
cdef bcf1_t * r = self .record.ptr
3454
3479
cdef int32_t n = r.n_sample
@@ -3460,7 +3485,7 @@ cdef class VariantRecordSample(object):
3460
3485
3461
3486
@property
3462
3487
def allele_indices (self ):
3463
- """ allele indices for called genotype, if present. Otherwise None"""
3488
+ """ Allele indices (e.g. ``(0, 1)``) for the called genotype (if present), otherwise None. """
3464
3489
return bcf_format_get_allele_indices(self )
3465
3490
3466
3491
@allele_indices.setter
@@ -3473,7 +3498,7 @@ cdef class VariantRecordSample(object):
3473
3498
3474
3499
@property
3475
3500
def alleles (self ):
3476
- """ alleles for called genotype, if present. Otherwise None"""
3501
+ """ Alleles (e.g. ``("CT", "C")``) for the called genotype (if present), otherwise None. """
3477
3502
return bcf_format_get_alleles(self )
3478
3503
3479
3504
@alleles.setter
@@ -3502,7 +3527,7 @@ cdef class VariantRecordSample(object):
3502
3527
3503
3528
@property
3504
3529
def phased (self ):
3505
- """ False if genotype is missing or any allele is unphased. Otherwise True."""
3530
+ """ ``False`` if the genotype is missing or any allele is unphased, otherwise ``True``."""
3506
3531
return bcf_sample_get_phased(self )
3507
3532
3508
3533
@phased.setter
@@ -3545,7 +3570,7 @@ cdef class VariantRecordSample(object):
3545
3570
bcf_format_del_value(self , key)
3546
3571
3547
3572
def clear (self ):
3548
- """ Clear all format data (including genotype ) for this sample"""
3573
+ """ Clear all FORMAT fields (including GT ) for this sample. """
3549
3574
cdef bcf_hdr_t * hdr = self .record.header.ptr
3550
3575
cdef bcf1_t * r = self .record.ptr
3551
3576
cdef bcf_fmt_t * fmt
@@ -3568,7 +3593,16 @@ cdef class VariantRecordSample(object):
3568
3593
yield bcf_str_cache_get_charptr(bcf_hdr_int2id(hdr, BCF_DT_ID, fmt.id))
3569
3594
3570
3595
def get (self , key , default = None ):
3571
- """ D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
3596
+ """ Retrieve sample data for FORMAT field ``key`` (e.g. ``"DP"``).
3597
+ If ``key`` is not present, return ``default``.
3598
+
3599
+ Parameters:
3600
+ key : str
3601
+ FORMAT field to retrieve for the sample.
3602
+ default
3603
+ Data to return if ``key`` is not among the FORMAT fields present for the sample.
3604
+ Defaults to None.
3605
+ """
3572
3606
try :
3573
3607
return self [key]
3574
3608
except KeyError :
@@ -3582,35 +3616,37 @@ cdef class VariantRecordSample(object):
3582
3616
return fmt != NULL and fmt.p != NULL
3583
3617
3584
3618
def iterkeys (self ):
3585
- """ D.iterkeys() -> an iterator over the keys of D """
3619
+ """ Return an iterator over all FORMAT field names for this record. """
3586
3620
return iter (self )
3587
3621
3588
3622
def itervalues (self ):
3589
- """ D.itervalues() -> an iterator over the values of D """
3623
+ """ Return an iterator over all FORMAT field values for this sample. """
3590
3624
for key in self :
3591
3625
yield self [key]
3592
3626
3593
3627
def iteritems (self ):
3594
- """ D.iteritems() -> an iterator over the (key , value) items of D """
3628
+ """ Return an iterator over all FORMAT field ``(name , value)`` tuples for this sample. """
3595
3629
for key in self :
3596
3630
yield (key, self [key])
3597
3631
3598
3632
def keys (self ):
3599
- """ D.keys() -> list of D's keys """
3633
+ """ Return a list of all FORMAT field names for this record. """
3600
3634
return list (self )
3601
3635
3602
- def items (self ):
3603
- """ D.items() -> list of D's (key, value) pairs, as 2-tuples"""
3604
- return list (self .iteritems())
3605
-
3606
3636
def values (self ):
3607
- """ D.values() -> list of D's values"""
3637
+ """ Return a list of all FORMAT field values for this sample. """
3608
3638
return list (self .itervalues())
3609
3639
3640
+ def items (self ):
3641
+ """ Return a list of all FORMAT field ``(name, value)`` tuples for this sample."""
3642
+ return list (self .iteritems())
3643
+
3610
3644
def update (self , items = None , **kwargs ):
3611
- """ D.update([E, ]**F) -> None .
3645
+ """ Update the FORMAT field values for this sample .
3612
3646
3613
- Update D from dict/iterable E and F.
3647
+ Parameters:
3648
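+ items : dict
3649
+ A dictionary or dictionary-like object (anything providing ``items()``) used to update the FORMAT field names and values. Although the parameter defaults to ``None``, ``None`` is not handled: ``items.items()`` is called unconditionally.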
3614
3650
"""
3615
3651
for k, v in items.items():
3616
3652
self [k] = v
@@ -3620,6 +3656,23 @@ cdef class VariantRecordSample(object):
3620
3656
self [k] = v
3621
3657
3622
3658
def pop (self , key , default = _nothing):
3659
+ """ Remove the FORMAT field ``key`` for this sample and return its value.
3660
+
3661
+ Parameters:
3662
+ key : str
3663
+ FORMAT field to remove from the sample.
3664
+ default : Any
3665
+ Data to return if ``key`` is not present.
3666
+
3667
+ Raises:
3668
+ KeyError
3669
+ When ``key`` is not present and ``default`` is unset.
3670
+
3671
+ Returns:
3672
+ value
3673
+ The value of the removed FORMAT field for this sample.
3674
+ """
3675
+
3623
3676
try :
3624
3677
value = self [key]
3625
3678
del self [key]
@@ -3646,7 +3699,7 @@ cdef class VariantRecordSample(object):
3646
3699
3647
3700
cdef VariantRecordSample makeVariantRecordSample(VariantRecord record, int32_t sample_index):
3648
3701
if not record or sample_index < 0 :
3649
- raise ValueError (' cannot create VariantRecordSample' )
3702
+ raise ValueError (" Cannot create a VariantRecordSample. " )
3650
3703
3651
3704
cdef VariantRecordSample sample = VariantRecordSample.__new__ (VariantRecordSample)
3652
3705
sample.record = record
0 commit comments