diff --git a/bamcleanheader.py b/bamcleanheader.py index 9e854cc..6abe8ba 100755 --- a/bamcleanheader.py +++ b/bamcleanheader.py @@ -38,7 +38,7 @@ def get_args(): # extract read group information from header of original bam def get_clean_header(bam): - clean_header_list = list() + clean_header_list = [] for line in bam.text.split('\n'): if len(line.rstrip()) == 0: continue @@ -101,17 +101,15 @@ def get_clean_header(bam): # add read group info to header of new sam file def bam_clean(bam, is_sam, header_only): if is_sam: - in_bam = pysam.Samfile(bam, 'r', check_sq=False) + in_bam = pysam.AlignmentFile(bam, 'r', check_sq=False) else: - in_bam = pysam.Samfile(bam, 'rb', check_sq=False) + in_bam = pysam.AlignmentFile(bam, 'rb', check_sq=False) - # out_bam = pysam.Samfile('-', 'w', template=in_bam) - - print get_clean_header(in_bam) + print(get_clean_header(in_bam)) if not header_only: for al in in_bam: - print al + print(al) # # this code leads to pipeing errors # if not header_only: @@ -140,6 +138,6 @@ def main(): if __name__ == '__main__': try: sys.exit(main()) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise diff --git a/bamfilterrg.py b/bamfilterrg.py old mode 100755 new mode 100644 index 8a1c097..1e927ec --- a/bamfilterrg.py +++ b/bamfilterrg.py @@ -8,8 +8,6 @@ import sys import argparse from argparse import RawTextHelpFormatter -import string -from string import * __author__ = "Colby Chiang (cc2qe@virginia.edu)" __version__ = "$Revision: 0.0.1 $" @@ -17,24 +15,24 @@ def bamfilterrg(bamfile, readgroup, limit, is_sam, bam_out, uncompressed_out): # set input file - if bamfile == None: + if bamfile is None: if is_sam: - in_bam = pysam.Samfile("-", "r") + in_bam = pysam.AlignmentFile('-', 'r') else: - in_bam = pysam.Samfile('-', 'rb') + in_bam = pysam.AlignmentFile('-', 'rb') else: if is_sam: - in_bam = pysam.Samfile(bamfile, 'r') + in_bam = pysam.AlignmentFile(bamfile, 'r') else: - in_bam = pysam.Samfile(bamfile, "rb") + in_bam = pysam.AlignmentFile(bamfile, 'rb') # set output file if uncompressed_out: - out_bam = pysam.Samfile('-', 'wbu', template=in_bam) + out_bam = pysam.AlignmentFile('-', 'wbu', template=in_bam) elif bam_out: - out_bam = pysam.Samfile('-', 'wb', template=in_bam) + out_bam = pysam.AlignmentFile('-', 'wb', template=in_bam) else: - out_bam = pysam.Samfile('-', 'wh', template=in_bam) + out_bam = pysam.AlignmentFile('-', 'wh', template=in_bam) # parse readgroup string @@ -46,7 +44,7 @@ def bamfilterrg(bamfile, readgroup, limit, is_sam, bam_out, uncompressed_out): counter = 0 for al in in_bam: # must be in a user specified readgroup - if rg_list and al.opt('RG') not in rg_list: + if rg_list and al.get_tag('RG') not in rg_list: continue # write out alignment @@ -54,11 +52,9 @@ def bamfilterrg(bamfile, readgroup, limit, is_sam, bam_out, uncompressed_out): counter += 1 # bail if reached limit - if (limit != None - and counter >= limit): + if limit is not None and counter >= limit: break - # ============================================ # functions # ============================================ @@ -67,7 +63,7 @@ def bamfilterrg(bamfile, readgroup, limit, is_sam, bam_out, uncompressed_out): class Namegroup(): def __init__(self, al): self.alignments = list() - self.name = al.qname + self.name = al.query_name self.sa = 0 self.num_prim = 0 self.add_alignment(al) @@ -77,8 +73,8 @@ def add_alignment(self, al): if not al.is_secondary: self.num_prim += 1 try: - self.sa += len(al.opt('SA').rstrip(';').split(';')) # print self.sa + self.sa += len(al.get_tag('SA').rstrip(';').split(';')) except KeyError: pass @@ -125,7 +121,7 @@ def main(): if __name__ == "__main__": try: sys.exit(main()) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise diff --git a/bamfixflags.py b/bamfixflags.py index a5208ff..9bab566 100755 --- a/bamfixflags.py +++ b/bamfixflags.py @@ -127,10 +127,10 @@ def bamfixflags(bamfile, lib_mean = mean(lib_hist) lib_sd = stdev(lib_hist) - print 'p25', p25 - print 'p75', p75 - print 'mean', lib_mean - print 'sd', lib_sd + print('p25', p25) + print('p75', p75) + print('mean', lib_mean) + print('sd', lib_sd) low = int(p25 - mapping_bound * (p75 - p25) + .499) high = int(p75 + mapping_bound * (p75 - p25) + .499) @@ -171,51 +171,51 @@ def bamfixflags(bamfile, else: out_bam = pysam.Samfile('-', 'wh', template=in_bam) - print proper + print(proper) for al in in_bam: # out_bam.write(al) - print al + print(al) if al.is_supplementary: pass elif al.is_unmapped or al.mate_is_unmapped: if al.is_proper_pair: - print 'mismarked proper (unmapped)' + print('mismarked proper (unmapped)') al.is_proper_pair = False elif al.reference_id != al.next_reference_id: if al.is_proper_pair: - print 'mismarked proper (chrom)' + print('mismarked proper (chrom)') al.is_proper_pair = False elif (al.reference_start < al.next_reference_start and (al.is_reverse or not al.mate_is_reverse)): if al.is_proper_pair: - print 'mismarked proper (orient +)' + print('mismarked proper (orient +)') al.is_proper_pair = False elif (al.reference_start > al.next_reference_start and (not al.is_reverse or al.mate_is_reverse)): if al.is_proper_pair: - print 'mismarked proper (orient -)' + print('mismarked proper (orient -)') al.is_proper_pair = False # if al.supp elif (al.template_length >= proper[al.opt('RG')][0] and al.template_length <= proper[al.opt('RG')][1]): if not al.is_proper_pair: - print 'mismarked improper (insert size)' + print('mismarked improper (insert size)') al.is_proper_pair = True else: if al.is_proper_pair: - print 'mismarked proper (insert size)' + print('mismarked proper (insert size)') al.is_proper_pair = False # out_bam.write(al) - print al # print proper[al.opt('RG')], al.template_length # print al + print(al) # # must be in a user specified readgroup # if al.opt('RG') not in rg_list: # continue @@ -301,7 +301,7 @@ def main(): if __name__ == "__main__": try: sys.exit(main()) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise diff --git a/bamgroupreads.py b/bamgroupreads.py index 10c2b55..e07420e 100755 --- a/bamgroupreads.py +++ b/bamgroupreads.py @@ -8,8 +8,6 @@ import sys import argparse from argparse import RawTextHelpFormatter -import string -from string import * __author__ = "Colby Chiang (cc2qe@virginia.edu)" __version__ = "$Revision: 0.0.1 $" @@ -17,25 +15,24 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, uncompressed_out): # set input file - if bamfile == None: + if bamfile is None: if is_sam: - in_bam = pysam.Samfile("-", "r") + in_bam = pysam.AlignmentFile("-", "r") else: - in_bam = pysam.Samfile('-', 'rb') + in_bam = pysam.AlignmentFile('-', 'rb') else: if is_sam: - in_bam = pysam.Samfile(bamfile, 'r') + in_bam = pysam.AlignmentFile(bamfile, 'r') else: - in_bam = pysam.Samfile(bamfile, "rb") + in_bam = pysam.AlignmentFile(bamfile, "rb") # set output file if uncompressed_out: - out_bam = pysam.Samfile('-', 'wbu', template=in_bam) + out_bam = pysam.AlignmentFile('-', 'wbu', template=in_bam) elif bam_out: - out_bam = pysam.Samfile('-', 'wb', template=in_bam) + out_bam = pysam.AlignmentFile('-', 'wb', template=in_bam) else: - out_bam = pysam.Samfile('-', 'wh', template=in_bam) - + out_bam = pysam.AlignmentFile('-', 'wh', template=in_bam) # parse readgroup string try: @@ -46,11 +43,11 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un d = {} for al in in_bam: # must be in a user specified readgroup - if rg_list and al.opt('RG') not in rg_list: + if rg_list and al.get_tag('RG') not in rg_list: continue # add read name to dictionary if not already there - key = al.qname + key = al.query_name if key not in d: d.setdefault(key,Namegroup(al)) # print matched read pairs @@ -60,7 +57,7 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un for al in d[key].alignments: if reset_dups: # unset the duplicate flag - al.is_duplicate = 0 + al.is_duplicate = False if fix_flags: # fix the secondary mate flag proper_pair = False @@ -74,8 +71,7 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un proper_pair = True if flagcheck.is_duplicate: duplicate = True - if (legacy and flagcheck.is_secondary - or not legacy and flagcheck.flag & 2048 == 2048): + if (legacy and flagcheck.is_secondary) or (not legacy and flagcheck.is_supplementary): continue if flagcheck.is_read1: read1_unmapped = flagcheck.is_unmapped @@ -102,22 +98,21 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un # ============================================ # class that holds reads from a sequence fragment -class Namegroup(): +class Namegroup: def __init__(self, al): - self.alignments = list() - self.name = al.qname + self.alignments = [] + self.name = al.query_name self.sa = 0 self.num_prim = 0 self.add_alignment(al) def add_alignment(self, al): self.alignments.append(al) - if not (legacy and al.is_secondary - or not legacy and al.flag & 2048 == 2048): + if not ((legacy and al.is_secondary) or (not legacy and al.is_supplementary)): self.num_prim += 1 try: - self.sa += len(al.opt('SA').rstrip(';').split(';')) - # print self.sa + self.sa += len(al.get_tag('SA').rstrip(';').split(';')) + # print(self.sa) except KeyError: pass @@ -169,7 +164,7 @@ def main(): if __name__ == "__main__": try: sys.exit(main()) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise diff --git a/bamheadrg.py b/bamheadrg.py index 62c8e7c..7e36468 100755 --- a/bamheadrg.py +++ b/bamheadrg.py @@ -40,10 +40,10 @@ def get_args(): # extract read group information from header of original bam def extract_rg_info(donor, donor_is_sam, rgs_to_extract): if donor_is_sam: - bam = pysam.Samfile(donor, 'r', check_sq=False) + bam = pysam.AlignmentFile(donor, 'r', check_sq=False) else: - bam = pysam.Samfile(donor, 'rb', check_sq=False) - rg_out = list() + bam = pysam.AlignmentFile(donor, 'rb', check_sq=False) + rg_out = [] for line in bam.text.split('\n'): if line[:3] == "@RG": v = line.rstrip().split('\t') @@ -71,9 +71,9 @@ def bamheadrg(recipient, rg_out): if in_header: if line[0] != '@': for readgroup in rg_out: - print '@RG\t' + '\t'.join([':'.join((t,readgroup[t])) for t in readgroup]) + print('@RG\t' + '\t'.join([':'.join((t,readgroup[t])) for t in readgroup])) in_header = False - print line.rstrip() + print(line.rstrip()) return # -------------------------------------- @@ -101,6 +101,6 @@ def main(): if __name__ == '__main__': try: sys.exit(main()) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise diff --git a/bamlibs.py b/bamlibs.py index c19f5bc..7252bbc 100755 --- a/bamlibs.py +++ b/bamlibs.py @@ -26,7 +26,7 @@ def get_args(): args = parser.parse_args() # if no input, check if part of pipe and if so, read stdin. - if args.input == None: + if args.input is None: if sys.stdin.isatty(): parser.print_help() exit(1) @@ -39,9 +39,9 @@ def get_args(): # add read group info to header of new sam file def get_libs(bam, is_sam, header_only): if is_sam: - in_bam = pysam.Samfile(bam, 'r', check_sq=False) + in_bam = pysam.AlignmentFile(bam, 'r', check_sq=False) else: - in_bam = pysam.Samfile(bam, 'rb', check_sq=False) + in_bam = pysam.AlignmentFile(bam, 'rb', check_sq=False) lib_rg = defaultdict(list) for line in in_bam.text.split('\n'): @@ -58,7 +58,7 @@ def get_libs(bam, is_sam, header_only): # print for lib in lib_rg: - print ','.join(lib_rg[lib]) + print(','.join(lib_rg[lib])) in_bam.close() return @@ -77,6 +77,6 @@ def main(): if __name__ == '__main__': try: sys.exit(main()) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise diff --git a/bamtofastq.py b/bamtofastq.py index 8d4283e..3859f38 100755 --- a/bamtofastq.py +++ b/bamtofastq.py @@ -4,12 +4,11 @@ #/gapp/x64linux/opt/pythonbrew/venvs/Python-2.7.6/gemini/bin/python # for uva cluster: + import pysam import sys import argparse from argparse import RawTextHelpFormatter -import string -from string import * __author__ = "Ira Hall (ihall@genome.wustl.edu) and Colby Chiang (cc2qe@virginia.edu)" __version__ = "$Revision: 0.0.1 $" @@ -18,16 +17,16 @@ def bamtofastq(bamlist, is_sam, readgroup, rename, header): for bamfile in bamlist: # get file and header - if bamfile == None: + if bamfile is None: if is_sam: - bam = pysam.Samfile("-", "r", check_sq=False) + bam = pysam.AlignmentFile("-", "r", check_sq=False) else: - bam = pysam.Samfile('-', 'rb', check_sq=False) + bam = pysam.AlignmentFile('-', 'rb', check_sq=False) else: if is_sam: - bam = pysam.Samfile(bamfile, 'r', check_sq=False) + bam = pysam.AlignmentFile(bamfile, 'r', check_sq=False) else: - bam = pysam.Samfile(bamfile, "rb", check_sq=False) + bam = pysam.AlignmentFile(bamfile, "rb", check_sq=False) # parse readgroup string try: rg_list = readgroup.split(',') @@ -55,35 +54,35 @@ def bamtofastq(bamlist, is_sam, readgroup, rename, header): continue # must be in a user specified readgroup - if rg_list and al.opt('RG') not in rg_list: + if rg_list and al.get_tag('RG') not in rg_list: continue # ensures the read is not hard-clipped. important # when the BAM doesn't have shorter hits flagged as # secondary - if al.cigar is not None and 5 in [x[0] for x in al.cigar]: + if al.cigartuples is not None and 5 in [x[0] for x in al.cigartuples]: continue # add read name to dictionary if not already there - key = al.qname + key = al.query_name if key not in d: d.setdefault(key,al) # print matched read pairs else: # RG:Z:ID try: - RG1 = d[key].opt('RG') + RG1 = d[key].get_tag('RG') except KeyError: RG1 = "" try: - RG2 = al.opt('RG') + RG2 = al.get_tag('RG') except KeyError: RG2 = "" counter += 1 if rename: - al.qname = RG2 + '.' + str(counter) - d[key].qname = RG1 + '.' + str(counter) + al.query_name = RG2 + '.' + str(counter) + d[key].query_name = RG1 + '.' + str(counter) if al.is_read1: printfastq_rg(al,1,RG2) @@ -129,22 +128,23 @@ def get_args(): # send back the user input return args +def revcomp(seq): + seq1 = seq.translate(str.maketrans("AGCTagct", "TCGAtcga")) + seq2 = seq1[::-1] + return seq2 + def printfastq(al,read): if(al.is_reverse): - print "@" + str(al.qname) + "/" + str(read) + "\n" + str(revcomp(al.seq)) + "\n" + "+" + "\n" + str(al.qual[::-1]) + print("@" + str(al.query_name) + "/" + str(read) + "\n" + str(revcomp(al.seq)) + "\n" + "+" + "\n" + str(al.qual[::-1])) else: - print "@" + str(al.qname) + "/" + str(read) + "\n" + str(al.seq) + "\n" + "+" + "\n" + str(al.qual) + print("@" + str(al.query_name) + "/" + str(read) + "\n" + str(al.seq) + "\n" + "+" + "\n" + str(al.qual)) def printfastq_rg(al,read, rg): if(al.is_reverse): - print "@" + str(al.qname) + "/" + str(read) + " " + "RG:Z:" + str(rg) + "\n" + str(revcomp(al.seq)) + "\n" + "+" + "\n" + str(al.qual[::-1]) + print("@" + str(al.query_name) + "/" + str(read) + " " + "RG:Z:" + str(rg) + "\n" + str(revcomp(al.seq)) + "\n" + "+" + "\n" + str(al.qual[::-1])) else: - print "@" + str(al.qname) + "/" + str(read) + " " + "RG:Z:" + str(rg) + "\n" + str(al.seq) + "\n" + "+" + "\n" + str(al.qual) + print("@" + str(al.query_name) + "/" + str(read) + " " + "RG:Z:" + str(rg) + "\n" + str(al.seq) + "\n" + "+" + "\n" + str(al.qual)) -def revcomp(seq): - seq1 = seq.translate(maketrans("AGCTagct", "TCGAtcga")) - seq2 = seq1[::-1] - return seq2 #=================================================================================================================================================== # driver @@ -166,7 +166,7 @@ def main(): if __name__ == "__main__": try: sys.exit(main()) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise