diff --git a/svtools/breakpoint.py b/svtools/breakpoint.py index 689e2ba..010301b 100644 --- a/svtools/breakpoint.py +++ b/svtools/breakpoint.py @@ -1,7 +1,7 @@ import sys -import l_bp -from exceptions import MissingProbabilitiesException +import svtools.l_bp +from svtools.exceptions import MissingProbabilitiesException class BreakpointInterval(object): ''' diff --git a/svtools/vcfpaste.py b/svtools/vcfpaste.py index ca43b54..5100d06 100644 --- a/svtools/vcfpaste.py +++ b/svtools/vcfpaste.py @@ -34,7 +34,7 @@ def open_files(self): self.vcf_files = [] # parse the vcf files to paste for path in self.vcf_file_names: - self.vcf_files.append(InputStream(path, self.tempdir)) + self.vcf_files.append(InputStream(path, self.tempdir)) def write_header(self, output_handle=sys.stdout): master = self.vcf_files[0] diff --git a/svtools/vcftobedpe.py b/svtools/vcftobedpe.py index c83c211..06aa7ec 100755 --- a/svtools/vcftobedpe.py +++ b/svtools/vcftobedpe.py @@ -71,6 +71,9 @@ def vcfToBedpe(vcf_file, bedpe_out): bedpe_out.write(str(converter.convert(var)) + '\n') else: mate_id = var.info['MATEID'] + if "_" in mate_id: + mate_id = mate_id.split('_')[0] + if 'SECONDARY' in var.info: if mate_id in bnds: #primary @@ -89,7 +92,7 @@ def vcfToBedpe(vcf_file, bedpe_out): bedpe_out.write(str(converter.convert(var1, var)) + '\n') del bnds[mate_id] else: - bnds.update({var.var_id:var}) + bnds.update({mate_id:var}) if bnds is not None: for bnd in bnds: sys.stderr.write('Warning: missing secondary multiline variant at ID:' + bnd + '\n') diff --git a/svtools/vcftobedpeconverter.py b/svtools/vcftobedpeconverter.py index 8908fd7..56c9e0a 100644 --- a/svtools/vcftobedpeconverter.py +++ b/svtools/vcftobedpeconverter.py @@ -19,17 +19,24 @@ def bnd_breakpoints(self, vcf_variant): ''' chrom1 = vcf_variant.chrom breakpoint1 = vcf_variant.pos - orientation1 = orientation2 = '+' - sep, chrom2, breakpoint2 = parse_bnd_alt_string(vcf_variant.alt) - breakpoint2 = int(breakpoint2) + if 'MATECHROM' in vcf_variant.info: + chrom2 = vcf_variant.info['MATECHROM'] + breakpoint2 = int(vcf_variant.info['MATEPOS']) + orientation1 = vcf_variant.info['STRAND'] + orientation2 = vcf_variant.info['MATESTRAND'] - if vcf_variant.alt.startswith(sep): - orientation1 = '-' - breakpoint1 -= 1 + else: + orientation1 = orientation2 = '+' + sep, chrom2, breakpoint2 = parse_bnd_alt_string(vcf_variant.alt) + breakpoint2 = int(breakpoint2) + + if vcf_variant.alt.startswith(sep): + orientation1 = '-' + breakpoint1 -= 1 - if sep == '[': - orientation2 = '-' - breakpoint2 -= 1 + if sep == '[': + orientation2 = '-' + breakpoint2 -= 1 return (chrom1, breakpoint1, @@ -74,7 +81,7 @@ def adjust_coordinate(vcf_variant, info_tag, start, end): of the tag (if it exists) ''' if info_tag in vcf_variant.info: - span = map(int, vcf_variant.info[info_tag].split(',')) + span = list(map(int, vcf_variant.info[info_tag].split(','))) if len(span) != 2: raise ValueError('Invalid value for tag {0}. Require 2 values to adjust coordinates.'.format(info_tag)) return (start + span[0], end + span[1]) @@ -129,6 +136,8 @@ def convert(self, primary_variant, secondary_variant=None): # XXX This has probably already been calculated outside of this method. May be a candidate to memoize or otherwise cache? # By adding to the variant class, perhaps? name = vcf_variant.var_id + if '_' in name: + name = name.split('_')[0] if 'EVENT' in vcf_variant.info: name = vcf_variant.info['EVENT'] elif 'MATEID' in vcf_variant.info and vcf_variant.var_id.startswith('Manta'): @@ -137,7 +146,7 @@ def convert(self, primary_variant, secondary_variant=None): - fields = map(str, [ + fields = list(map(str, [ c1, max(s1, 0), max(e1, 0), @@ -158,7 +167,7 @@ def convert(self, primary_variant, secondary_variant=None): orig_alt_b, info_a, info_b, - ]) + ])) if vcf_variant.get_format_string() is not None: fields += [vcf_variant.get_format_string(), vcf_variant.get_gt_string()] return Bedpe(fields)