From 6474a51e2fd4fbb07f6bd58836383053c48ce09a Mon Sep 17 00:00:00 2001 From: Brent Pedersen Date: Thu, 24 Oct 2019 13:59:09 -0600 Subject: [PATCH] updates for python3 --- setup.py | 3 +-- svtools/afreq.py | 2 +- svtools/bedpe.py | 2 +- svtools/breakpoint.py | 4 ++-- svtools/cli.py | 2 +- svtools/sv_classifier.py | 5 +++++ svtools/vcf/file.py | 2 +- svtools/vcfpaste.py | 4 ++-- svtools/vcftobedpe.py | 2 ++ svtools/vcftobedpeconverter.py | 6 +++--- tests/bedpe_tests.py | 4 ++-- tests/breakpoint_tests.py | 2 +- tests/cluster_tests.py | 4 ++-- tests/file_conversion.py | 2 +- tests/reclassifier_tests.py | 23 ++++++++++++++--------- tests/util_tests.py | 9 +++++++-- 16 files changed, 46 insertions(+), 30 deletions(-) diff --git a/setup.py b/setup.py index 21fed969..c38fa4e4 100755 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering :: Bio-Informatics', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2.7', ], keywords='genomics structural variants sv bioinformatics', @@ -31,7 +30,7 @@ packages=find_packages(exclude=['tests']), include_package_data=True, - install_requires=['svtyper==0.7.1', 'numpy', 'scipy', 'statsmodels', 'pandas==0.19.2', 'setuptools', + install_requires=['svtyper==0.7.1', 'numpy', 'scipy', 'statsmodels', 'pandas', 'setuptools', 'google-auth', 'google-cloud-storage', 'google-compute-engine', diff --git a/svtools/afreq.py b/svtools/afreq.py index 3ed3fdbd..803bf9c3 100644 --- a/svtools/afreq.py +++ b/svtools/afreq.py @@ -15,7 +15,7 @@ def numeric_alleles(gt_string): gt = gt_string.split('/') if len(gt) == 1: gt = gt_string.split('|') - return map(int, gt) + return [int(x) for x in gt] def execute(self, output_handle=sys.stdout): in_header = True diff --git a/svtools/bedpe.py b/svtools/bedpe.py index 72054eaa..5fece719 100644 --- a/svtools/bedpe.py +++ b/svtools/bedpe.py @@ -139,7 +139,7 @@ def retrieve_svtype(self): def retrieve_af(self): try: - af = 
re.split('=', ''.join(filter(lambda x: x.startswith('AF='), self.info.split(';'))))[1] + af = float(re.split('=', ''.join(filter(lambda x: x.startswith('AF='), self.info.split(';'))))[1]) except IndexError: af = None return af diff --git a/svtools/breakpoint.py b/svtools/breakpoint.py index 689e2ba6..85024b9f 100644 --- a/svtools/breakpoint.py +++ b/svtools/breakpoint.py @@ -1,7 +1,7 @@ import sys -import l_bp -from exceptions import MissingProbabilitiesException +from . import l_bp +from .exceptions import MissingProbabilitiesException class BreakpointInterval(object): ''' diff --git a/svtools/cli.py b/svtools/cli.py index 9e3bb26f..74bd4c75 100644 --- a/svtools/cli.py +++ b/svtools/cli.py @@ -18,7 +18,7 @@ class SupportAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): support_string = 'For further help or to report a bug, please open an issue on the svtools repository: https://github.com/hall-lab/svtools/issues' - print support_string + print(support_string) sys.exit() def svtools_cli_parser(): diff --git a/svtools/sv_classifier.py b/svtools/sv_classifier.py index fae22604..6a91e096 100644 --- a/svtools/sv_classifier.py +++ b/svtools/sv_classifier.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from __future__ import print_function import argparse, sys, copy, gzip, math import numpy as np @@ -434,6 +435,8 @@ def sv_classify(vcf_in, vcf_out, gender_file, sex_chrom_names, exclude_file, ae_ outf.write("varid\torig_svtype\tsvlen\tnum_pos_samps\tnb_support\tls_support\thybrid_support\thas_rd_support\n") for line in vcf_in: + if isinstance(line, bytes): + line = line.decode() if in_header: if line[0] == '#': header.append(line) @@ -543,6 +546,8 @@ def get_ae_dict(ae_path): ae_bedfile = open(ae_path, 'r') ae_dict = {} for line in ae_bedfile: + if isinstance(line, bytes): + line = line.decode() v = line.rstrip().split('\t') if len(v) < 4: continue diff --git a/svtools/vcf/file.py b/svtools/vcf/file.py index 4360efec..0d67f35c 100644 
--- a/svtools/vcf/file.py +++ b/svtools/vcf/file.py @@ -56,7 +56,7 @@ def add_header(self, header): self.other_meta.append(line.rstrip()) elif line[0] == '#' and line[1] != '#': self.sample_list = line.rstrip().split('\t')[9:] - for i in xrange(0, len(self.sample_list)): + for i in range(0, len(self.sample_list)): if self.sample_list[i] not in self.sample_indices: self.sample_indices[self.sample_list[i]] = i + 9 else: diff --git a/svtools/vcfpaste.py b/svtools/vcfpaste.py index ca43b545..710de51f 100644 --- a/svtools/vcfpaste.py +++ b/svtools/vcfpaste.py @@ -34,8 +34,8 @@ def open_files(self): self.vcf_files = [] # parse the vcf files to paste for path in self.vcf_file_names: - self.vcf_files.append(InputStream(path, self.tempdir)) - + self.vcf_files.append(InputStream(path, self.tempdir)) + def write_header(self, output_handle=sys.stdout): master = self.vcf_files[0] while 1: diff --git a/svtools/vcftobedpe.py b/svtools/vcftobedpe.py index c83c211c..80cc7abd 100755 --- a/svtools/vcftobedpe.py +++ b/svtools/vcftobedpe.py @@ -17,6 +17,8 @@ def vcfToBedpe(vcf_file, bedpe_out): sec_bnds = dict() v = [] for line in vcf_file: + if isinstance(line, bytes): + line = line.decode() if in_header: if line[0:2] == '##': if line.split('=')[0] == '##fileformat': diff --git a/svtools/vcftobedpeconverter.py b/svtools/vcftobedpeconverter.py index 8908fd70..3a57c69e 100644 --- a/svtools/vcftobedpeconverter.py +++ b/svtools/vcftobedpeconverter.py @@ -74,7 +74,7 @@ def adjust_coordinate(vcf_variant, info_tag, start, end): of the tag (if it exists) ''' if info_tag in vcf_variant.info: - span = map(int, vcf_variant.info[info_tag].split(',')) + span = [int(x) for x in vcf_variant.info[info_tag].split(',')] if len(span) != 2: raise ValueError('Invalid value for tag {0}. 
Require 2 values to adjust coordinates.'.format(info_tag)) return (start + span[0], end + span[1]) @@ -137,7 +137,7 @@ def convert(self, primary_variant, secondary_variant=None): - fields = map(str, [ + fields = [str(x) for x in [ c1, max(s1, 0), max(e1, 0), @@ -158,7 +158,7 @@ def convert(self, primary_variant, secondary_variant=None): orig_alt_b, info_a, info_b, - ]) + ]] if vcf_variant.get_format_string() is not None: fields += [vcf_variant.get_format_string(), vcf_variant.get_gt_string()] return Bedpe(fields) diff --git a/tests/bedpe_tests.py b/tests/bedpe_tests.py index b5def2fa..a791d14b 100644 --- a/tests/bedpe_tests.py +++ b/tests/bedpe_tests.py @@ -85,13 +85,13 @@ def test_retrieve_svtype(self): def test_retrieve_af(self): entry1 = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2' ] b1 = Bedpe(entry1) - self.assertEqual(b1.retrieve_af(), '0.2') + self.assertEqual(b1.retrieve_af(), 0.2) entry2 = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND', 'SVTYPE=BND' ] b2 = Bedpe(entry2) self.assertIsNone(b2.retrieve_af()) entry3 = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND;AF=0.2;FIN_AF=0.01', 'SVTYPE=BND;AF=0.2;FIN_AF=0.01' ] b3 = Bedpe(entry3) - self.assertEqual(b3.retrieve_af(), '0.2') + self.assertEqual(b3.retrieve_af(), 0.2) def test_str(self): # Note that we are testing float to float equivalence. 
Actually passing in an integer will result in it being converted to float with diff --git a/tests/breakpoint_tests.py b/tests/breakpoint_tests.py index 74d9b5f8..548ee607 100644 --- a/tests/breakpoint_tests.py +++ b/tests/breakpoint_tests.py @@ -36,7 +36,7 @@ def test_init(self): self.assertEqual(fixed_slop.right.p, [1e-100, 0.025, 0.25, 0.45, 0.25, 0.025, 1e-100]) percent_slop = Breakpoint(test_line, percent_slop = 0.2) - print percent_slop + print(percent_slop) self.assertEqual(percent_slop.left.p, [1e-100, 0.025, 0.25, 0.45, 0.25, 0.025, 1e-100]) self.assertEqual(percent_slop.right.p, [1e-100, 0.025, 0.25, 0.45, 0.25, 0.025, 1e-100]) diff --git a/tests/cluster_tests.py b/tests/cluster_tests.py index 7bb3e1a4..6e3cf771 100644 --- a/tests/cluster_tests.py +++ b/tests/cluster_tests.py @@ -61,7 +61,7 @@ def test_add(self): c.add(b1, None) self.assertEqual(c.size, 1) self.assertEqual(c.sv_event, 'BND') - self.assertEqual(c.filter, '0.2') + self.assertEqual(c.filter, 0.2) self.assertEqual(c.chrom_a, '1') self.assertEqual(c.min_a, 200) self.assertEqual(c.max_a, 300) @@ -74,7 +74,7 @@ def test_add(self): c.add(b2, None) self.assertEqual(c.size, 2) self.assertEqual(c.sv_event, 'BND') - self.assertEqual(c.filter, '0.3') + self.assertEqual(c.filter, 0.3) self.assertEqual(c.chrom_a, '1') self.assertEqual(c.min_a, 195) self.assertEqual(c.max_a, 305) diff --git a/tests/file_conversion.py b/tests/file_conversion.py index 15932175..ec8b1c54 100644 --- a/tests/file_conversion.py +++ b/tests/file_conversion.py @@ -32,7 +32,7 @@ def expected_output_file_path(self, test_name): def test_forward_conversions(self): for test_name in self._test_names: - print test_name + print(test_name) self.convert_and_diff_output(self.forward_convert, self.input_file_path(test_name), self.expected_output_file_path(test_name)) diff --git a/tests/reclassifier_tests.py b/tests/reclassifier_tests.py index acf72680..26434690 100644 --- a/tests/reclassifier_tests.py +++ b/tests/reclassifier_tests.py @@ 
-7,6 +7,11 @@ import svtools.sv_classifier import gzip +def decode(x): + if isinstance(x, bytes): + return x.decode() + return x + class IntegrationTest_sv_classify(TestCase): def test_chromosome_prefix(self): @@ -27,11 +32,11 @@ def test_integration_nb(self): temp_descriptor, temp_output_path = tempfile.mkstemp(suffix='.vcf') sex=open(sex_file, 'r') sex_chrom_names = set(('X', 'Y')) - with gzip.open(input, 'rb') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle: + with gzip.open(input, 'r') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle: svtools.sv_classifier.run_reclassifier(input_handle, output_handle, sex, sex_chrom_names, annot, 0.9, None, 1.0, 0.2, train, 'naive_bayes', diags_file) - expected_lines = gzip.open(expected_result, 'rb').readlines() + expected_lines = [decode(x) for x in gzip.open(expected_result, 'r').readlines()] expected_lines[1] = '##fileDate=' + time.strftime('%Y%m%d') + '\n' - produced_lines = open(temp_output_path).readlines() + produced_lines = [decode(x) for x in open(temp_output_path).readlines()] diff = difflib.unified_diff(produced_lines, expected_lines, fromfile=temp_output_path, tofile=expected_result) os.remove(temp_output_path) os.remove(diags_file) @@ -53,11 +58,11 @@ def test_integration_ls(self): temp_descriptor, temp_output_path = tempfile.mkstemp(suffix='.vcf') sex=open(sex_file, 'r') sex_chrom_names = set(('X', 'Y')) - with gzip.open(input, 'rb') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle: + with gzip.open(input, 'r') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle: svtools.sv_classifier.run_reclassifier(input_handle, output_handle, sex, sex_chrom_names, annot, 0.9, None, 1.0, 0.2, train, 'large_sample', diags_file) - expected_lines = gzip.open(expected_result, 'rb').readlines() + expected_lines = [decode(x) for x in gzip.open(expected_result, 'r').readlines()] expected_lines[1] = '##fileDate=' + time.strftime('%Y%m%d') + '\n' - produced_lines = 
open(temp_output_path).readlines() + produced_lines = [decode(x) for x in open(temp_output_path).readlines()] diff = difflib.unified_diff(produced_lines, expected_lines, fromfile=temp_output_path, tofile=expected_result) os.remove(temp_output_path) os.remove(diags_file) @@ -80,11 +85,11 @@ def test_integration_hyb(self): temp_descriptor, temp_output_path = tempfile.mkstemp(suffix='.vcf') sex=open(sex_file, 'r') sex_chrom_names = set(('X', 'Y')) - with gzip.open(input, 'rb') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle: + with gzip.open(input, 'r') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle: svtools.sv_classifier.run_reclassifier(input_handle, output_handle, sex, sex_chrom_names, annot, 0.9, None, 1.0, 0.2, train, 'hybrid', diags_file) - expected_lines = gzip.open(expected_result, 'rb').readlines() + expected_lines = [decode(x) for x in gzip.open(expected_result, 'r').readlines()] expected_lines[1] = '##fileDate=' + time.strftime('%Y%m%d') + '\n' - produced_lines = open(temp_output_path).readlines() + produced_lines = [decode(x) for x in open(temp_output_path).readlines()] diff = difflib.unified_diff(produced_lines, expected_lines, fromfile=temp_output_path, tofile=expected_result) os.remove(temp_output_path) os.remove(diags_file) diff --git a/tests/util_tests.py b/tests/util_tests.py index b6426e77..9f9090db 100644 --- a/tests/util_tests.py +++ b/tests/util_tests.py @@ -3,6 +3,11 @@ import os import svtools.utils as su +def decode(x): + if isinstance(x, bytes): + return x.decode() + return x + class InputStreamTest(TestCase): def test_init_hyphen(self): new_handle = su.InputStream('-') @@ -26,7 +31,7 @@ def test_context_manager(self): with su.InputStream(test_input) as stream: temporary_obj = stream for line in stream: - sys.stdout.write(line) + sys.stdout.write(decode(line)) self.assertTrue(temporary_obj.closed) def test_plain_iteration(self): @@ -36,7 +41,7 @@ def test_plain_iteration(self): stream = 
su.InputStream(test_input) for line in stream: - sys.stdout.write(line) + sys.stdout.write(decode(line)) stream.close() self.assertTrue(stream.handle.closed)