From 6474a51e2fd4fbb07f6bd58836383053c48ce09a Mon Sep 17 00:00:00 2001
From: Brent Pedersen <bpederse@gmail.com>
Date: Thu, 24 Oct 2019 13:59:09 -0600
Subject: [PATCH] updates for python3

---
 setup.py                       |  3 +--
 svtools/afreq.py               |  2 +-
 svtools/bedpe.py               |  2 +-
 svtools/breakpoint.py          |  4 ++--
 svtools/cli.py                 |  2 +-
 svtools/sv_classifier.py       |  5 +++++
 svtools/vcf/file.py            |  2 +-
 svtools/vcfpaste.py            |  4 ++--
 svtools/vcftobedpe.py          |  2 ++
 svtools/vcftobedpeconverter.py |  6 +++---
 tests/bedpe_tests.py           |  4 ++--
 tests/breakpoint_tests.py      |  2 +-
 tests/cluster_tests.py         |  4 ++--
 tests/file_conversion.py       |  2 +-
 tests/reclassifier_tests.py    | 23 ++++++++++++++---------
 tests/util_tests.py            |  9 +++++++--
 16 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/setup.py b/setup.py
index 21fed969..c38fa4e4 100755
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,6 @@
         'Intended Audience :: Science/Research',
         'Topic :: Scientific/Engineering :: Bio-Informatics',
         'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python :: 2.7',
     ],
 
     keywords='genomics structural variants sv bioinformatics',
@@ -31,7 +30,7 @@
     packages=find_packages(exclude=['tests']),
     include_package_data=True,
 
-    install_requires=['svtyper==0.7.1', 'numpy', 'scipy', 'statsmodels', 'pandas==0.19.2', 'setuptools',
+    install_requires=['svtyper==0.7.1', 'numpy', 'scipy', 'statsmodels', 'pandas', 'setuptools',
         'google-auth',
         'google-cloud-storage',
         'google-compute-engine',
diff --git a/svtools/afreq.py b/svtools/afreq.py
index 3ed3fdbd..803bf9c3 100644
--- a/svtools/afreq.py
+++ b/svtools/afreq.py
@@ -15,7 +15,7 @@ def numeric_alleles(gt_string):
         gt = gt_string.split('/')
         if len(gt) == 1:
             gt = gt_string.split('|')
-        return map(int, gt)
+        return [int(x) for x in gt]
 
     def execute(self, output_handle=sys.stdout):
         in_header = True
diff --git a/svtools/bedpe.py b/svtools/bedpe.py
index 72054eaa..5fece719 100644
--- a/svtools/bedpe.py
+++ b/svtools/bedpe.py
@@ -139,7 +139,7 @@ def retrieve_svtype(self):
 
     def retrieve_af(self):
         try:
-            af = re.split('=', ''.join(filter(lambda x: x.startswith('AF='), self.info.split(';'))))[1]
+            af = float(re.split('=', ''.join(filter(lambda x: x.startswith('AF='), self.info.split(';'))))[1])
         except IndexError:
             af = None
         return af
diff --git a/svtools/breakpoint.py b/svtools/breakpoint.py
index 689e2ba6..85024b9f 100644
--- a/svtools/breakpoint.py
+++ b/svtools/breakpoint.py
@@ -1,7 +1,7 @@
 import sys
 
-import l_bp
-from exceptions import MissingProbabilitiesException
+from . import l_bp
+from .exceptions import MissingProbabilitiesException
 
 class BreakpointInterval(object):
     '''
diff --git a/svtools/cli.py b/svtools/cli.py
index 9e3bb26f..74bd4c75 100644
--- a/svtools/cli.py
+++ b/svtools/cli.py
@@ -18,7 +18,7 @@
 class SupportAction(argparse.Action):
     def __call__(self, parser, namespace, values, option_string=None):
         support_string = 'For further help or to report a bug, please open an issue on the svtools repository: https://github.com/hall-lab/svtools/issues'
-        print support_string
+        print(support_string)
         sys.exit()
 
 def svtools_cli_parser():
diff --git a/svtools/sv_classifier.py b/svtools/sv_classifier.py
index fae22604..6a91e096 100644
--- a/svtools/sv_classifier.py
+++ b/svtools/sv_classifier.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import print_function
 
 import argparse, sys, copy, gzip, math
 import numpy as np
@@ -434,6 +435,8 @@ def sv_classify(vcf_in, vcf_out, gender_file, sex_chrom_names, exclude_file, ae_
         outf.write("varid\torig_svtype\tsvlen\tnum_pos_samps\tnb_support\tls_support\thybrid_support\thas_rd_support\n")
 
     for line in vcf_in:
+        if isinstance(line, bytes):
+            line = line.decode()
         if in_header:
             if line[0] == '#':
                 header.append(line)
@@ -543,6 +546,8 @@ def get_ae_dict(ae_path):
         ae_bedfile = open(ae_path, 'r')
     ae_dict = {}
     for line in ae_bedfile:
+        if isinstance(line, bytes):
+            line = line.decode()
         v = line.rstrip().split('\t')
         if len(v) < 4:
             continue
diff --git a/svtools/vcf/file.py b/svtools/vcf/file.py
index 4360efec..0d67f35c 100644
--- a/svtools/vcf/file.py
+++ b/svtools/vcf/file.py
@@ -56,7 +56,7 @@ def add_header(self, header):
                 self.other_meta.append(line.rstrip())
             elif line[0] == '#' and line[1] != '#':
                 self.sample_list = line.rstrip().split('\t')[9:]
-                for i in xrange(0, len(self.sample_list)):
+                for i in range(0, len(self.sample_list)):
                     if self.sample_list[i] not in self.sample_indices:
                         self.sample_indices[self.sample_list[i]] = i + 9
                     else:
diff --git a/svtools/vcfpaste.py b/svtools/vcfpaste.py
index ca43b545..710de51f 100644
--- a/svtools/vcfpaste.py
+++ b/svtools/vcfpaste.py
@@ -34,8 +34,8 @@ def open_files(self):
         self.vcf_files = []
         # parse the vcf files to paste
         for path in self.vcf_file_names:
-	    self.vcf_files.append(InputStream(path, self.tempdir))
-    
+            self.vcf_files.append(InputStream(path, self.tempdir))
+
     def write_header(self, output_handle=sys.stdout):
         master = self.vcf_files[0]
         while 1:
diff --git a/svtools/vcftobedpe.py b/svtools/vcftobedpe.py
index c83c211c..80cc7abd 100755
--- a/svtools/vcftobedpe.py
+++ b/svtools/vcftobedpe.py
@@ -17,6 +17,8 @@ def vcfToBedpe(vcf_file, bedpe_out):
     sec_bnds = dict()
     v = []
     for line in vcf_file:
+        if isinstance(line, bytes):
+            line = line.decode()
         if in_header:
             if line[0:2] == '##':
                 if line.split('=')[0] == '##fileformat':
diff --git a/svtools/vcftobedpeconverter.py b/svtools/vcftobedpeconverter.py
index 8908fd70..3a57c69e 100644
--- a/svtools/vcftobedpeconverter.py
+++ b/svtools/vcftobedpeconverter.py
@@ -74,7 +74,7 @@ def adjust_coordinate(vcf_variant, info_tag, start, end):
         of the tag (if it exists)
         '''
         if info_tag in vcf_variant.info:
-            span = map(int, vcf_variant.info[info_tag].split(','))
+            span = [int(x) for x in vcf_variant.info[info_tag].split(',')]
             if len(span) != 2:
                 raise ValueError('Invalid value for tag {0}. Require 2 values to adjust coordinates.'.format(info_tag))
             return (start + span[0], end + span[1])
@@ -137,7 +137,7 @@ def convert(self, primary_variant, secondary_variant=None):
 
 
 
-        fields = map(str, [
+        fields = [str(x) for x in [
             c1,
             max(s1, 0),
             max(e1, 0),
@@ -158,7 +158,7 @@ def convert(self, primary_variant, secondary_variant=None):
             orig_alt_b,
             info_a,
             info_b,
-            ])
+            ]]
         if vcf_variant.get_format_string() is not None:
             fields += [vcf_variant.get_format_string(), vcf_variant.get_gt_string()]
         return Bedpe(fields)
diff --git a/tests/bedpe_tests.py b/tests/bedpe_tests.py
index b5def2fa..a791d14b 100644
--- a/tests/bedpe_tests.py
+++ b/tests/bedpe_tests.py
@@ -85,13 +85,13 @@ def test_retrieve_svtype(self):
     def test_retrieve_af(self):
         entry1 = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2' ]
         b1 = Bedpe(entry1)
-        self.assertEqual(b1.retrieve_af(), '0.2')
+        self.assertEqual(b1.retrieve_af(), 0.2)
         entry2 = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND', 'SVTYPE=BND' ]
         b2 = Bedpe(entry2)
         self.assertIsNone(b2.retrieve_af())
         entry3 = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND;AF=0.2;FIN_AF=0.01', 'SVTYPE=BND;AF=0.2;FIN_AF=0.01' ]
         b3 = Bedpe(entry3)
-        self.assertEqual(b3.retrieve_af(), '0.2')
+        self.assertEqual(b3.retrieve_af(), 0.2)
 
     def test_str(self):
         # Note that we are testing float to float equivalence. Actually passing in an integer will result in it being converted to float with
diff --git a/tests/breakpoint_tests.py b/tests/breakpoint_tests.py
index 74d9b5f8..548ee607 100644
--- a/tests/breakpoint_tests.py
+++ b/tests/breakpoint_tests.py
@@ -36,7 +36,7 @@ def test_init(self):
         self.assertEqual(fixed_slop.right.p, [1e-100, 0.025, 0.25, 0.45, 0.25, 0.025, 1e-100])
 
         percent_slop = Breakpoint(test_line, percent_slop = 0.2)
-        print percent_slop
+        print(percent_slop)
         self.assertEqual(percent_slop.left.p, [1e-100, 0.025, 0.25, 0.45, 0.25, 0.025, 1e-100])
         self.assertEqual(percent_slop.right.p, [1e-100, 0.025, 0.25, 0.45, 0.25, 0.025, 1e-100])
 
diff --git a/tests/cluster_tests.py b/tests/cluster_tests.py
index 7bb3e1a4..6e3cf771 100644
--- a/tests/cluster_tests.py
+++ b/tests/cluster_tests.py
@@ -61,7 +61,7 @@ def test_add(self):
         c.add(b1, None)
         self.assertEqual(c.size, 1)
         self.assertEqual(c.sv_event, 'BND')
-        self.assertEqual(c.filter, '0.2')
+        self.assertEqual(c.filter, 0.2)
         self.assertEqual(c.chrom_a, '1')
         self.assertEqual(c.min_a, 200)
         self.assertEqual(c.max_a, 300)
@@ -74,7 +74,7 @@ def test_add(self):
         c.add(b2, None)
         self.assertEqual(c.size, 2)
         self.assertEqual(c.sv_event, 'BND')
-        self.assertEqual(c.filter, '0.3')
+        self.assertEqual(c.filter, 0.3)
         self.assertEqual(c.chrom_a, '1')
         self.assertEqual(c.min_a, 195)
         self.assertEqual(c.max_a, 305)
diff --git a/tests/file_conversion.py b/tests/file_conversion.py
index 15932175..ec8b1c54 100644
--- a/tests/file_conversion.py
+++ b/tests/file_conversion.py
@@ -32,7 +32,7 @@ def expected_output_file_path(self, test_name):
 
     def test_forward_conversions(self):
         for test_name in self._test_names:
-            print test_name
+            print(test_name)
             self.convert_and_diff_output(self.forward_convert,
                     self.input_file_path(test_name),
                     self.expected_output_file_path(test_name))
diff --git a/tests/reclassifier_tests.py b/tests/reclassifier_tests.py
index acf72680..26434690 100644
--- a/tests/reclassifier_tests.py
+++ b/tests/reclassifier_tests.py
@@ -7,6 +7,11 @@
 import svtools.sv_classifier
 import gzip
 
+def decode(x):
+    if isinstance(x, bytes):
+        return x.decode()
+    return x
+
 class IntegrationTest_sv_classify(TestCase):
 
     def test_chromosome_prefix(self):
@@ -27,11 +32,11 @@ def test_integration_nb(self):
         temp_descriptor, temp_output_path = tempfile.mkstemp(suffix='.vcf')
         sex=open(sex_file, 'r')
         sex_chrom_names = set(('X', 'Y'))
-        with gzip.open(input, 'rb') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle:
+        with gzip.open(input, 'r') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle:
             svtools.sv_classifier.run_reclassifier(input_handle, output_handle, sex, sex_chrom_names, annot,  0.9, None, 1.0, 0.2, train, 'naive_bayes', diags_file)
-            expected_lines = gzip.open(expected_result, 'rb').readlines()
+            expected_lines = [decode(x) for x in gzip.open(expected_result, 'r').readlines()]
             expected_lines[1] = '##fileDate=' + time.strftime('%Y%m%d') + '\n'
-            produced_lines = open(temp_output_path).readlines()
+            produced_lines = [decode(x) for x in open(temp_output_path).readlines()]
             diff = difflib.unified_diff(produced_lines, expected_lines, fromfile=temp_output_path, tofile=expected_result)
             os.remove(temp_output_path)
             os.remove(diags_file)
@@ -53,11 +58,11 @@ def test_integration_ls(self):
         temp_descriptor, temp_output_path = tempfile.mkstemp(suffix='.vcf')
         sex=open(sex_file, 'r')
         sex_chrom_names = set(('X', 'Y'))
-        with gzip.open(input, 'rb') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle:
+        with gzip.open(input, 'r') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle:
             svtools.sv_classifier.run_reclassifier(input_handle, output_handle, sex, sex_chrom_names, annot,  0.9, None, 1.0, 0.2, train, 'large_sample', diags_file)
-            expected_lines = gzip.open(expected_result, 'rb').readlines()
+            expected_lines = [decode(x) for x in gzip.open(expected_result, 'r').readlines()]
             expected_lines[1] = '##fileDate=' + time.strftime('%Y%m%d') + '\n'
-            produced_lines = open(temp_output_path).readlines()
+            produced_lines = [decode(x) for x in open(temp_output_path).readlines()]
             diff = difflib.unified_diff(produced_lines, expected_lines, fromfile=temp_output_path, tofile=expected_result)
             os.remove(temp_output_path)
             os.remove(diags_file)
@@ -80,11 +85,11 @@ def test_integration_hyb(self):
         temp_descriptor, temp_output_path = tempfile.mkstemp(suffix='.vcf')
         sex=open(sex_file, 'r')
         sex_chrom_names = set(('X', 'Y'))
-        with gzip.open(input, 'rb') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle:
+        with gzip.open(input, 'r') as input_handle, os.fdopen(temp_descriptor, 'w') as output_handle:
             svtools.sv_classifier.run_reclassifier(input_handle, output_handle, sex, sex_chrom_names, annot,  0.9, None, 1.0, 0.2, train, 'hybrid', diags_file)
-            expected_lines = gzip.open(expected_result, 'rb').readlines()
+            expected_lines = [decode(x) for x in gzip.open(expected_result, 'r').readlines()]
             expected_lines[1] = '##fileDate=' + time.strftime('%Y%m%d') + '\n'
-            produced_lines = open(temp_output_path).readlines()
+            produced_lines = [decode(x) for x in open(temp_output_path).readlines()]
             diff = difflib.unified_diff(produced_lines, expected_lines, fromfile=temp_output_path, tofile=expected_result)
             os.remove(temp_output_path)
             os.remove(diags_file)
diff --git a/tests/util_tests.py b/tests/util_tests.py
index b6426e77..9f9090db 100644
--- a/tests/util_tests.py
+++ b/tests/util_tests.py
@@ -3,6 +3,11 @@
 import os
 import svtools.utils as su 
 
+def decode(x):
+    if isinstance(x, bytes):
+        return x.decode()
+    return x
+
 class InputStreamTest(TestCase):
     def test_init_hyphen(self):
         new_handle = su.InputStream('-')
@@ -26,7 +31,7 @@ def test_context_manager(self):
         with su.InputStream(test_input) as stream:
             temporary_obj = stream
             for line in stream:
-                sys.stdout.write(line)
+                sys.stdout.write(decode(line))
         self.assertTrue(temporary_obj.closed)
 
     def test_plain_iteration(self):
@@ -36,7 +41,7 @@ def test_plain_iteration(self):
 
         stream = su.InputStream(test_input)
         for line in stream:
-            sys.stdout.write(line)
+            sys.stdout.write(decode(line))
         stream.close()
         self.assertTrue(stream.handle.closed)