diff --git a/moalmanac/annotator.py b/moalmanac/annotator.py index 2c1bf90..3edb380 100644 --- a/moalmanac/annotator.py +++ b/moalmanac/annotator.py @@ -27,8 +27,8 @@ class Annotator: resistance_score_bin = COLNAMES[bin_section]['resistance_score_bin'] prognostic_score_bin = COLNAMES[bin_section]['prognostic_score_bin'] acmg_bin = COLNAMES[bin_section]['acmg'] - cancerhotspots_bin = COLNAMES[bin_section]['cancerhotspots'] - cancerhotspots3d_bin = COLNAMES[bin_section]['cancerhotspots3d'] + cancerhotspots_bin = COLNAMES[bin_section]['cancer_hotspots'] + cancerhotspots3d_bin = COLNAMES[bin_section]['cancer_hotspots_3d'] cgc_bin = COLNAMES[bin_section]['cgc'] clinvar_bin = COLNAMES[bin_section]['clinvar'] cosmic_bin = COLNAMES[bin_section]['cosmic'] @@ -1243,8 +1243,8 @@ class PreclinicalEfficacy: section = 'preclinical' feature_display = COLNAMES[section]['feature_display'] pvalue = COLNAMES[section]['pvalue'] - efficacy = COLNAMES[section]['efficacy_obs'] - lookup = COLNAMES[section]['efficacy_lookup'] + efficacy = COLNAMES[section]['preclinical_efficacy_obs'] + lookup = COLNAMES[section]['preclinical_efficacy_lookup'] @classmethod def annotate(cls, actionable, efficacy, dictionary, append_lookup=True): diff --git a/moalmanac/colnames.ini b/moalmanac/colnames.ini index d7c960c..01af8df 100644 --- a/moalmanac/colnames.ini +++ b/moalmanac/colnames.ini @@ -139,8 +139,8 @@ coverage = total_coverage tumor_f = tumor_f tumor = tumor_sample_barcode normal = normal_sample_barcode -ontology = oncotree_term -code = oncotree_code +oncotree_term = oncotree_term +oncotree_code = oncotree_code context = context tumor_type = reported_tumor_type patient_id = patient_id @@ -340,8 +340,8 @@ gdsc = gdsc ln_ic50 = ln_ic50 ic50 = ic50 tested_subfeature = tested_subfeature -efficacy_obs = preclinical_efficacy_observed -efficacy_lookup = preclinical_efficacy_lookup +preclinical_efficacy_obs = preclinical_efficacy_observed +preclinical_efficacy_lookup = preclinical_efficacy_lookup evidence 
= evidence evidence_map = evidence_map group1 = group1 @@ -407,8 +407,8 @@ feature_display = ${maps:feature_display} [burden] patient = ${maps:patient_id} tumor_type = ${maps:tumor_type} -ontology = ${maps:ontology} -code = ${maps:code} +oncotree_term = ${maps:oncotree_term} +oncotree_code = ${maps:oncotree_code} bases_covered = ${maps:bases_covered} n_nonsyn_mutations = ${maps:n_nonsyn_mutations} mutational_burden = ${maps:mutational_burden} @@ -430,8 +430,8 @@ number_mutations = ${maps:number_mutations} low_number_mutations = ${maps:low_number_mutations} [oncotree] -ontology = ${maps:ontology} -code = ${maps:code} +term = ${maps:oncotree_term} +code = ${maps:oncotree_code} [validation_sequencing] feature_type = ${maps:feature_type} @@ -555,8 +555,8 @@ exac_sas_an = ${maps:exac_sas_an} exac_oth_an = ${maps:exac_oth_an} mutational_burden = ${maps:mutational_burden} tumor_type = ${maps:tumor_type} -ontology = ${maps:ontology} -code = ${maps:code} +oncotree_term = ${maps:oncotree_term} +oncotree_code = ${maps:oncotree_code} context = ${maps:context} clinvar = ${maps:clinvar} query = ${maps:query} @@ -593,8 +593,8 @@ sensitive_score_bin = sensitive_score_bin resistance_score_bin = resistance_score_bin prognostic_score_bin = prognostic_score_bin acmg = acmg_bin -cancerhotspots = cancerhotspots_bin -cancerhotspots3d = cancerhotspots3D_bin +cancer_hotspots = cancer_hotspots_bin +cancer_hotspots_3d = cancer_hotspots_3D_bin cgc = cgc_bin clinvar = clinvar_bin cosmic = cosmic_bin @@ -605,12 +605,12 @@ msi = msi_bin exac_common = ${maps:exac_common} [report] -code = ${maps:code} date = ${maps:date} description = ${maps:description} normal = ${maps:normal} tumor = ${maps:tumor} -ontology = ${maps:ontology} +oncotree_term = ${maps:oncotree_term} +oncotree_code = ${maps:oncotree_code} patient_id = ${maps:patient_id} stage = ${maps:stage} purity = ${maps:purity} @@ -626,8 +626,8 @@ almanac_bin = ${bin_names:almanac} sensitive_bin = ${bin_names:sensitive_score_bin} resistance_bin 
= ${bin_names:resistance_score_bin} prognostic_bin = ${bin_names:prognostic_score_bin} -cancerhotspots_bin = ${bin_names:cancerhotspots} -cancerhotspots3d_bin = ${bin_names:cancerhotspots3d} +cancer_hotspots_bin = ${bin_names:cancer_hotspots} +cancer_hotspots_3d_bin = ${bin_names:cancer_hotspots_3d} cgc_bin = ${bin_names:cgc} gsea_pathways_bin = ${bin_names:gsea_pathways} gsea_cm_bin = ${bin_names:gsea_modules} @@ -710,7 +710,7 @@ number_germline_mutations = ${maps:number_germline_mutations} validation_tumor_f = ${maps:validation_tumor_f} validation_coverage = ${maps:validation_coverage} validation_detection_power = ${maps:validation_detection_power} -efficacy_obs = ${maps:efficacy_obs} +preclinical_efficacy_obs = ${maps:preclinical_efficacy_obs} [preclinical] ccle = ccle @@ -762,8 +762,8 @@ gdsc = ${maps:gdsc} ln_ic50 = ${maps:ln_ic50} ic50 = ${maps:ic50} tested_subfeature = ${maps:tested_subfeature} -efficacy_obs = ${maps:efficacy_obs} -efficacy_lookup = ${maps:efficacy_lookup} +preclinical_efficacy_obs = ${maps:preclinical_efficacy_obs} +preclinical_efficacy_lookup = ${maps:preclinical_efficacy_lookup} [matchmaking] feature = ${maps:feature} diff --git a/moalmanac/datasources.py b/moalmanac/datasources.py index 741871f..58c3894 100644 --- a/moalmanac/datasources.py +++ b/moalmanac/datasources.py @@ -22,8 +22,8 @@ class Datasources: allele2 = COLNAMES[datasources_section]['allele2'] disease = COLNAMES[datasources_section]['disease'] - ontology = COLNAMES[datasources_section]['ontology'] - code = COLNAMES[datasources_section]['code'] + ontology = COLNAMES[datasources_section]['oncotree_term'] + code = COLNAMES[datasources_section]['oncotree_code'] context = COLNAMES[datasources_section]['context'] mutational_burden = COLNAMES[datasources_section]['mutational_burden'] therapy = COLNAMES[datasources_section]['therapy'] diff --git a/moalmanac/evaluator.py b/moalmanac/evaluator.py index d541700..d540319 100644 --- a/moalmanac/evaluator.py +++ b/moalmanac/evaluator.py 
@@ -27,8 +27,8 @@ class Evaluator(object): resistance_bin = COLNAMES[bin_section]['resistance_score_bin'] prognostic_bin = COLNAMES[bin_section]['prognostic_score_bin'] acmg_bin = COLNAMES[bin_section]['acmg'] - cancerhotspots_bin = COLNAMES[bin_section]['cancerhotspots'] - cancerhotspots3d_bin = COLNAMES[bin_section]['cancerhotspots3d'] + cancerhotspots_bin = COLNAMES[bin_section]['cancer_hotspots'] + cancerhotspots3d_bin = COLNAMES[bin_section]['cancer_hotspots_3d'] cgc_bin = COLNAMES[bin_section]['cgc'] clinvar_bin = COLNAMES[bin_section]['clinvar'] cosmic_bin = COLNAMES[bin_section]['cosmic'] diff --git a/moalmanac/features.py b/moalmanac/features.py index bc3ec3d..9e73ee6 100644 --- a/moalmanac/features.py +++ b/moalmanac/features.py @@ -145,8 +145,8 @@ class BurdenReader: burden_section = 'burden' patient_id = COLNAMES[burden_section]['patient'] tumor_type = COLNAMES[burden_section]['tumor_type'] - ontology = COLNAMES[burden_section]['ontology'] - code = COLNAMES[burden_section]['code'] + ontology = COLNAMES[burden_section]['oncotree_term'] + code = COLNAMES[burden_section]['oncotree_code'] bases_covered = COLNAMES[burden_section]['bases_covered'] n_nonsyn_mutations = COLNAMES[burden_section]['n_nonsyn_mutations'] mutational_burden = COLNAMES[burden_section]['mutational_burden'] diff --git a/moalmanac/moalmanac.py b/moalmanac/moalmanac.py index b89223b..85a3fb1 100644 --- a/moalmanac/moalmanac.py +++ b/moalmanac/moalmanac.py @@ -1,8 +1,8 @@ -import time import argparse import os import pandas as pd import subprocess +import time import annotator import datasources @@ -15,63 +15,23 @@ import reporter import writer +from reader import Config from config import COLNAMES from config import CONFIG -snv_handle = 'snv_handle' -indel_handle = 'indel_handle' -bases_covered_handle = 'bases_covered_handle' -cnv_handle = 'cnv_handle' -called_cn_handle = 'called_cn_handle' -fusion_handle = 'fusion_handle' -germline_handle = 'germline_handle' -mutational_signatures_path = 
'mutational_signatures_path' -validation_handle = 'validation_handle' -disable_matchmaking = 'disable_matchmaking' - -snv_input = 'snv_input' -indel_input = 'indel_input' -seg_input = 'seg_input' -called_cn_input = 'called_cn_input' -fusion_input = 'fusion_input' -germline_input = 'germline_input' - -patient_section = 'patient' -patient_id = COLNAMES[patient_section]['patient_id'] -tumor_type = COLNAMES[patient_section]['tumor_type'] -stage = COLNAMES[patient_section]['stage'] -description = COLNAMES[patient_section]['description'] -purity = COLNAMES[patient_section]['purity'] -ploidy = COLNAMES[patient_section]['ploidy'] -wgd = COLNAMES[patient_section]['wgd'] -ms_status = COLNAMES[patient_section]['ms_status'] - -oncotree_section = 'oncotree' -ontology = COLNAMES[oncotree_section]['ontology'] -code = COLNAMES[oncotree_section]['code'] - -feature_type_section = 'feature_types' -feature_type_mut = CONFIG[feature_type_section]['mut'] -feature_type_germline = CONFIG[feature_type_section]['germline'] -feature_type_cna = CONFIG[feature_type_section]['cna'] -feature_type_fusion = CONFIG[feature_type_section]['fusion'] -feature_type_burden = CONFIG[feature_type_section]['burden'] -feature_type_signature = CONFIG[feature_type_section]['signature'] -feature_type_microsatellite = CONFIG[feature_type_section]['microsatellite'] -feature_type_aneuploidy = CONFIG[feature_type_section]['aneuploidy'] -feature_types = { - 'mutation': feature_type_mut, - 'germline': feature_type_germline, - 'copynumber': feature_type_cna, - 'fusion': feature_type_fusion, - 'burden': feature_type_burden, - 'signature': feature_type_signature, - 'microsatellite': feature_type_microsatellite, - 'aneuploidy': feature_type_aneuploidy -} - -generate_illustrations = 'generate_illustrations' -TOGGLE_FEATURES = CONFIG['function_toggle'] + +def create_biomarker_type_dictionary(config): + section = config['feature_types'] + return { + 'mutation': section['mut'], + 'germline': section['germline'], + 
'copynumber': section['cna'], + 'fusion': section['fusion'], + 'burden': section['burden'], + 'signature': section['signature'], + 'microsatellite': section['microsatellite'], + 'aneuploidy': section['aneuploidy'] + } def create_metadata_dictionary(input_dictionary): @@ -118,37 +78,40 @@ def process_preclinical_efficacy(dbs, dataframe, folder, label, plot: bool = Fal return efficacy_dictionary, efficacy_summary -def main(patient, inputs, output_folder): +def main(patient, inputs, output_folder, config, strings): metadata_dictionary = create_metadata_dictionary(patient) - dbs = datasources.Datasources.generate_db_dict(CONFIG) output_folder = format_output_directory(output_folder) if output_folder != "": execute_cmd(f"mkdir -p {output_folder}") - string_id = metadata_dictionary[patient_id] + string_id = metadata_dictionary['patient_id'] - mapped_ontology = ontologymapper.OntologyMapper.map(dbs, metadata_dictionary[tumor_type]) - metadata_dictionary[ontology] = mapped_ontology[ontology] - metadata_dictionary[code] = mapped_ontology[code] + dbs = datasources.Datasources.generate_db_dict(config) + mapped_ontology = ontologymapper.OntologyMapper.map(dbs, metadata_dictionary[strings['patient']['tumor_type']]) + oncotree_term = mapped_ontology[strings['oncotree']['term']] + oncotree_code = mapped_ontology[strings['oncotree']['code']] + metadata_dictionary[strings['oncotree']['term']] = oncotree_term + metadata_dictionary[strings['oncotree']['code']] = oncotree_code - df_snv, df_snv_reject = features.MAFSomatic.import_feature(inputs[snv_handle]) - df_indel, df_indel_reject = features.MAFSomatic.import_feature(inputs[indel_handle]) - df_cnv, df_cnv_reject = features.CopyNumber.import_feature(inputs[called_cn_handle], inputs[cnv_handle]) - df_fusion, df_fusion_reject = features.Fusion.import_feature(inputs[fusion_handle]) + biomarker_type_dictionary = create_biomarker_type_dictionary(config) + df_snv, df_snv_reject = features.MAFSomatic.import_feature(inputs['snv_handle']) + 
df_indel, df_indel_reject = features.MAFSomatic.import_feature(inputs['indel_handle']) + df_cnv, df_cnv_reject = features.CopyNumber.import_feature(inputs['called_cn_handle'], inputs['cnv_handle']) + df_fusion, df_fusion_reject = features.Fusion.import_feature(inputs['fusion_handle']) accepted_variants = [df_snv, df_indel, df_cnv, df_fusion] filtered_variants = [df_snv_reject, df_indel_reject, df_cnv_reject, df_fusion_reject] somatic_variants = features.Features.concat_list_of_dataframes(accepted_variants) somatic_filtered = features.Features.concat_list_of_dataframes(filtered_variants) - germline_variants, germline_reject = features.MAFGermline.import_feature(inputs[germline_handle]) + germline_variants, germline_reject = features.MAFGermline.import_feature(inputs['germline_handle']) if not somatic_variants.empty: - annotated_somatic = annotator.Annotator.annotate_somatic(somatic_variants, dbs, metadata_dictionary[code]) + annotated_somatic = annotator.Annotator.annotate_somatic(somatic_variants, dbs, oncotree_code) evaluated_somatic = evaluator.Evaluator.evaluate_somatic(annotated_somatic) - validation_accept, validation_reject = features.MAFValidation.import_feature(inputs[validation_handle]) + validation_accept, validation_reject = features.MAFValidation.import_feature(inputs['validation_handle']) if not validation_accept.empty: evaluated_somatic = annotator.OverlapValidation.append_validation(evaluated_somatic, validation_accept) illustrator.ValidationOverlap.generate_dna_rna_plot(evaluated_somatic, string_id, output_folder) @@ -156,23 +119,46 @@ def main(patient, inputs, output_folder): evaluated_somatic = features.Features.create_empty_dataframe() if not germline_variants.empty: - annotated_germline = annotator.Annotator.annotate_germline(germline_variants, dbs, metadata_dictionary[code]) + annotated_germline = annotator.Annotator.annotate_germline(germline_variants, dbs, oncotree_code) evaluated_germline = 
evaluator.Evaluator.evaluate_germline(annotated_germline) else: evaluated_germline = features.Features.create_empty_dataframe() evaluated_somatic = annotator.OverlapSomaticGermline.append_germline_hits(evaluated_somatic, evaluated_germline) - integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, feature_types) + integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, biomarker_type_dictionary) - somatic_burden = features.BurdenReader.import_feature(inputs[bases_covered_handle], metadata_dictionary, somatic_variants, dbs) + somatic_burden = features.BurdenReader.import_feature( + handle=inputs['bases_covered_handle'], + patient=metadata_dictionary, + variants=somatic_variants, + dbs=dbs + ) - patient_wgd = features.Aneuploidy.summarize(metadata_dictionary[wgd]) - patient_ms_status = features.MicrosatelliteReader.summarize(metadata_dictionary[ms_status]) - metadata_dictionary[ms_status] = features.MicrosatelliteReader.map_status(metadata_dictionary[ms_status]) + patient_wgd = features.Aneuploidy.summarize( + boolean=metadata_dictionary[strings['patient']['wgd']] + ) + patient_ms_status = features.MicrosatelliteReader.summarize( + status=metadata_dictionary[strings['patient']['ms_status']] + ) + metadata_dictionary[strings['patient']['ms_status']] = features.MicrosatelliteReader.map_status( + status=metadata_dictionary[strings['patient']['ms_status']] + ) - annotated_burden = annotator.Annotator.annotate_almanac(somatic_burden, dbs, metadata_dictionary[code]) - annotated_wgd = annotator.Annotator.annotate_almanac(patient_wgd, dbs, metadata_dictionary[code]) - annotated_ms_status = annotator.Annotator.annotate_almanac(patient_ms_status, dbs, metadata_dictionary[code]) + annotated_burden = annotator.Annotator.annotate_almanac( + somatic_burden, + dbs, + metadata_dictionary[strings['oncotree']['code']] + ) + annotated_wgd = annotator.Annotator.annotate_almanac( + patient_wgd, + dbs, + 
metadata_dictionary[strings['oncotree']['code']] + ) + annotated_ms_status = annotator.Annotator.annotate_almanac( + patient_ms_status, + dbs, + metadata_dictionary[strings['oncotree']['code']] + ) evaluated_burden = evaluator.Evaluator.evaluate_almanac(annotated_burden) evaluated_wgd = evaluator.Evaluator.evaluate_almanac(annotated_wgd) @@ -180,9 +166,9 @@ def main(patient, inputs, output_folder): evaluated_ms_status = evaluator.Microsatellite.evaluate_status(annotated_ms_status, evaluated_ms_variants) evaluated_mutational_signatures = load_and_process_mutational_signatures( - input=inputs[mutational_signatures_path], + input=inputs['mutational_signatures_path'], dbs=dbs, - tumor_type=code + tumor_type=oncotree_code ) actionable = evaluator.Actionable.evaluate( @@ -197,13 +183,14 @@ def main(patient, inputs, output_folder): strategies = evaluator.Strategies.report_therapy_strategies(actionable) + function_toggle = config['function_toggle'] efficacy_summary = investigator.SummaryDataFrame.create_empty_dataframe() efficacy_dictionary = {} cell_lines_dictionary = {} - preclinical_efficacy_on = TOGGLE_FEATURES.getboolean('calculate_preclinical_efficacy') + preclinical_efficacy_on = function_toggle.getboolean('calculate_preclinical_efficacy') # The input argument --disable_matchmaking will be removed in the next non-backwards compatible release - model_similarity_on = TOGGLE_FEATURES.getboolean('calculate_model_similarity') and not inputs[disable_matchmaking] + model_similarity_on = function_toggle.getboolean('calculate_model_similarity') and not inputs['disable_matchmaking'] similarity_results = matchmaker.Matchmaker.create_empty_output() similarity_summary = {} @@ -211,7 +198,7 @@ def main(patient, inputs, output_folder): dbs_preclinical = datasources.Preclinical.import_dbs() cell_lines_dictionary = dbs_preclinical['dictionary'] if preclinical_efficacy_on: - plot_preclinical = TOGGLE_FEATURES.getboolean('plot_preclinical_efficacy') + plot_preclinical = 
function_toggle.getboolean('plot_preclinical_efficacy') efficacy_results = process_preclinical_efficacy( dbs_preclinical, actionable, @@ -226,29 +213,34 @@ def main(patient, inputs, output_folder): actionable, efficacy_summary, efficacy_dictionary, - append_lookup=TOGGLE_FEATURES.getboolean('include_preclinical_efficacy_in_actionability_report') + append_lookup=function_toggle.getboolean('include_preclinical_efficacy_in_actionability_report') ) if model_similarity_on: similarity_results = matchmaker.Matchmaker.compare(dbs, dbs_preclinical, evaluated_somatic, string_id) similarity_summary = matchmaker.Report.create_report_dictionary(similarity_results, cell_lines_dictionary) - writer.Actionable.write(actionable, string_id, output_folder) - writer.GermlineACMG.write(evaluated_germline, string_id, output_folder) - writer.GermlineCancer.write(evaluated_germline, string_id, output_folder) - writer.GermlineHereditary.write(evaluated_germline, string_id, output_folder) - writer.Integrated.write(integrated, string_id, output_folder) - writer.MSI.write(evaluated_ms_variants, string_id, output_folder) - writer.MutationalBurden.write(evaluated_burden, string_id, output_folder) - writer.SomaticScored.write(evaluated_somatic, string_id, output_folder) - writer.SomaticFiltered.write(somatic_filtered, string_id, output_folder) - writer.Strategies.write(strategies, string_id, output_folder) - writer.PreclinicalEfficacy.write(efficacy_summary, string_id, output_folder) - writer.PreclinicalMatchmaking.write(similarity_results, string_id, output_folder) - - if TOGGLE_FEATURES.getboolean('generate_actionability_report'): + writers_and_dataframes = [ + (writer.Actionable, actionable), + (writer.GermlineACMG, evaluated_germline), + (writer.GermlineCancer, evaluated_germline), + (writer.GermlineHereditary, evaluated_germline), + (writer.Integrated, integrated), + (writer.MSI, evaluated_ms_variants), + (writer.MutationalBurden, evaluated_burden), + (writer.SomaticScored, 
evaluated_somatic), + (writer.SomaticFiltered, somatic_filtered), + (writer.Strategies, strategies), + (writer.PreclinicalEfficacy, efficacy_summary), + (writer.PreclinicalMatchmaking, similarity_results) + ] + for writer_class, dataframe in writers_and_dataframes: + writer_instance = writer_class(strings=strings) + writer_instance.write(dataframe=dataframe, patient_label=string_id, folder=output_folder) + + if function_toggle.getboolean('generate_actionability_report'): report_dictionary = reporter.Reporter.generate_dictionary(evaluated_somatic, metadata_dictionary) - include_similarity = TOGGLE_FEATURES.getboolean('include_model_similarity_in_actionability_report') + include_similarity = function_toggle.getboolean('include_model_similarity_in_actionability_report') reporter.Reporter.generate_actionability_report( actionable, report_dictionary, @@ -320,35 +312,48 @@ def main(patient, inputs, output_folder): arg_parser.add_argument('--output_directory', default=None, help='Output directory for generated files') + arg_parser.add_argument('--config', + help='Path to config.ini file') + arg_parser.add_argument('--strings', + help='Path to strings.ini file') args = arg_parser.parse_args() patient_dict = { - patient_id: args.patient_id, - tumor_type: args.tumor_type, - stage: args.stage, - description: args.description, - purity: args.purity, - ploidy: args.ploidy, - ms_status: args.ms_status, - wgd: args.wgd + 'patient_id': args.patient_id, + 'tumor_type': args.tumor_type, + 'stage': args.stage, + 'description': args.description, + 'purity': args.purity, + 'ploidy': args.ploidy, + 'ms_status': args.ms_status, + 'wgd': args.wgd } inputs_dict = { - snv_handle: args.snv_handle, - indel_handle: args.indel_handle, - bases_covered_handle: args.bases_covered_handle, - cnv_handle: args.cnv_handle, - called_cn_handle: args.called_cn_handle, - fusion_handle: args.fusion_handle, - germline_handle: args.germline_handle, - mutational_signatures_path: args.mutational_signatures, - 
validation_handle: args.validation_handle, - disable_matchmaking: args.disable_matchmaking + 'snv_handle': args.snv_handle, + 'indel_handle': args.indel_handle, + 'bases_covered_handle': args.bases_covered_handle, + 'cnv_handle': args.cnv_handle, + 'called_cn_handle': args.called_cn_handle, + 'fusion_handle': args.fusion_handle, + 'germline_handle': args.germline_handle, + 'mutational_signatures_path': args.mutational_signatures, + 'validation_handle': args.validation_handle, + 'disable_matchmaking': args.disable_matchmaking } output_directory = args.output_directory if args.output_directory else os.getcwd() - main(patient_dict, inputs_dict, output_directory) + config_ini = Config.read(args.config, extended_interpolation=False, convert_to_dictionary=False) + strings_dictionary = Config.read(args.strings, extended_interpolation=True, convert_to_dictionary=True) + + main( + patient=patient_dict, + inputs=inputs_dict, + output_folder=output_directory, + config=config_ini, + strings=strings_dictionary + ) end_time = time.time() time_statement = "Molecular Oncology Almanac runtime: %s seconds" % round((end_time - start_time), 4) diff --git a/moalmanac/config.py b/moalmanac/old_config.py similarity index 100% rename from moalmanac/config.py rename to moalmanac/old_config.py diff --git a/moalmanac/reader.py b/moalmanac/reader.py index c935ff7..59f5968 100644 --- a/moalmanac/reader.py +++ b/moalmanac/reader.py @@ -1,9 +1,38 @@ +import configparser import json import pandas as pd import pickle -class Reader(object): +class Config: + @classmethod + def read(cls, path, extended_interpolation=False, convert_to_dictionary=False): + ini = cls.load(path, extended_interpolation=extended_interpolation) + if convert_to_dictionary: + return cls.convert_ini_to_dictionary(ini) + else: + return ini + + @staticmethod + def convert_ini_to_dictionary(ini): + dictionary = {} + for section in ini.sections(): + dictionary[section] = {} + for (key, value) in ini.items(section): + 
dictionary[section][key] = value + return dictionary + + @staticmethod + def load(path, extended_interpolation=False): + if extended_interpolation: + config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation()) + else: + config = configparser.ConfigParser() + config.read(path) + return config + + +class Reader: @staticmethod def check_comment_rows(handle, comment_character): skip_rows = 0 diff --git a/moalmanac/reporter.py b/moalmanac/reporter.py index bb48023..bf880f7 100644 --- a/moalmanac/reporter.py +++ b/moalmanac/reporter.py @@ -10,12 +10,12 @@ class Reporter: report_section = 'report' - code = COLNAMES[report_section]['code'] date = COLNAMES[report_section]['date'] description = COLNAMES[report_section]['description'] normal = COLNAMES[report_section]['normal'] tumor = COLNAMES[report_section]['tumor'] - ontology = COLNAMES[report_section]['ontology'] + ontology = COLNAMES[report_section]['oncotree_term'] + code = COLNAMES[report_section]['oncotree_code'] patient_id = COLNAMES[report_section]['patient_id'] stage = COLNAMES[report_section]['stage'] purity = COLNAMES[report_section]['purity'] diff --git a/moalmanac/run_deconstructsigs.R b/moalmanac/run_deconstructsigs.R deleted file mode 100644 index 89900a3..0000000 --- a/moalmanac/run_deconstructsigs.R +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/Rscript -library("deconstructSigs") - -args = commandArgs(trailingOnly=TRUE) -patient_id = args[1] -snv_handle = args[2] -sample = args[3] -ref = args[4] -alt = args[5] -chr = args[6] -pos = args[7] -folder = args[8] - -maf = read.csv(snv_handle, sep = '\t', comment.char = '#') -names(maf) <- tolower(names(maf)) -cols = c(sample, ref, alt, chr, pos) -maf <- maf[colnames(maf) %in% cols] - -maf$tumor_sample_barcode <- sapply(maf$tumor_sample_barcode, as.factor) -maf$reference_allele <- sapply(maf$reference_allele, as.factor) -maf$tumor_seq_allele2 <- sapply(maf$tumor_seq_allele2, as.factor) -maf$chromosome <- sapply(maf$chromosome, as.factor) - 
-unique.samples = unique(maf$tumor_sample_barcode) - -sigs.input <- mut.to.sigs.input(mut.ref = maf, - sample.id = sample, chr = chr, - pos = pos, ref = ref, - alt = alt) - -temp.filename <- paste(folder, patient_id, ".sigs.context.txt", sep = "") -write.table(sigs.input, file = temp.filename, sep = '\t', row.names = FALSE) - -for (sample_ in unique.samples) { - output.sigs <- whichSignatures(tumor.ref = sigs.input, - signatures.ref = signatures.cosmic, sample.id = sample_, - context = TRUE, tri.counts.method = 'default') - - temp.filename = paste(folder, patient_id, ".sigs.cosmic.txt", sep = "") - write.table(output.sigs, file = temp.filename, sep = '\t', row.names = FALSE) -} diff --git a/moalmanac/writer.py b/moalmanac/writer.py index ef097e3..2cafb5d 100644 --- a/moalmanac/writer.py +++ b/moalmanac/writer.py @@ -1,104 +1,399 @@ -from config import COLNAMES import json class Writer: - section = 'outputs' - score_bin = COLNAMES[section]['score_bin'] - almanac_bin = COLNAMES[section]['almanac_bin'] - cancerhotspots_bin = COLNAMES[section]['cancerhotspots_bin'] - cancerhotspots3D_bin = COLNAMES[section]['cancerhotspots3d_bin'] - cgc_bin = COLNAMES[section]['cgc_bin'] - gsea_pathways_bin = COLNAMES[section]['gsea_pathways_bin'] - gsea_cm_bin = COLNAMES[section]['gsea_cm_bin'] - cosmic_bin = COLNAMES[section]['cosmic_bin'] - clinvar_bin = COLNAMES[section]['clinvar_bin'] - acmg_bin = COLNAMES[section]['acmg_bin'] - hereditary_bin = COLNAMES[section]['hereditary_bin'] - msi_bin = COLNAMES[section]['msi_bin'] - - sensitive_bin = COLNAMES[section]['sensitive_bin'] - resistance_bin = COLNAMES[section]['resistance_bin'] - prognostic_bin = COLNAMES[section]['prognostic_bin'] - sensitive_implication = COLNAMES[section]['sensitive_implication'] - resistance_implication = COLNAMES[section]['resistance_implication'] - prognostic_implication = COLNAMES[section]['prognostic_implication'] - sensitive_map = COLNAMES[section]['sensitive_implication_map'] - resistance_map = 
COLNAMES[section]['resistance_implication_map'] - prognostic_map = COLNAMES[section]['prognostic_implication_map'] - sensitive_therapy = COLNAMES[section]['sensitive_therapy'] - resistance_therapy = COLNAMES[section]['resistance_therapy'] - sensitive_therapy_strategy = COLNAMES[section]['sensitive_therapy_strategy'] - resistance_therapy_strategy = COLNAMES[section]['resistance_therapy_strategy'] - sensitive_therapy_type = COLNAMES[section]['sensitive_therapy_type'] - resistance_therapy_type = COLNAMES[section]['resistance_therapy_type'] - favorable_prognosis = COLNAMES[section]['favorable_prognosis'] - sensitive_oncotree_code = COLNAMES[section]['sensitive_oncotree_code'] - resistance_oncotree_code = COLNAMES[section]['resistance_oncotree_code'] - prognostic_oncotree_code = COLNAMES[section]['prognostic_oncotree_code'] - sensitive_description = COLNAMES[section]['sensitive_description'] - resistance_description = COLNAMES[section]['resistance_description'] - prognostic_description = COLNAMES[section]['prognostic_description'] - sensitive_url = COLNAMES[section]['sensitive_url'] - resistance_url = COLNAMES[section]['resistance_url'] - prognostic_url = COLNAMES[section]['prognostic_url'] - sensitive_citation = COLNAMES[section]['sensitive_citation'] - resistance_citation = COLNAMES[section]['resistance_citation'] - prognostic_citation = COLNAMES[section]['prognostic_citation'] - - feature_type = COLNAMES[section]['feature_type'] - feature = COLNAMES[section]['feature'] - alt_type = COLNAMES[section]['alt_type'] - alt = COLNAMES[section]['alt'] - chr = COLNAMES[section]['chr'] - start = COLNAMES[section]['start'] - end = COLNAMES[section]['end'] - ref = COLNAMES[section]['ref'] - allele1 = COLNAMES[section]['allele1'] - allele2 = COLNAMES[section]['allele2'] - tumor_f = COLNAMES[section]['tumor_f'] - coverage = COLNAMES[section]['coverage'] - clinvar = COLNAMES[section]['clinvar'] - number_germline_mutations = COLNAMES[section]['number_germline_mutations'] - - 
exac_common = COLNAMES[section]['exac_common'] - exac_af = COLNAMES[section]['exac_af'] - exac_ac = COLNAMES[section]['exac_ac'] - exac_an = COLNAMES[section]['exac_an'] - exac_afr_ac = COLNAMES[section]['exac_afr_ac'] - exac_amr_ac = COLNAMES[section]['exac_amr_ac'] - exac_eas_ac = COLNAMES[section]['exac_eas_ac'] - exac_fin_ac = COLNAMES[section]['exac_fin_ac'] - exac_nfe_ac = COLNAMES[section]['exac_nfe_ac'] - exac_sas_ac = COLNAMES[section]['exac_sas_ac'] - exac_oth_ac = COLNAMES[section]['exac_oth_ac'] - exac_afr_an = COLNAMES[section]['exac_afr_an'] - exac_amr_an = COLNAMES[section]['exac_amr_an'] - exac_eas_an = COLNAMES[section]['exac_eas_an'] - exac_fin_an = COLNAMES[section]['exac_fin_an'] - exac_nfe_an = COLNAMES[section]['exac_nfe_an'] - exac_sas_an = COLNAMES[section]['exac_sas_an'] - exac_oth_an = COLNAMES[section]['exac_oth_an'] - - spanningfrags = COLNAMES[section]['spanningfrags'] - left_gene = COLNAMES[section]['left_gene'] - left_chr = COLNAMES[section]['left_chr'] - left_start = COLNAMES[section]['left_start'] - right_gene = COLNAMES[section]['right_gene'] - right_chr = COLNAMES[section]['right_chr'] - right_start = COLNAMES[section]['right_start'] - - validation_tumor_f = COLNAMES[section]['validation_tumor_f'] - validation_coverage = COLNAMES[section]['validation_coverage'] - validation_detection_power = COLNAMES[section]['validation_detection_power'] - - feature_display = COLNAMES[section]['feature_display'] - preclinical_efficacy = COLNAMES[section]['efficacy_obs'] - connections = COLNAMES[section]['connections'] - rationale = COLNAMES[section]['rationale'] - patient_id = COLNAMES[section]['patient_id'] - tumor = COLNAMES[section]['tumor'] - normal = COLNAMES[section]['normal'] + section = "outputs" + + def __init__(self, strings): + self.strings = strings + + """ + Defining properties for each datasource bin + """ + + @property + def score_bin(self): + return self.strings[self.section]['score_bin'] + + @property + def almanac_bin(self): + 
return self.strings[self.section]['almanac_bin'] + + @property + def cancer_hotspots_bin(self): + return self.strings[self.section]['cancer_hotspots_bin'] + + @property + def cancer_hotspots_3d_bin(self): + return self.strings[self.section]['cancer_hotspots_3d_bin'] + + @property + def cgc_bin(self): + return self.strings[self.section]['cgc_bin'] + + @property + def gsea_pathways_bin(self): + return self.strings[self.section]['gsea_pathways_bin'] + + @property + def gsea_cm_bin(self): + return self.strings[self.section]['gsea_cm_bin'] + + @property + def cosmic_bin(self): + return self.strings[self.section]['cosmic_bin'] + + @property + def clinvar_bin(self): + return self.strings[self.section]['clinvar_bin'] + + @property + def acmg_bin(self): + return self.strings[self.section]['acmg_bin'] + + @property + def hereditary_bin(self): + return self.strings[self.section]['hereditary_bin'] + + @property + def msi_bin(self): + return self.strings[self.section]['msi_bin'] + + """ + Defining properties for moalmanac specific annotations + """ + + @property + def sensitive_bin(self): + return self.strings[self.section]['sensitive_bin'] + + @property + def resistance_bin(self): + return self.strings[self.section]['resistance_bin'] + + @property + def prognostic_bin(self): + return self.strings[self.section]['prognostic_bin'] + + @property + def sensitive_implication(self): + return self.strings[self.section]['sensitive_implication'] + + @property + def resistance_implication(self): + return self.strings[self.section]['resistance_implication'] + + @property + def prognostic_implication(self): + return self.strings[self.section]['prognostic_implication'] + + @property + def sensitive_map(self): + return self.strings[self.section]['sensitive_implication_map'] + + @property + def resistance_map(self): + return self.strings[self.section]['resistance_implication_map'] + + @property + def prognostic_map(self): + return self.strings[self.section]['prognostic_implication_map'] + + 
# Members of the ``Writer`` class (continued); the ``class`` statement lies
# above this chunk. Each property returns the label stored in
# ``self.strings[self.section]`` under the key named in its body.

@property
def sensitive_therapy(self):
    return self.strings[self.section]['sensitive_therapy']


@property
def resistance_therapy(self):
    return self.strings[self.section]['resistance_therapy']


@property
def sensitive_therapy_strategy(self):
    return self.strings[self.section]['sensitive_therapy_strategy']


@property
def resistance_therapy_strategy(self):
    return self.strings[self.section]['resistance_therapy_strategy']


@property
def sensitive_therapy_type(self):
    return self.strings[self.section]['sensitive_therapy_type']


@property
def resistance_therapy_type(self):
    return self.strings[self.section]['resistance_therapy_type']


@property
def favorable_prognosis(self):
    return self.strings[self.section]['favorable_prognosis']


@property
def sensitive_oncotree_code(self):
    return self.strings[self.section]['sensitive_oncotree_code']


@property
def resistance_oncotree_code(self):
    return self.strings[self.section]['resistance_oncotree_code']


@property
def prognostic_oncotree_code(self):
    return self.strings[self.section]['prognostic_oncotree_code']


@property
def sensitive_description(self):
    return self.strings[self.section]['sensitive_description']


@property
def resistance_description(self):
    return self.strings[self.section]['resistance_description']


@property
def prognostic_description(self):
    return self.strings[self.section]['prognostic_description']


@property
def sensitive_url(self):
    return self.strings[self.section]['sensitive_url']


@property
def resistance_url(self):
    return self.strings[self.section]['resistance_url']


@property
def prognostic_url(self):
    return self.strings[self.section]['prognostic_url']


@property
def sensitive_citation(self):
    return self.strings[self.section]['sensitive_citation']


@property
def resistance_citation(self):
    return self.strings[self.section]['resistance_citation']


@property
def prognostic_citation(self):
    return self.strings[self.section]['prognostic_citation']


# --- Properties for describing biomarkers ---

@property
def feature_type(self):
    return self.strings[self.section]['feature_type']


@property
def feature(self):
    return self.strings[self.section]['feature']


@property
def alt_type(self):
    return self.strings[self.section]['alt_type']


@property
def alt(self):
    return self.strings[self.section]['alt']


@property
def chr(self):
    return self.strings[self.section]['chr']


@property
def start(self):
    return self.strings[self.section]['start']


@property
def end(self):
    return self.strings[self.section]['end']


@property
def ref(self):
    return self.strings[self.section]['ref']


@property
def allele1(self):
    return self.strings[self.section]['allele1']


@property
def allele2(self):
    return self.strings[self.section]['allele2']


@property
def tumor_f(self):
    return self.strings[self.section]['tumor_f']


@property
def coverage(self):
    return self.strings[self.section]['coverage']


@property
def clinvar(self):
    return self.strings[self.section]['clinvar']


@property
def number_germline_mutations(self):
    return self.strings[self.section]['number_germline_mutations']


# --- Properties for ExAC annotations ---

@property
def exac_common(self):
    return self.strings[self.section]['exac_common']


@property
def exac_af(self):
    return self.strings[self.section]['exac_af']


@property
def exac_ac(self):
    return self.strings[self.section]['exac_ac']


@property
def exac_an(self):
    return self.strings[self.section]['exac_an']


@property
def exac_afr_ac(self):
    return self.strings[self.section]['exac_afr_ac']


@property
def exac_amr_ac(self):
    # Fixed: previously read the 'exac_eas_ac' key, duplicating the EAS label.
    return self.strings[self.section]['exac_amr_ac']


@property
def exac_eas_ac(self):
    return self.strings[self.section]['exac_eas_ac']


@property
def exac_fin_ac(self):
    # Fixed: previously read the 'exac_nfe_ac' key, duplicating the NFE label.
    return self.strings[self.section]['exac_fin_ac']


@property
def exac_nfe_ac(self):
    return self.strings[self.section]['exac_nfe_ac']


@property
def exac_sas_ac(self):
    return self.strings[self.section]['exac_sas_ac']


@property
def exac_oth_ac(self):
    return self.strings[self.section]['exac_oth_ac']


@property
def exac_afr_an(self):
    return self.strings[self.section]['exac_afr_an']


@property
def exac_amr_an(self):
    return self.strings[self.section]['exac_amr_an']


@property
def exac_eas_an(self):
    return self.strings[self.section]['exac_eas_an']


@property
def exac_fin_an(self):
    return self.strings[self.section]['exac_fin_an']


@property
def exac_nfe_an(self):
    return self.strings[self.section]['exac_nfe_an']


@property
def exac_sas_an(self):
    return self.strings[self.section]['exac_sas_an']


@property
def exac_oth_an(self):
    return self.strings[self.section]['exac_oth_an']


# --- Properties to describe fusions ---

@property
def spanningfrags(self):
    return self.strings[self.section]['spanningfrags']


@property
def left_gene(self):
    return self.strings[self.section]['left_gene']


@property
def left_chr(self):
    return self.strings[self.section]['left_chr']


@property
def left_start(self):
    return self.strings[self.section]['left_start']


@property
def right_gene(self):
    return self.strings[self.section]['right_gene']


@property
def right_chr(self):
    return self.strings[self.section]['right_chr']


@property
def right_start(self):
    return self.strings[self.section]['right_start']


# --- Properties for annotating somatic variants with validation sequencing ---

@property
def validation_tumor_f(self):
    return self.strings[self.section]['validation_tumor_f']


@property
def validation_coverage(self):
    return self.strings[self.section]['validation_coverage']


@property
def validation_detection_power(self):
    return self.strings[self.section]['validation_detection_power']


# --- Remaining properties ---

@property
def feature_display(self):
    return self.strings[self.section]['feature_display']


@property
def preclinical_efficacy(self):
    # Note the key differs from the property name.
    return self.strings[self.section]['preclinical_efficacy_obs']


@property
def connections(self):
    return self.strings[self.section]['connections']


@property
def rationale(self):
    return self.strings[self.section]['rationale']


@property
def patient_id(self):
    return self.strings[self.section]['patient_id']


@property
def tumor(self):
    return self.strings[self.section]['tumor']


@property
def normal(self):
    return self.strings[self.section]['normal']

class Actionable:
    """Writer for the table exported with the ``actionable.txt`` suffix."""

    output_suffix = "actionable.txt"

    def __init__(self, strings):
        """Resolve sort and output column labels through ``Writer(strings)``."""
        cols = Writer(strings)
        self.writer = cols
        self.sort_columns = [
            cols.almanac_bin, cols.sensitive_map, cols.resistance_map, cols.prognostic_map,
        ]
        self.output_columns = [
            cols.score_bin,
            cols.sensitive_implication, cols.resistance_implication, cols.prognostic_implication,
            cols.feature_type, cols.feature, cols.alt_type, cols.alt,
            cols.tumor_f, cols.coverage, cols.exac_af, cols.exac_common, cols.clinvar,
            cols.sensitive_bin,
            cols.sensitive_therapy, cols.sensitive_therapy_strategy, cols.sensitive_therapy_type,
            cols.sensitive_oncotree_code,
            cols.sensitive_description, cols.sensitive_citation, cols.sensitive_url,
            cols.resistance_bin,
            cols.resistance_therapy, cols.resistance_therapy_strategy, cols.resistance_therapy_type,
            cols.resistance_oncotree_code,
            cols.resistance_description, cols.resistance_citation, cols.resistance_url,
            cols.prognostic_bin, cols.favorable_prognosis,
            cols.prognostic_oncotree_code,
            cols.prognostic_description, cols.prognostic_citation, cols.prognostic_url,
            cols.number_germline_mutations,
            cols.validation_coverage, cols.validation_tumor_f, cols.validation_detection_power,
            cols.feature_display, cols.preclinical_efficacy,
            cols.patient_id, cols.tumor, cols.normal,
        ]

    def write(self, dataframe, patient_label, folder):
        """Sort ``dataframe``, export the output columns and return the sorted frame.

        The patient-id column of ``dataframe`` is assigned ``patient_label``
        in place before sorting (descending on every sort column).
        """
        dataframe[self.writer.patient_id] = patient_label
        ordered = Writer.sort_columns(dataframe, self.sort_columns, False)
        destination = Writer.create_output_name(folder, patient_label, type(self).output_suffix)
        Writer.export_dataframe(ordered.loc[:, self.output_columns], destination)
        return ordered

class GermlineACMG:
    """Writer for germline rows selected through the ACMG bin column."""

    output_suffix = "germline.acmg.txt"

    def __init__(self, strings):
        """Resolve the bin, sort and output column labels via ``Writer(strings)``."""
        self.writer = Writer(strings)
        w = self.writer
        self.bin = w.acmg_bin
        self.sort_columns = [w.clinvar_bin, w.feature, w.feature_type]
        self.output_columns = [
            w.feature_type, w.feature, w.alt_type, w.alt,
            w.chr, w.start, w.end, w.ref, w.allele1, w.allele2,
            w.tumor_f, w.coverage,
            w.clinvar, w.exac_common, w.exac_af, w.exac_ac, w.exac_an,
            w.exac_afr_ac, w.exac_amr_ac, w.exac_eas_ac, w.exac_fin_ac,
            w.exac_nfe_ac, w.exac_sas_ac, w.exac_oth_ac,
            w.exac_afr_an, w.exac_amr_an, w.exac_eas_an, w.exac_fin_an,
            # Fixed: this slot previously repeated exac_sas_ac, so the SAS
            # allele-number column was never exported.
            w.exac_nfe_an, w.exac_sas_an, w.exac_oth_an,
            w.patient_id, w.tumor, w.normal,
        ]

    def write(self, dataframe, patient_label, folder):
        """Export the rows picked by the ACMG bin and return the sorted frame.

        ``dataframe`` is modified in place: its patient-id column is set to
        ``patient_label``. Rows are chosen with
        ``Writer.return_nonzero_bin_idx`` on the ``self.bin`` column and looked
        up by label in the sorted frame.
        """
        dataframe[self.writer.patient_id] = patient_label
        dataframe_sorted = Writer.sort_columns(dataframe, self.sort_columns, ascending_boolean=False)
        idx = Writer.return_nonzero_bin_idx(dataframe.loc[:, self.bin])
        output_name = Writer.create_output_name(folder, patient_label, self.__class__.output_suffix)
        Writer.export_dataframe(dataframe_sorted.loc[idx, self.output_columns], output_name)
        return dataframe_sorted

class GermlineCancer:
    """Writer for germline rows flagged by the almanac, hotspot or CGC bins."""

    output_suffix = 'germline.cancer_related.txt'

    def __init__(self, strings):
        """Resolve bin, sort and output column labels via ``Writer(strings)``."""
        self.writer = Writer(strings)
        w = self.writer
        self.almanac_bin = w.almanac_bin
        self.hotspots_bin = w.cancer_hotspots_bin
        self.cgc_bin = w.cgc_bin
        self.sort_columns = [
            w.almanac_bin, w.cancer_hotspots_bin, w.cancer_hotspots_3d_bin,
            w.cgc_bin, w.gsea_pathways_bin, w.gsea_cm_bin, w.cosmic_bin,
            w.exac_common, w.exac_af,
        ]
        # One flag per sort column: bins descending, the two ExAC columns ascending.
        self.ascending = [False, False, False, False, False, False, False, True, True]
        self.output_columns = [
            w.score_bin, w.sensitive_bin, w.resistance_bin, w.prognostic_bin,
            w.feature_type, w.feature, w.alt_type, w.alt,
            w.chr, w.start, w.end, w.ref, w.allele1, w.allele2,
            w.tumor_f, w.coverage,
            w.clinvar, w.exac_common, w.exac_af, w.exac_ac, w.exac_an,
            w.exac_afr_ac, w.exac_amr_ac, w.exac_eas_ac, w.exac_fin_ac,
            w.exac_nfe_ac, w.exac_sas_ac, w.exac_oth_ac,
            w.exac_afr_an, w.exac_amr_an, w.exac_eas_an, w.exac_fin_an,
            # Fixed: this slot previously repeated exac_sas_ac, so the SAS
            # allele-number column was never exported.
            w.exac_nfe_an, w.exac_sas_an, w.exac_oth_an,
            w.patient_id, w.tumor, w.normal,
        ]

    def get_cancer_idx(self, dataframe):
        """Return the union of the row indices selected from the three bin columns.

        Each column (almanac, hotspots, CGC) is passed to
        ``Writer.return_nonzero_bin_idx`` and the results are combined with
        ``.union``.
        """
        idx_almanac = Writer.return_nonzero_bin_idx(dataframe.loc[:, self.almanac_bin])
        idx_hotspot = Writer.return_nonzero_bin_idx(dataframe.loc[:, self.hotspots_bin])
        idx_cgc = Writer.return_nonzero_bin_idx(dataframe.loc[:, self.cgc_bin])
        return idx_almanac.union(idx_hotspot).union(idx_cgc)

    def write(self, dataframe, patient_label, folder):
        """Sort ``dataframe`` and export the cancer-related rows.

        ``dataframe`` is modified in place: its patient-id column is set to
        ``patient_label``.
        """
        dataframe[self.writer.patient_id] = patient_label
        dataframe_sorted = Writer.sort_columns(
            df=dataframe,
            columns=self.sort_columns,
            ascending_boolean=self.ascending
        )
        # Plain bound-method call; equivalent to the former
        # self.__class__.get_cancer_idx(self=self, ...) spelling.
        idx = self.get_cancer_idx(dataframe_sorted)
        output_name = Writer.create_output_name(
            folder=folder,
            patient_id=patient_label,
            output_suffix=self.__class__.output_suffix
        )
        Writer.export_dataframe(
            df=dataframe_sorted.loc[idx, self.output_columns],
            output_name=output_name
        )

class GermlineHereditary:
    """Writer for germline rows selected through the hereditary bin column."""

    output_suffix = 'germline.hereditary_cancers.txt'

    def __init__(self, strings):
        """Resolve the bin, sort and output column labels via ``Writer(strings)``."""
        self.writer = Writer(strings)
        w = self.writer
        self.bin = w.hereditary_bin
        self.sort_columns = [w.clinvar_bin, w.feature, w.feature_type]
        self.output_columns = [
            w.feature_type, w.feature, w.alt_type, w.alt,
            w.chr, w.start, w.end, w.ref, w.allele1, w.allele2,
            w.tumor_f, w.coverage,
            w.clinvar, w.exac_common, w.exac_af, w.exac_ac, w.exac_an,
            w.exac_afr_ac, w.exac_amr_ac, w.exac_eas_ac, w.exac_fin_ac,
            w.exac_nfe_ac, w.exac_sas_ac, w.exac_oth_ac,
            w.exac_afr_an, w.exac_amr_an, w.exac_eas_an, w.exac_fin_an,
            # Fixed: this slot previously repeated exac_sas_ac, so the SAS
            # allele-number column was never exported.
            w.exac_nfe_an, w.exac_sas_an, w.exac_oth_an,
            w.patient_id, w.tumor, w.normal,
        ]

    def write(self, dataframe, patient_label, folder):
        """Sort ``dataframe`` and export the rows picked by the hereditary bin.

        ``dataframe`` is modified in place: its patient-id column is set to
        ``patient_label``. Rows are chosen with
        ``Writer.return_nonzero_bin_idx`` on the ``self.bin`` column.
        """
        dataframe[self.writer.patient_id] = patient_label
        dataframe_sorted = Writer.sort_columns(
            df=dataframe,
            columns=self.sort_columns,
            ascending_boolean=False
        )
        idx = Writer.return_nonzero_bin_idx(series=dataframe.loc[:, self.bin])
        output_name = Writer.create_output_name(
            folder=folder,
            patient_id=patient_label,
            output_suffix=self.__class__.output_suffix
        )
        Writer.export_dataframe(
            df=dataframe_sorted.loc[idx, self.output_columns],
            output_name=output_name
        )

class Illustrations:
    """Saves figures under names produced by ``Writer.create_output_name``."""

    @staticmethod
    def write(figure, folder, profile_label, output_suffix):
        """Build the output name from the arguments and save ``figure`` to it."""
        Writer.save_figure(
            figure,
            Writer.create_output_name(folder, profile_label, output_suffix),
        )

class Integrated:
    """Writer for the integrated summary table, indexed by feature."""

    output_suffix = 'integrated.summary.txt'
    section = 'integrative'

    def __init__(self, strings):
        """Store ``strings`` and resolve the four output column labels."""
        self.strings = strings
        self.writer = Writer(strings)
        self.output_columns = [self.somatic, self.copynumber, self.fusion, self.germline]

    @property
    def somatic(self):
        return self.strings[self.section]['somatic']

    @property
    def copynumber(self):
        return self.strings[self.section]['copynumber']

    @property
    def fusion(self):
        return self.strings[self.section]['fusion']

    @property
    def germline(self):
        return self.strings[self.section]['germline']

    def write(self, dataframe, patient_label, folder):
        """Sort ``dataframe`` by index and export the four summary columns."""
        dataframe_sorted = dataframe.sort_index()
        output_name = Writer.create_output_name(
            folder=folder,
            patient_id=patient_label,
            output_suffix=self.__class__.output_suffix
        )
        Writer.export_dataframe_indexed(
            df=dataframe_sorted.loc[:, self.output_columns],
            output_name=output_name,
            # Fixed: ``Writer.feature`` on the class is the property object
            # itself; the label string is only available from an instance.
            index_label=self.writer.feature
        )

class MSI:
    """Writer for the table exported with the ``msi_variants.txt`` suffix."""

    output_suffix = 'msi_variants.txt'

    def __init__(self, strings):
        """Resolve the bin, sort and output column labels via ``Writer(strings)``."""
        self.writer = Writer(strings)
        w = self.writer
        self.bin = w.msi_bin
        # Fixed: a stray trailing comma after this list used to turn
        # ``sort_columns`` into a 1-tuple wrapping the list.
        self.sort_columns = [
            w.almanac_bin, w.cancer_hotspots_bin, w.cancer_hotspots_3d_bin,
            w.cgc_bin, w.gsea_pathways_bin, w.gsea_cm_bin, w.cosmic_bin,
            w.exac_common, w.exac_af,
        ]
        # NOTE(review): ``write`` sorts with ascending_boolean=False and does
        # not read this list; kept because it is a public attribute.
        self.ascending = [False, False, False, False, False, False, False, True, True]
        self.output_columns = [
            w.score_bin, w.sensitive_bin, w.resistance_bin, w.prognostic_bin,
            w.feature_type, w.feature, w.alt_type, w.alt,
            w.chr, w.start, w.end, w.ref, w.allele1, w.allele2,
            w.tumor_f, w.coverage, w.exac_af, w.exac_common, w.clinvar,
            w.number_germline_mutations,
            w.spanningfrags,
            w.left_gene, w.left_chr, w.left_start,
            w.right_gene, w.right_chr, w.right_start,
            w.rationale, w.patient_id, w.tumor, w.normal,
            w.almanac_bin, w.cancer_hotspots_bin, w.cancer_hotspots_3d_bin,
            w.cgc_bin, w.gsea_pathways_bin, w.gsea_cm_bin, w.cosmic_bin,
        ]

    def write(self, dataframe, patient_label, folder):
        """Sort ``dataframe`` (all columns descending) and export the output columns.

        ``dataframe`` is modified in place: its patient-id column is set to
        ``patient_label``.
        """
        # Fixed: ``Writer.patient_id`` on the class is a property object, not
        # the column label; read it from the instance instead.
        dataframe[self.writer.patient_id] = patient_label
        dataframe_sorted = Writer.sort_columns(
            df=dataframe,
            columns=self.sort_columns,
            ascending_boolean=False
        )
        output_name = Writer.create_output_name(
            folder=folder,
            patient_id=patient_label,
            output_suffix=self.__class__.output_suffix
        )
        Writer.export_dataframe(
            df=dataframe_sorted.loc[:, self.output_columns],
            output_name=output_name
        )

class MutationalBurden:
    """Writer for the table exported with the ``mutational_burden.txt`` suffix."""

    output_suffix = 'mutational_burden.txt'
    section = 'burden'

    def __init__(self, strings):
        """Store ``strings`` and resolve the exported column labels."""
        self.strings = strings
        self.writer = Writer(strings)
        self.output_columns = [
            self.patient, self.tumor_type, self.ontology, self.code,
            self.bases_covered, self.n_nonsyn_mutations, self.mutational_burden,
            self.percentile_tcga, self.percentile_tcga_tissue, self.high_burden_boolean,
        ]

    @property
    def patient(self):
        return self.strings[self.section]['patient']

    @property
    def tumor_type(self):
        return self.strings[self.section]['tumor_type']

    # NOTE(review): the same commit renames 'ontology'/'code' keys to
    # 'oncotree_term'/'oncotree_code' in one colnames.ini section; confirm the
    # [burden] section still provides the two keys read below.
    @property
    def ontology(self):
        return self.strings[self.section]['ontology']

    @property
    def code(self):
        return self.strings[self.section]['code']

    @property
    def bases_covered(self):
        return self.strings[self.section]['bases_covered']

    @property
    def n_nonsyn_mutations(self):
        return self.strings[self.section]['n_nonsyn_mutations']

    @property
    def mutational_burden(self):
        return self.strings[self.section]['mutational_burden']

    @property
    def percentile_tcga(self):
        return self.strings[self.section]['percentile_tcga']

    @property
    def percentile_tcga_tissue(self):
        return self.strings[self.section]['percentile_tcga_tissue']

    @property
    def high_burden_boolean(self):
        return self.strings[self.section]['high_burden_boolean']

    def write(self, dataframe, patient_label, folder):
        """Export the ``output_columns`` of ``dataframe``.

        ``dataframe`` is modified in place: the column named by
        ``Writer.patient_id`` is set to ``patient_label``.
        """
        dataframe[self.writer.patient_id] = patient_label
        output_name = Writer.create_output_name(
            folder=folder,
            patient_id=patient_label,
            output_suffix=self.__class__.output_suffix
        )
        Writer.export_dataframe(
            # Fixed: restrict the export to ``output_columns`` as the
            # pre-refactor code did; the whole frame was being written and
            # ``output_columns`` was never used.
            df=dataframe.loc[:, self.output_columns],
            output_name=output_name
        )

class PreclinicalEfficacy:
    """Writer for the table exported with the ``preclinical.efficacy.txt`` suffix."""

    output_suffix = 'preclinical.efficacy.txt'

    def __init__(self, strings):
        """Create the ``Writer`` helper for ``strings``."""
        self.writer = Writer(strings)

    def write(self, dataframe, patient_label, folder):
        """Export ``dataframe`` unchanged to the name built from the arguments."""
        destination = Writer.create_output_name(folder, patient_label, type(self).output_suffix)
        Writer.export_dataframe(dataframe, destination)


class PreclinicalMatchmaking:
    """Writer for the table exported with the ``matchmaker.txt`` suffix."""

    output_suffix = 'matchmaker.txt'

    def __init__(self, strings):
        """Create the ``Writer`` helper for ``strings``."""
        self.writer = Writer(strings)

    def write(self, dataframe, patient_label, folder):
        """Export ``dataframe`` unchanged to the name built from the arguments."""
        destination = Writer.create_output_name(folder, patient_label, type(self).output_suffix)
        Writer.export_dataframe(dataframe, destination)

class SomaticFiltered:
    """Writer for the table exported with the ``somatic.filtered.txt`` suffix."""

    output_suffix = 'somatic.filtered.txt'

    def __init__(self, strings):
        """Resolve the sort and output column labels via ``Writer(strings)``."""
        self.writer = Writer(strings)
        w = self.writer
        self.sort_columns = [w.feature, w.feature_type]
        self.output_columns = [
            w.feature_type, w.feature, w.alt_type, w.alt,
            w.chr, w.start, w.end, w.ref, w.allele1, w.allele2,
            w.tumor_f, w.coverage,
            w.spanningfrags,
            w.left_gene, w.left_chr, w.left_start,
            w.right_gene, w.right_chr, w.right_start,
            w.rationale, w.patient_id, w.tumor, w.normal,
        ]

    def write(self, dataframe, patient_label, folder):
        """Sort ``dataframe`` (descending) and export the output columns.

        ``dataframe`` is modified in place: its patient-id column is set to
        ``patient_label``.
        """
        # Fixed: ``Writer.patient_id`` on the class is a property object, not
        # the column label; read it from the instance instead.
        dataframe[self.writer.patient_id] = patient_label
        dataframe_sorted = Writer.sort_columns(
            df=dataframe,
            columns=self.sort_columns,
            ascending_boolean=False
        )
        output_name = Writer.create_output_name(
            folder=folder,
            patient_id=patient_label,
            output_suffix=self.__class__.output_suffix
        )
        Writer.export_dataframe(
            df=dataframe_sorted.loc[:, self.output_columns],
            output_name=output_name
        )

class SomaticScored:
    """Writer for the table exported with the ``somatic.scored.txt`` suffix."""

    output_suffix = 'somatic.scored.txt'

    def __init__(self, strings):
        """Resolve sort order and output column labels via ``Writer(strings)``."""
        cols = Writer(strings)
        self.writer = cols
        self.sort_columns = [
            cols.almanac_bin, cols.cancer_hotspots_bin, cols.cancer_hotspots_3d_bin,
            cols.cgc_bin, cols.gsea_pathways_bin, cols.gsea_cm_bin, cols.cosmic_bin,
            cols.validation_detection_power, cols.validation_coverage,
            cols.number_germline_mutations,
            cols.exac_common, cols.exac_af,
        ]
        # Ten descending keys followed by the two ascending ExAC keys.
        self.ascending = [False] * 10 + [True] * 2
        self.output_columns = [
            cols.score_bin, cols.sensitive_bin, cols.resistance_bin, cols.prognostic_bin,
            cols.feature_type, cols.feature, cols.alt_type, cols.alt,
            cols.chr, cols.start, cols.end, cols.ref, cols.allele1, cols.allele2,
            cols.tumor_f, cols.coverage, cols.exac_af, cols.exac_common, cols.clinvar,
            cols.number_germline_mutations,
            cols.spanningfrags,
            cols.left_gene, cols.left_chr, cols.left_start,
            cols.right_gene, cols.right_chr, cols.right_start,
            cols.validation_coverage, cols.validation_tumor_f, cols.validation_detection_power,
            cols.rationale, cols.patient_id, cols.tumor, cols.normal,
            cols.almanac_bin, cols.cancer_hotspots_bin, cols.cancer_hotspots_3d_bin,
            cols.cgc_bin, cols.gsea_pathways_bin, cols.gsea_cm_bin, cols.cosmic_bin,
        ]

    def write(self, dataframe, patient_label, folder):
        """Sort ``dataframe`` with ``self.ascending`` and export the output columns.

        ``dataframe`` is modified in place: its patient-id column is set to
        ``patient_label``.
        """
        dataframe[self.writer.patient_id] = patient_label
        ordered = Writer.sort_columns(dataframe, self.sort_columns, self.ascending)
        destination = Writer.create_output_name(folder, patient_label, type(self).output_suffix)
        Writer.export_dataframe(ordered.loc[:, self.output_columns], destination)

class Strategies:
    """Writer for the table exported with the ``therapeutic_strategies.txt`` suffix."""

    output_suffix = 'therapeutic_strategies.txt'

    def __init__(self, strings):
        """Create the ``Writer`` helper for ``strings``."""
        self.writer = Writer(strings)

    def write(self, dataframe, patient_label, folder):
        """Export ``dataframe`` with its index labelled 'Assertion / Strategy'."""
        destination = Writer.create_output_name(folder, patient_label, type(self).output_suffix)
        Writer.export_dataframe_indexed(dataframe, destination, 'Assertion / Strategy')
