diff --git a/src/BanzaiDB/banzaidb.py b/src/BanzaiDB/banzaidb.py index 0ffd4f9..0b20ddf 100644 --- a/src/BanzaiDB/banzaidb.py +++ b/src/BanzaiDB/banzaidb.py @@ -64,7 +64,7 @@ def init_database_with_default_tables(args): """ # Add additional (default) tables here... def_tables = ['determined_variants', 'strains_under_investigation', - 'references', 'reference_features'] + 'references', 'reference_features', 'strain_features'] with database.make_connection() as connection: try: r.db_create(connection.db).run(connection) @@ -177,11 +177,15 @@ def populate_mapping(args): cur_ref = r.table('references').get('current_reference').run(connection) ref = cur_ref["reference_id"]+"_"+str(cur_ref["revision"]) for e in strains: - # open the userplot of the current reference & strain - not_called = mapping.get_N_char_positions(run_path, e['StrainID']) - ranges = misc.get_intervals(not_called) - r.table('strains_under_investigation').get(e['StrainID']).update({"reference": ref, "coverage": json.dumps(ranges)}).run(connection) - + coverage = mapping.get_coverage(run_path, e['StrainID']) + r.table('strain_features').insert(coverage).run(connection) + + # Add relations from ref_feat to variants + for feature in ref_meta: + r.table('variants')\ + .filter({"LocusTag" : feature['LocusTag']})\ + .update({"RefFeat" : feature['id']})\ + .run(connection) def populate_assembly(): """ diff --git a/src/BanzaiDB/mapping.py b/src/BanzaiDB/mapping.py index 94d0872..29639f9 100644 --- a/src/BanzaiDB/mapping.py +++ b/src/BanzaiDB/mapping.py @@ -45,3 +45,37 @@ def get_N_char_positions(run_path, sid): if e == 'N': no_call.append(idx) return no_call + +def get_coverage(run_path, sid): + """ + Return any abnormal coverage information in the .consequences file + """ + filename = sid + ".consequences" #change this when we work out naming convention + strain_features = [] + + with open(os.path.join(os.path.join(run_path, sid), filename)) as fin: + for idx, line in enumerate(fin): + feature = {} + if idx >= 1: + cur = line.split("\t") + feature['id'] = cur[0]+'_'+sid+'_'+cur[1] + coverage = float(cur[5]) + + # Only store if there is interesting coverage statistics + if coverage != 1.0: # + feature['coverage'] = coverage + + old_len, new_len = int(cur[2]), int(cur[3]) + AA_len = old_len - new_len + + # Only store if there is an indel + if AA_len != 0: + feature['aa_difference'] = AA_len + + if 'coverage' in feature or 'difference' in feature: + feature['StrainID'] = sid + feature['Reference'] = cur[0] + feature['LocusTag'] = cur[1] + strain_features.append(feature) + + return strain_features