diff --git a/README.md b/README.md index 23ce5c2..4ef03ab 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ # hAMRonization -This repo contains the hAMRonization module and CLI parser tools combine the outputs of -18 disparate antimicrobial resistance gene detection tools into a single unified format. +This repo contains the hAMRonization module and CLI parser tools that combine the outputs +of 18 disparate antimicrobial resistance gene detection tools into a single unified format. This is an implementation of the [hAMRonization AMR detection specification scheme](docs/hAMRonization_specification_details.csv) which supports gene presence/absence resistance and mutational resistance (if supported by the underlying tool). @@ -80,8 +80,7 @@ Tools with hAMRonizable reports: report`) kmerresistance hAMRonize kmerresistance's output report i.e., OUTPUT.res resfams hAMRonize resfams's output report i.e., resfams.tblout - resfinder hAMRonize resfinder's output report i.e., - ResFinder_results_tab.txt + resfinder hAMRonize resfinder's JSON output report (use -j to produce) mykrobe hAMRonize mykrobe's output report i.e., OUTPUT.json pointfinder hAMRonize pointfinder's output report i.e., PointFinder_results.txt @@ -193,7 +192,7 @@ If you want to write multiple reports to one file, this `.write` method can acce Currently implemented parsers and the last tool version for which they have been validated: 1. [abricate](hAMRonization/AbricateIO.py): last updated for v1.0.0 -2. [amrfinderplus](hAMRonization/AmrFinderPlusIO.py): last updated for v3.12.18 +2. [amrfinderplus](hAMRonization/AmrFinderPlusIO.py): last updated for v4.0.3 3. [amrplusplus](hAMRonization/AmrPlusPlusIO.py): last updated for c6b097a 4. [ariba](hAMRonization/AribaIO.py): last updated for v2.14.6 5. 
[csstar](hAMRonization/CSStarIO.py): last updated for v2.1.0 diff --git a/hAMRonization/AmrFinderPlusIO.py b/hAMRonization/AmrFinderPlusIO.py index 9d1f2f0..f60da66 100644 --- a/hAMRonization/AmrFinderPlusIO.py +++ b/hAMRonization/AmrFinderPlusIO.py @@ -18,78 +18,56 @@ class AmrFinderPlusIterator(hAMRonizedResultIterator): + + nuc_field_map = { + "Protein id": None, + "Contig id": "input_sequence_id", + "Start": "input_gene_start", + "Stop": "input_gene_stop", + "Strand": "strand_orientation", + "Element symbol": "gene_symbol", + "Element name": "gene_name", + "Scope": None, + "Type": None, + "Subtype": None, + "Class": "drug_class", + "Subclass": "antimicrobial_agent", + "Method": None, + "Target length": "input_gene_length", + "Reference sequence length": "reference_gene_length", + "% Coverage of reference": "coverage_percentage", + "% Identity to reference": "sequence_identity", + "Alignment length": None, + "Closest reference accession": "reference_accession", + "Closest reference name": None, + "HMM accession": None, + "HMM description": None, + "Hierarchy node": None, + # Fields we compute below (not in TSV) + "amino_acid_mutation": "amino_acid_mutation", + "nucleotide_mutation": "nucleotide_mutation", + "genetic_variation_type": "genetic_variation_type", + } + + # AMRFinderPlus outputs the same column set for nuc and prot detections, + # with Start and Stop always in nt units; however target and + # reference length are reported in AA for proteins. 
+ prot_field_map = nuc_field_map.copy() + prot_field_map.update({ + "Target length": "input_protein_length", + "Reference sequence length": "reference_protein_length" + }) + def __init__(self, source, metadata): metadata["analysis_software_name"] = "amrfinderplus" metadata["reference_database_name"] = "NCBI Reference Gene Database" self.metadata = metadata - # check source for whether AMFP has been run in protein or nt mode - - nucleotide_field_mapping = { - "Protein identifier": None, - "Contig id": "input_sequence_id", - "Start": "input_gene_start", - "Stop": "input_gene_stop", - "Strand": "strand_orientation", - "Gene symbol": "gene_symbol", - "Sequence name": "gene_name", - "Scope": None, - "Element type": None, - "Element subtype": None, - "Class": "drug_class", - "Subclass": "antimicrobial_agent", - "Method": None, - "Target length": "input_protein_length", - "Reference sequence length": "reference_protein_length", - "% Coverage of reference sequence": "coverage_percentage", - "% Identity to reference sequence": "sequence_identity", - "Alignment length": None, - "Accession of closest sequence": "reference_accession", - "Name of closest sequence": None, - "HMM id": None, - "HMM description": None, - "AA Mutation": "amino_acid_mutation", - "Nucleotide Mutation": "nucleotide_mutation", - "genetic_variation_type": "genetic_variation_type", - } - protein_field_mapping = { - "Protein identifier": "input_sequence_id", - "Gene symbol": "gene_symbol", - "Sequence name": "gene_name", - "Scope": None, - "Element": None, - "Element subtype": None, - "Class": "drug_class", - "Subclass": "antimicrobial_agent", - "Method": None, - "Target length": "input_protein_length", - "Reference sequence length": "reference_protein_length", - "% Coverage of reference sequence": "coverage_percentage", - "% Identity to reference sequence": "sequence_identity", - "Alignment length": None, - "Accession of closest sequence": "reference_accession", - "Name of closest sequence": None, - "HMM 
id": None, - "HMM description": None, - "AA Mutation": "amino_acid_mutation", - "genetic_variation_type": "genetic_variation_type", - } - - with open(source) as fh: - header = next(fh).strip().split("\t") - try: - first_result = next(fh) - prot_id = header.index("Protein identifier") - if first_result.strip().split("\t")[prot_id] == "NA": - self.field_mapping = nucleotide_field_mapping - else: - self.field_mapping = protein_field_mapping - except StopIteration: - # doesn't really matter which mapping as this error indicates - # this is an empty results file - self.field_mapping = nucleotide_field_mapping - - super().__init__(source, self.field_mapping, self.metadata) + # We pass None for the field_map as it differs depending on + # whether we return a nucleotide or protein variant detection. + # TODO: refactor field_map out of super's constructor, and make + # it a parameter on super's hAMRonize(). + super().__init__(source, None, self.metadata) def parse(self, handle): """ @@ -98,11 +76,16 @@ def parse(self, handle): skipped_truncated = 0 reader = csv.DictReader(handle, delimiter="\t") for result in reader: - # replace NA value with None for consitency + + # Replace NA value with None for consistency for field, value in result.items(): if value == "NA": result[field] = None + # Skip reported virulence genes + if result['Type'] == "VIRULENCE": + continue + # AFP reports partial hits so to avoid misleadingly listing these # as present skip results with INTERNAL_STOP # recommended by developers @@ -113,24 +96,40 @@ def parse(self, handle): # "POINT" indicates mutational resistance # amrfinderplus has no special fields but the mutation itself is # appended to the symbol name so we want to split this - result["AA Mutation"] = None - result["Nucleotide Mutation"] = None - result["genetic_variation_type"] = GENE_PRESENCE + result['amino_acid_mutation'] = None + result['nucleotide_mutation'] = None + result['genetic_variation_type'] = GENE_PRESENCE - if result["Element 
subtype"] == "POINT": - gene_symbol, mutation = result["Gene symbol"].rsplit("_", 1) - result["Gene symbol"] = gene_symbol + if result['Subtype'] == "POINT": + gene_symbol, mutation = result['Element symbol'].rsplit("_", 1) + result['Element symbol'] = gene_symbol _, ref, pos, alt, _ = re.split(r"(\D+)(\d+)(\D+)", mutation) # this means it is a protein mutation - if result["Method"] in ["POINTX", "POINTP"]: - result["AA Mutation"] = f"p.{ref}{pos}{alt}" - result["genetic_variation_type"] = AMINO_ACID_VARIANT - elif result["Method"] == "POINTN": + if result['Method'] in ["POINTX", "POINTP"]: + result['amino_acid_mutation'] = f"p.{ref}{pos}{alt}" + result['genetic_variation_type'] = AMINO_ACID_VARIANT + elif result['Method'] == "POINTN": # e.g., 23S_G2032G ampC_C-11C -> c.2032G>G - result["Nucleotide Mutation"] = f"c.{pos}{ref}>{alt}" - result["genetic_variation_type"] = NUCLEOTIDE_VARIANT + result['nucleotide_mutation'] = f"c.{pos}{ref}>{alt}" + result['genetic_variation_type'] = NUCLEOTIDE_VARIANT + + # Determine the field_map to use depending on the method used + # The following seems to cover all bases with a minimum of fuss + have_prot = result['Protein id'] is not None + method = result['Method'] + if method.endswith('P') or method.endswith('X'): + field_map = self.prot_field_map + elif method.endswith('N'): + field_map = self.nuc_field_map + elif method in ['COMPLETE', 'HMM']: + field_map = self.prot_field_map if have_prot else self.nuc_field_map + else: + warnings.warn(f"Assuming unknown method {method} implies a protein detection" + f" in {self.metadata['input_file_name']}") + field_map = self.prot_field_map - yield self.hAMRonize(result, self.metadata) + # This uses the "override hack" that should perhaps be cleaned up + yield self.hAMRonize(result, self.metadata, field_map) if skipped_truncated > 0: warnings.warn(f"Skipping {skipped_truncated} records with INTERNAL_STOP " diff --git a/hAMRonization/Interfaces.py b/hAMRonization/Interfaces.py index 
d1ab584..f9b7d5d 100644 --- a/hAMRonization/Interfaces.py +++ b/hAMRonization/Interfaces.py @@ -50,16 +50,24 @@ def __init__(self, source, field_map, metadata): except Exception: self.stream.close() - def hAMRonize(self, report_data, metadata): + # TODO: the field_map_override is a half-hack to support the scenario + # (as for amrfinderplus) where different records need different mappings, + # so setting a field_map in the constructor makes no sense. + # It might be cleaner to remove it from the constructor altogether and + # make it a parameter of this method (which is the only place where it + # is referenced anyway), and subclasses can trivially pass it in. + def hAMRonize(self, report_data, metadata, field_map_override=None): """ Convert a line of parsed AMR report in original format to the hAMRonization specification - - report_result parsed dict of single results from report - - metadata dict of additional metadata fields that need added + - report_data parsed dict of single result from report + - metadata dict of additional metadata fields + - field_map_override optional override of field_map passed in c'tor """ hAMRonized_result_data = {**metadata} - for original_field, hAMRonized_field in self.field_map.items(): + field_map = field_map_override or self.field_map + for original_field, hAMRonized_field in field_map.items(): if hAMRonized_field: hAMRonized_result_data[hAMRonized_field] = report_data[original_field] diff --git a/hAMRonization/ResFinderIO.py b/hAMRonization/ResFinderIO.py index 052d06d..b06d8ec 100644 --- a/hAMRonization/ResFinderIO.py +++ b/hAMRonization/ResFinderIO.py @@ -63,11 +63,13 @@ def set_shared_fields(r): res.gene_symbol = r.get('name', "unspecified") res.gene_name = r.get('name', "unspecified") res.reference_accession = r.get('ref_acc', r.get('ref_id', r.get('key', "unknown"))) + res.reference_database_name = _get_db_name(r.get('ref_database')) + res.reference_database_version = _get_db_ver(r.get('ref_database')) # optional 
res.coverage_percentage = _safe_round(r.get('coverage'), 1) res.coverage_depth = None # we may have this for mutations detected from reads - res.coverage_ratio = r.get('coverage')/100.0 + res.coverage_ratio = None res.input_sequence_id = r.get('query_id') res.input_gene_length = _get_length(r.get('query_start_pos'), r.get('query_end_pos')) res.input_gene_start = _get_start_pos(r.get('query_start_pos'), r.get('query_end_pos')) @@ -118,9 +120,9 @@ def set_variation_fields(r, vs): _codon.append(v.get('codon_change')) # Add the content of the list fields to the bags above - fold(lambda s, e: s.add(e), _phenos, v.get('phenotypes', [])) - fold(lambda s, e: s.add(e), _notes, v.get('notes', [])) - fold(lambda s, e: s.add(e), _pmids, v.get('pmids', [])) + _phenos.update(v.get('phenotypes', [])) + _notes.update(v.get('notes', [])) + _pmids.update(v.get('pmids', [])) # We have collected all variations on region r, now collapse into fields on res res.predicted_phenotype = _empty_to_none(", ".join(filter(None, _phenos))) @@ -145,11 +147,9 @@ def set_variation_fields(r, vs): # - for each r report one AMINO_ACID_VARIANT record, collapsing the seq_variations for p in filter(lambda d: d.get('amr_resistant', False), data['phenotypes'].values()): - # Set the fields available on phenotype object + # Set the fields available on the phenotype object res.drug_class = ", ".join(p.get('amr_classes', [])) res.antimicrobial_agent = p.get('amr_resistance', "unspecified") - res.reference_database_name = _get_db_name(p.get('ref_database')) - res.reference_database_version = _get_db_ver(p.get('ref_database')) # Iterate r over the regions (AMR genes) referenced by p, and yield each in turn for r in map(lambda k: data['seq_regions'][k], p.get('seq_regions', [])): diff --git a/hAMRonization/hAMRonizedResult.py b/hAMRonization/hAMRonizedResult.py index 439ef42..3a2d8e4 100644 --- a/hAMRonization/hAMRonizedResult.py +++ b/hAMRonization/hAMRonizedResult.py @@ -77,7 +77,7 @@ def __post_init__(self): 
input_file_name = getattr(self, "input_file_name") input_file_name = os.path.basename(input_file_name) - for suffix in [ ".gz", ".fna", ".fasta", ".fsa", ".faa", ".fa" ]: + for suffix in [".gz", ".fna", ".fasta", ".fsa", ".faa", ".fa"]: input_file_name = input_file_name.removesuffix(suffix) setattr(self, "input_file_name", input_file_name) diff --git a/test/data/dummy/amrfinderplus/report.tsv b/test/data/dummy/amrfinderplus/report.tsv index ff86d55..2388d0b 100644 --- a/test/data/dummy/amrfinderplus/report.tsv +++ b/test/data/dummy/amrfinderplus/report.tsv @@ -1,2 +1,2 @@ -Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -NA NZ_LR792628.1 1333611 1334783 - oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100 99.49 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NF000272.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description +NA NZ_LR792628.1 1333611 1334783 - oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 99.49 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA diff --git a/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv b/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv index 3da9343..f56f1b9 100644 --- 
a/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv +++ b/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv @@ -1,2 +1,2 @@ -Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -DAWXTK010000082_noncoding_test NA DAWXTK010000082.1:68-2970 1 2903 + 23S_A2062G Neisseria gonorrhoeae azithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN POINTN 2903 2910 100.00 99.35 2910 NC_002946.2:1119158-1116249 23S ribosomal RNA NA NA +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description +NA DAWXTK010000082.1:68-2970 1 2903 + 23S_A2059G Neisseria gonorrhoeae azithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN POINTN 2903 2910 100.00 99.35 2910 NC_002946.2:1119158-1116249 23S ribosomal RNA NA NA diff --git a/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv b/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv index d2a14f3..cfd40d2 100644 --- a/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv +++ b/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv @@ -1 +1 @@ -Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class 
Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node diff --git a/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv b/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv index 8e70e9f..13fc47c 100644 --- a/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv +++ b/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv @@ -1,89 +1,89 @@ -Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -Testname NA contig01 101 958 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEX 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA -Testname NA contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTX 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NA NA -Testname NA contig03 101 802 + blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALX 234 265 88.30 100.00 234 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NA NA -Testname NA contig04 101 1147 + vanG D-alanine--D-serine ligase VanG core AMR AMR GLYCOPEPTIDE VANCOMYCIN EXACTX 349 349 100.00 100.00 349 WP_063856695.1 D-alanine--D-serine ligase VanG NA NA -Testname NA contig04 1261 2391 + blaEC BlaEC family class C beta-lactamase plus AMR AMR BETA-LACTAM BETA-LACTAM BLASTX 377 377 100.00 98.14 377 WP_063610930.1 extended-spectrum class C beta-lactamase EC-15 NA NA -Testname NA contig08 101 700 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM 
BETA-LACTAM PARTIAL_CONTIG_ENDX 200 286 69.93 100.00 200 WP_061158039.1 class A beta-lactamase TEM-156 NA NA -Testname NA contig09 1 675 - aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIAL_CONTIG_ENDX 225 275 81.82 100.00 225 WP_109545041.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NA NA -Testname NA contig09 715 1377 - sul2 sulfonamide-resistant dihydropteroate synthase Sul2 core AMR AMR SULFONAMIDE SULFONAMIDE PARTIAL_CONTIG_ENDX 221 271 81.55 100.00 221 WP_001043265.1 sulfonamide-resistant dihydropteroate synthase Sul2 NA NA -Testname NA contig10 486 1307 + blaOXA OXA-9 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 274 274 100.00 99.64 274 WP_000722315.1 oxacillin-hydrolyzing class D beta-lactamase OXA-9 NA NA -Testname NA contig11 101 958 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 286 286 100.00 93.01 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA -Testname NA contig12 71 634 + qacR multidrug-binding transcriptional regulator QacR plus STRESS BIOCIDE QUATERNARY AMMONIUM QUATERNARY AMMONIUM BLASTX 188 188 100.00 99.47 188 ADK23698.1 multidrug-binding transcriptional regulator QacR NA NA -Testname NA contig13 1 1137 + emrD3 multidrug efflux MFS transporter EmrD-3 plus AMR AMR EFFLUX EFFLUX EXACTX 379 379 100.00 100.00 379 ABQ18953.1 multidrug efflux MFS transporter EmrD-3 NA NA -Testname NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig15 1 2905 + 23S_A2058T Escherichia azithromycin/erythromycin/telithromycin resistant 23S 
core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig16 1 720 + nfsA_K141STOP Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig05 237 1224 - 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig14 1 1089 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN 
COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA 
-Testname NA contig14 1 1089 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_L14L two-component system sensor histidine kinase PmrB 
[WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_C84C two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 
100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig15 1 2905 + 23S_A2058T Escherichia azithromycin/erythromycin/telithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 
NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 
NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig16 1 720 + nfsA_E223E nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 
WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_G131G nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_K141STOP Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_Q44Q nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R133R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R203R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_S33S nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig17 1 247 + ampC_C-11C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_C-42C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_G-15G Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 
NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_T-14T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_T-32T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node +NA contig01 101 958 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEX 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA +NA contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTX 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NA NA +NA contig03 101 802 + blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALX 234 265 88.30 100.00 234 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NA NA +NA contig04 101 1147 + vanG D-alanine--D-serine ligase VanG core AMR AMR GLYCOPEPTIDE VANCOMYCIN EXACTX 349 349 100.00 100.00 349 WP_063856695.1 D-alanine--D-serine ligase VanG NA NA +NA contig04 1261 2391 + blaEC BlaEC family class C beta-lactamase plus AMR AMR BETA-LACTAM BETA-LACTAM BLASTX 377 377 100.00 98.14 377 WP_063610930.1 
extended-spectrum class C beta-lactamase EC-15 NA NA +NA contig08 101 700 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIAL_CONTIG_ENDX 200 286 69.93 100.00 200 WP_061158039.1 class A beta-lactamase TEM-156 NA NA +NA contig09 1 675 - aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIAL_CONTIG_ENDX 225 275 81.82 100.00 225 WP_109545041.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NA NA +NA contig09 715 1377 - sul2 sulfonamide-resistant dihydropteroate synthase Sul2 core AMR AMR SULFONAMIDE SULFONAMIDE PARTIAL_CONTIG_ENDX 221 271 81.55 100.00 221 WP_001043265.1 sulfonamide-resistant dihydropteroate synthase Sul2 NA NA +NA contig10 486 1307 + blaOXA OXA-9 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 274 274 100.00 99.64 274 WP_000722315.1 oxacillin-hydrolyzing class D beta-lactamase OXA-9 NA NA +NA contig11 101 958 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 286 286 100.00 93.01 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA +NA contig12 71 634 + qacR multidrug-binding transcriptional regulator QacR plus STRESS BIOCIDE QUATERNARY AMMONIUM QUATERNARY AMMONIUM BLASTX 188 188 100.00 99.47 188 ADK23698.1 multidrug-binding transcriptional regulator QacR NA NA +NA contig13 1 1137 + emrD3 multidrug efflux MFS transporter EmrD-3 plus AMR AMR EFFLUX EFFLUX EXACTX 379 379 100.00 100.00 379 ABQ18953.1 multidrug efflux MFS transporter EmrD-3 NA NA +NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig15 1 2905 + 23S_A2058T 
Escherichia azithromycin/erythromycin/telithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig16 1 720 + nfsA_K141STOP Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig05 237 1224 - 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig14 1 1089 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 
363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_T156T two-component system sensor histidine kinase PmrB 
[WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 
2181 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_C84C two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 
100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig15 1 2905 + 23S_A2058T Escherichia azithromycin/erythromycin/telithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2032G Escherichia 23S 
ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE 
CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig16 1 720 + nfsA_E223E nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_G131G nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_K141STOP Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_Q44Q nitroreductase NfsA [WILDTYPE] core 
AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R133R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R203R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_S33S nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig17 1 247 + ampC_C-11C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_C-42C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_G-15G Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_T-14T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_T-32T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR 
POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA diff --git a/test/data/raw_outputs/amrfinderplus/report_protein.tsv b/test/data/raw_outputs/amrfinderplus/report_protein.tsv index 50b9cdc..a5a3f17 100644 --- a/test/data/raw_outputs/amrfinderplus/report_protein.tsv +++ b/test/data/raw_outputs/amrfinderplus/report_protein.tsv @@ -1,4 +1,4 @@ -Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description blaTEM-156 contig01 101 961 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEP 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NF000531.2 TEM family class A beta-lactamase blaPDC-114_blast contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTP 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NF000422.6 PDC family class C beta-lactamase blaOXA-436_partial contig03 101 802 + blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase diff --git a/test/run_integration_test.sh b/test/run_integration_test.sh index 28445a6..64bdb11 100755 --- a/test/run_integration_test.sh +++ b/test/run_integration_test.sh @@ -10,10 +10,10 @@ hamronize 
abricate data/raw_outputs/abricate/report.tsv --reference_database_ver hamronize ariba data/raw_outputs/ariba/report.tsv --reference_database_version db_v_1 --reference_database_name dbname --input_file_name ariba_report --analysis_software_version ariba_v1 --format json --output hamronized_ariba.json hamronize ariba data/raw_outputs/ariba/report.tsv --reference_database_version db_v_1 --reference_database_name dbname --input_file_name ariba_report --analysis_software_version ariba_v1 --format tsv --output hamronized_ariba.tsv -hamronize amrfinderplus --input_file_name amrfinderplus_nucleotide_report --analysis_software_version AFP_nt_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format json --output hamronized_amrfinderplus_nt.json -hamronize amrfinderplus --input_file_name amrfinderplus_nucleotide_report --analysis_software_version AFP_nt_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format tsv --output hamronized_amrfinderplus_nt.tsv -hamronize amrfinderplus --input_file_name amrfinderplus_protein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv --format json --output hamronized_amrfinderplus_aa.json -hamronize amrfinderplus --input_file_name amrfinderplus_protein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv --format tsv --output hamronized_amrfinderplus_aa.tsv +hamronize amrfinderplus --input_file_name afp_nt_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format json --output hamronized_amrfinderplus_nt.json +hamronize amrfinderplus --input_file_name afp_nt_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format tsv --output 
hamronized_amrfinderplus_nt.tsv +hamronize amrfinderplus --input_file_name afp_aa_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_protein.tsv --format json --output hamronized_amrfinderplus_aa.json +hamronize amrfinderplus --input_file_name afp_aa_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_protein.tsv --format tsv --output hamronized_amrfinderplus_aa.tsv hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 --reference_database_version card_v1 data/raw_outputs/rgi/rgi.txt --format json --output hamronized_rgi.json hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 --reference_database_version card_v1 data/raw_outputs/rgi/rgi.txt --format tsv --output hamronized_rgi.tsv diff --git a/test/test_parsing_validity.py b/test/test_parsing_validity.py index 7e81c52..d098ff1 100644 --- a/test/test_parsing_validity.py +++ b/test/test_parsing_validity.py @@ -64,8 +64,8 @@ def test_abricate(): def test_amrfinderplus(): metadata = { - "analysis_software_version": "3.6.10", - "reference_database_version": "2019-Jul-28", + "analysis_software_version": "4.0.3", + "reference_database_version": "2024-12-18.1", "input_file_name": "Dummy", } parsed_report = hAMRonization.parse( @@ -81,10 +81,10 @@ def test_amrfinderplus(): == "multidrug efflux RND transporter periplasmic adaptor subunit OqxA" ) assert result.reference_database_name == "NCBI Reference Gene Database" - assert result.reference_database_version == "2019-Jul-28" + assert result.reference_database_version == "2024-12-18.1" assert result.reference_accession == "WP_002914189.1" assert result.analysis_software_name == "amrfinderplus" - assert result.analysis_software_version == "3.6.10" + assert result.analysis_software_version == "4.0.3" assert result.genetic_variation_type == "gene_presence_detected" # optional fields - present in dummy dataset @@ 
-100,8 +100,8 @@ def test_amrfinderplus(): assert result.input_protein_length == 391 # missing data in report - assert result.reference_gene_length == None - assert result.input_gene_length == None + assert result.reference_gene_length is None + assert result.input_gene_length is None assert result.coverage_depth is None assert result.coverage_ratio is None assert result.resistance_mechanism is None @@ -354,11 +354,10 @@ def test_resfinder(): assert result.reference_database_version == "2.4.0" assert result.reference_accession == "EU370913" - # optional fields (13) + # optional fields (12) assert result.predicted_phenotype == "ciprofloxacin, nalidixic acid, trimethoprim, chloramphenicol" assert result.predicted_phenotype_confidence_level == "Must be in an operon with oqxB,phenotype differs based on genomic location of the operon PMID 25801572,also nitrofurantoin resistance PMID 26552976. Natural in K. pneumoniae. PMIDs: 18440636" assert result.coverage_percentage == 100.0 - assert result.coverage_ratio == 1.0 assert result.input_sequence_id == "contig1" assert result.input_gene_length == 1176 assert result.input_gene_start == 101 @@ -369,8 +368,9 @@ def test_resfinder(): assert result.reference_gene_stop == 1176 assert result.sequence_identity == 100.0 - # not set (12) + # not set (13) assert result.coverage_depth is None + assert result.coverage_ratio is None assert result.input_protein_length is None assert result.input_protein_start is None assert result.input_protein_stop is None @@ -400,7 +400,6 @@ def test_resfinder(): assert result.predicted_phenotype == "ampicillin" assert result.predicted_phenotype_confidence_level == "The nineteen pbp5 mutations must be present simultaneously for resistance phenotype. 
PMIDs: 25182648" assert result.coverage_percentage == 100.0 - assert result.coverage_ratio == 1.0 assert result.input_sequence_id == "contig2" assert result.input_gene_length == 2037 assert result.input_gene_start == 64029 @@ -411,13 +410,14 @@ def test_resfinder(): assert result.reference_gene_stop == 2037 assert result.sequence_identity == 95.34 - # mutation fields (2) + # mutation fields (3) assert result.amino_acid_mutation == "p.V24A, p.S27G, p.R34Q, p.G66E, p.A68T, p.E85D, p.E100Q, p.K144Q, p.T172A, p.L177I, p.D204G, p.A216S, p.T324A, p.N496K, p.A499T, p.E525D, p.P667S" assert result.nucleotide_mutation is None assert result.nucleotide_mutation_interpretation == "Codon changes: gta>gca agt>ggt cgg>cag gga>gaa gca>aca gaa>gat gag>cag aaa>caa aca>gca tta>ata gac>ggc gca>tcc aca>gca aat>aaa gca>aca gag>gat ccc>tcg" # not set (10) assert result.coverage_depth is None + assert result.coverage_ratio is None assert result.input_protein_length is None assert result.input_protein_start is None assert result.input_protein_stop is None @@ -428,7 +428,7 @@ def test_resfinder(): assert result.amino_acid_mutation_interpretation is None else: - assert result.genetic_variation_type == False # just to stop + assert result.genetic_variation_type is False # just to stop # Check that we saw all assert seen_genes == 4