Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions R/converters.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
#' @param protein_id_col Use 'Protein.Groups' (default) column for protein name.
#' @param fasta_protein_name Name of column that matches with the protein names
#' in `protein_id_col`. The protein names in these two columns must match in
#' order to join the FASTA file with the DIA-NN output.
#' order to join the FASTA file with the DIA-NN output. Default is "uniprot_ac"
#' for the UniProt accession (e.g. "P25440"). For the UniProt mnemonic ID
#' (e.g. "BRD2_HUMAN"), use "entry_name".
#' @param global_qvalue_cutoff The global qvalue cutoff. Default is 0.01.
#' @param qvalue_cutoff local qvalue cutoff for library. Default is 0.01.
#' @param pg_qvalue_cutoff local qvalue cutoff for protein groups Run should be
Expand All @@ -46,14 +47,26 @@
#' @export
#'
#' @examples
#' # ptm = read.csv("Phospho/report.tsv", sep="\t")
#' # protein = read.csv("Protein/report.tsv", sep="\t")
#' # annotation = read.csv("Phospho/annotation.csv")
#' # annotation_protein = read.csv("Protein/annotation.csv")
#' # Example from PRIDE ID PXD053502
#' input = system.file("tinytest/raw_data/DIANN/report.tsv",
#' package = "MSstatsPTM")
#' input = data.table::fread(input)
#' annot = system.file("tinytest/raw_data/DIANN/annot.csv",
#' package = "MSstatsPTM")
#' annot = data.table::fread(annot)
#' fasta_path = system.file("extdata", "diann.fasta",
#' package="MSstatsPTM")
#'
#' msstatsptm_format = DIANNtoMSstatsPTMFormat(
#' input,
#' annot,
#' protein_id_col = "Protein.Names",
#' fasta_path = fasta_path,
#' fasta_protein_name = "entry_name",
#' use_log_file = FALSE
#' )
#'
#' #DIANNtoMSstatsPTMFormat(ptm, annotation,
#' # protein, annotation_protein,
#' # fasta_path="fasta_file.fasta")
#' head(msstatsptm_format$PTM)
#'
DIANNtoMSstatsPTMFormat = function(input,
annotation,
Expand Down
53 changes: 53 additions & 0 deletions inst/extdata/diann.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
>sp|P25440|BRD2_HUMAN Bromodomain-containing protein 2 OS=Homo sapiens OX=9606 GN=BRD2 PE=1 SV=2
MLQNVTPHNKLPGEGNAGLLGLGPEAAAPGKRIRKPSLLYEGFESPTMASVPALQLTPAN
PPPPEVSNPKKPGRVTNQLQYLHKVVMKALWKHQFAWPFRQPVDAVKLGLPDYHKIIKQP
MDMGTIKRRLENNYYWAASECMQDFNTMFTNCYIYNKPTDDIVLMAQTLEKIFLQKVASM
PQEEQELVVTIPKNSHKKGAKLAALQGSVTSAHQVPAVSSVSHTALYTPPPEIPTTVLNI
PHPSVISSPLLKSLHSAGPPLLAVTAAPPAQPLAKKKGVKRKADTTTPTPTAILAPGSPA
SPPGSLEPKAARLPPMRRESGRPIKPPRKDLPDSQQQHQSSKKGKLSEQLKHCNGILKEL
LSKKHAAYAWPFYKPVDASALGLHDYHDIIKHPMDLSTVKRKMENRDYRDAQEFAADVRL
MFSNCYKYNPPDHDVVAMARKLQDVFEFRYAKMPDEPLEPGPLPVSTAMPPGLAKSSSES
SSEESSSESSSEEEEEEDEEDEEEEESESSDSEEERAHRLAELQEQLRAVHEQLAALSQG
PISKPKRKREKKEKKKKRKAEKHRGRAGADEDDKGPRAPRPPQPKKSKKASGSGGGSAAL
GPSGFGPSGGSGTKLPKKATKTAPPALPTGYDSEEEEESRPMSYDEKRQLSLDINKLPGE
KLGRVVHIIQAREPSLRDSNPEEIEIDFETLKPSTLRELERYVLSCLRKKPRKPYTIKKP
VGKTKEELALEKKRELEKRLQDVSGQLNSTKKPPKKANEKTESSSAQQVAVSRLSASSSS
SDSSSSSSSSSSSDTSDSDSG
>sp|Q15059|BRD3_HUMAN Bromodomain-containing protein 3 OS=Homo sapiens OX=9606 GN=BRD3 PE=1 SV=1
MSTATTVAPAGIPATPGPVNPPPPEVSNPSKPGRKTNQLQYMQNVVVKTLWKHQFAWPFY
QPVDAIKLNLPDYHKIIKNPMDMGTIKKRLENNYYWSASECMQDFNTMFTNCYIYNKPTD
DIVLMAQALEKIFLQKVAQMPQEEVELLPPAPKGKGRKPAAGAQSAGTQQVAAVSSVSPA
TPFQSVPPTVSQTPVIAATPVPTITANVTSVPVPPAAAPPPPATPIVPVVPPTPPVVKKK
GVKRKADTTTPTTSAITASRSESPPPLSDPKQAKVVARRESGGRPIKPPKKDLEDGEVPQ
HAGKKGKLSEHLRYCDSILREMLSKKHAAYAWPFYKPVDAEALELHDYHDIIKHPMDLST
VKRKMDGREYPDAQGFAADVRLMFSNCYKYNPPDHEVVAMARKLQDVFEMRFAKMPDEPV
EAPALPAPAAPMVSKGAESSRSSEESSSDSGSSDSEEERATRLAELQEQLKAVHEQLAAL
SQAPVNKPKKKKEKKEKEKKKKDKEKEKEKHKVKAEEEKKAKVAPPAKQAQQKKAPAKKA
NSTTTAGRQLKKGGKQASASYDSEEEEEGLPMSYDEKRQLSLDINRLPGEKLGRVVHIIQ
SREPSLRDSNPDEIEIDFETLKPTTLRELERYVKSCLQKKQRKPFSASGKKQAAKSKEEL
AQEKKKELEKRLQDVSGQLSSSKKPARKEKPGSAPSGGPSRLSSSSSSESGSSSSSGSSS
DSSDSE
>sp|O60885|BRD4_HUMAN Bromodomain-containing protein 4 OS=Homo sapiens OX=9606 GN=BRD4 PE=1 SV=2
MSAESGPGTRLRNLPVMGDGLETSQMSTTQAQAQPQPANAASTNPPPPETSNPNKPKRQT
NQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYW
NAQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEETEIMIVQAKGRG
RGRKETGTAKPGVSTVPNTTQASTPPQTQTPQPNPPPVQATPHPFPAVTPDLIVQTPVMT
VVPPQPLQTPPPVPPQPQPPPAPAPQPVQSHPPIIAATPQPVKTKKGVKRKADTTTPTTI
DPIHEPPSLPPEPKTTKLGQRRESSRPVKPPKKDVPDSQQHPAPEKSSKVSEQLKCCSGI
LKEMFAKKHAAYAWPFYKPVDVEALGLHDYCDIIKHPMDMSTIKSKLEAREYRDAQEFGA
DVRLMFSNCYKYNPPDHEVVAMARKLQDVFEMRFAKMPDEPEEPVVAVSSPAVPPPTKVV
APPSSSDSSSDSSSDSDSSTDDSEEERAQRLAELQEQLKAVHEQLAALSQPQQNKPKKKE
KDKKEKKKEKHKRKEEVEENKKSKAKEPPPKKTKKNNSSNSNVSKKEPAPMKSKPPPTYE
SEEEDKCKPMSYEEKRQLSLDINKLPGEKLGRVVHIIQSREPSLKNSNPDEIEIDFETLK
PSTLRELERYVTSCLRKKRKPQAEKVDVIAGSSKMKGFSSSESESSSESSSSDSEDSETE
MAPKSKKKGHPGREQKKHHHHHHQQMQQAPAPVPQQPPPPPQQPPPPPPPQQQQQPPPPP
PPPSMPQQAAPAMKSSPPPFIATQVPVLEPQLPGSVFDPIGHFTQPILHLPQPELPPHLP
QPPEHSTPPHLNQHAVVSPPALHNALPQQPSRPSNRAAALPPKPARPPAVSPALTQTPLL
PQPPMAQPPQVLLEDEEPPAPPLTSMQMQLYLQQLQKVQPPTPLLPSVKVQSQPPPPLPP
PPHPSVQQQLQQQPPPPPPPQPQPPPQQQHQPPPRPVHLQPMQFSTHIQQPPPPQGQQPP
HPPPGQQPPPPQPAKPQQVIQHHHSPRHHKSDPYSTGHLREAPSPLMIHSPQMSQFQSLT
HQSPPQQNVQPKKQELRAASVVQPQPLVVVKEEKIHSPIIRSEPFSPSLRPEPPKHPESI
KAPVHLPQRPEMKPVDVGRPVIRPPEQNAPPPGAPDKDKQKQEPKTPVAPKKDLKIKNMG
SWASLVQKHPTTPSSTAKSSSDSFEQFRRAAREKEEREKALKAQAEHAEKEKERLRQERM
RSREDEDALEQARRAHEEARRRQEQQQQQRQEQQQQQQQQAAAVAAAATPQAQSSQPQSM
LDQQRELARKREQERRRREAMAATIDMNFQSDLLSIFEENLF
7 changes: 7 additions & 0 deletions inst/tinytest/raw_data/DIANN/annot.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"Run","Condition","BioReplicate"
"144-2024-GS-DMSO-R1","DMSO",1
"144-2024-GS-DMSO-R2","DMSO",2
"144-2024-GS-DMSO-R3","DMSO",3
"144-2024-GS-MZ1-R1","MZ-1",4
"144-2024-GS-MZ1-R2","MZ-1",5
"144-2024-GS-MZ1-R3","MZ-1",6
Loading
Loading