diff --git a/README.md b/README.md index 792c30bf3..607b5efc9 100644 --- a/README.md +++ b/README.md @@ -1076,6 +1076,10 @@ The earth sciences folder contain subfolders for different data formats encounte - 'ipython_notebook.md': exemplary markdown notebook - rmarkdown - 'rmarkdown_notebook.Rmd': exemplary R notebook +- gene_ontology + - 'ontology.obo': a mock gene ontology with 5 terms. + - 'ontology_slim.obo': a mock GO slim with 2 terms. + - 'go_annotation.gaf': a mock GO annotation in GAF format, containing annotations for 8 genes. - tsv - 'test.tsv': exemplary tab-separated file obtained from [here](https://bioinf.shenwei.me/csvtk/usage/#split) - 'ani.tsv': exemplary tab-seperated file describing pairwise similarities from [here](https://github.com/refresh-bio/clusty). @@ -1083,6 +1087,8 @@ The earth sciences folder contain subfolders for different data formats encounte - 'hello.txt': one-line txt file - 'taxonomy_ids.txt': contains species names, to be used as input for [goat-cli taxon search tool](https://github.com/genomehubs/goat-cli). - 'ani_ids.txt': list of ids associated with 'tsv/ani.tsv' used as an input for [clusty](https://github.com/refresh-bio/clusty). + - 'sample_genes.txt': a list of 3 sample genes for GO enrichment analysis. + - 'population_genes.txt': a list of 9 population genes for GO enrichment analysis. - tar - 'hello.tar.gz': gzipped tar archive containing a single file without a directory diff --git a/data/generic/gene_ontology/go_annotation.gaf b/data/generic/gene_ontology/go_annotation.gaf new file mode 100644 index 000000000..fa578dd9b --- /dev/null +++ b/data/generic/gene_ontology/go_annotation.gaf @@ -0,0 +1,13 @@ +!gaf-version: 2.1 +! +!Mock GAF for testing enrichment tool +!Generated: 2023-10-27 +! +MockDB A GENE_A GO:0000004 PMID:123 IMP P Metabolism Gene A gene taxon:9606 20231027 MockSource +MockDB B GENE_B GO:0000004 PMID:123 IMP P Metabolism Gene B gene taxon:9606 20231027 MockSource +MockDB C GENE_C GO:0000002 PMID:456 IDA P Metabolism Gene C gene taxon:9606 20231027 MockSource +MockDB D GENE_D GO:0000001 PMID:789 TAS P General Gene D gene taxon:9606 20231027 MockSource +MockDB E GENE_E GO:0000001 PMID:789 TAS P General Gene E gene taxon:9606 20231027 MockSource +MockDB X GENE_X GO:0000005 PMID:101 IMP P Defense Gene X gene taxon:9606 20231027 MockSource +MockDB Y GENE_Y GO:0000005 PMID:101 IMP P Defense Gene Y gene taxon:9606 20231027 MockSource +MockDB Z GENE_Z GO:0000003 PMID:202 IDA P Defense Gene Z gene taxon:9606 20231027 MockSource diff --git a/data/generic/gene_ontology/ontology.obo b/data/generic/gene_ontology/ontology.obo new file mode 100644 index 000000000..b150f4c5a --- /dev/null +++ b/data/generic/gene_ontology/ontology.obo @@ -0,0 +1,31 @@ +format-version: 1.2 +default-namespace: mock_ontology + +[Term] +id: GO:0000001 +name: cellular_process +def: "A broad biological process." + +[Term] +id: GO:0000002 +name: metabolic_process +def: "Chemical reactions within a cell." +is_a: GO:0000001 ! cellular_process + +[Term] +id: GO:0000003 +name: defense_response +def: "Response to external stimuli." +is_a: GO:0000001 ! cellular_process + +[Term] +id: GO:0000004 +name: carbohydrate_metabolism +def: "Breaking down sugars." +is_a: GO:0000002 ! metabolic_process + +[Term] +id: GO:0000005 +name: viral_defense +def: "Specific defense against viruses." +is_a: GO:0000003 ! defense_response diff --git a/data/generic/gene_ontology/ontology_slim.obo b/data/generic/gene_ontology/ontology_slim.obo new file mode 100644 index 000000000..832fc7bb6 --- /dev/null +++ b/data/generic/gene_ontology/ontology_slim.obo @@ -0,0 +1,12 @@ +format-version: 1.2 +ontology: mock_slim + +[Term] +id: GO:0000002 +name: metabolic_process +namespace: biological_process + +[Term] +id: GO:0000003 +name: defense_response +namespace: biological_process diff --git a/data/generic/txt/population_genes.txt b/data/generic/txt/population_genes.txt new file mode 100644 index 000000000..cb509128a --- /dev/null +++ b/data/generic/txt/population_genes.txt @@ -0,0 +1,9 @@ +A +B +C +D +E +F +X +Y +Z diff --git a/data/generic/txt/sample_genes.txt b/data/generic/txt/sample_genes.txt new file mode 100644 index 000000000..b1e67221a --- /dev/null +++ b/data/generic/txt/sample_genes.txt @@ -0,0 +1,3 @@ +A +B +C