From bc331883233077f8ceb074f663c7b4b62773cfd6 Mon Sep 17 00:00:00 2001 From: Apoorva Srinivasan Date: Wed, 15 Mar 2023 13:39:07 -0700 Subject: [PATCH 1/9] Apply pre-commit hook changes --- data/nsides/offsides/meta.yaml | 39 + data/nsides/offsides/offsides_data_prep.ipynb | 872 ++++++++++++++++++ data/nsides/offsides/transform.py | 124 +++ 3 files changed, 1035 insertions(+) create mode 100644 data/nsides/offsides/meta.yaml create mode 100644 data/nsides/offsides/offsides_data_prep.ipynb create mode 100644 data/nsides/offsides/transform.py diff --git a/data/nsides/offsides/meta.yaml b/data/nsides/offsides/meta.yaml new file mode 100644 index 000000000..2849ce626 --- /dev/null +++ b/data/nsides/offsides/meta.yaml @@ -0,0 +1,39 @@ +--- +name: offsides +description: OffSIDES is a database of individual drug side effect signals mined from the FDA's Adverse Event Reporting System. The innovation of OffSIDES + is that a propensity score matching (PSM) model is used to identify control drugs and produce better PRR estimates. In OffSIDES we focus on drug safety + signals that are not already established by being listed on the structured product label -- hence they are off-label drug side effects. 
+targets: + - id: PRR + description: Proportional reporting ratio, PRR=(A/(A+B))/(C/(C+D)) + type: continuous + names: + - Proportional reporting ratio + - id: PRR_error + description: Standard error of the PRR estimate + type: continuous + names: + - Proportional reporting ratio error + - id: mean_reporting_frequency + description: Proportion of reports for the drug that report the side effect, A/(A+B) + type: continuous + names: + - mean reporting frequency +identifier: + - id: drug_concept_name + description: RxNorm name string for the drug + type: categorical + - id: condition_concept_name + description: MedDRA identifier for the side effect + type: categorical +license: CC BY 4.0 +links: + - url: https://tatonettilab.org/resources/nsides/ + description: data source + - url: https://nsides.io/ + description: database website +num_points: 3042873 +bibtex: "\n @article{Tatonetti2012,\n author = {Tatonetti, Nicholas P. and Ye, Peter P. and Daneshjou, Roxana and Altman, Russ B.},\n \ + \ title = {Data-driven prediction of drug effects and interactions},\n journal = {Sci Transl Med},\n volume = {4},\n number\ + \ = {125},\n pages = {125ra31},\n year = {2012},\n doi = {10.1126/scitranslmed.3003377},\n pmid = {22422992},\n pmcid\ + \ = {PMC3382018}\n }\n " diff --git a/data/nsides/offsides/offsides_data_prep.ipynb b/data/nsides/offsides/offsides_data_prep.ipynb new file mode 100644 index 000000000..e498392bf --- /dev/null +++ b/data/nsides/offsides/offsides_data_prep.ipynb @@ -0,0 +1,872 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ee354cad", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a5577953", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/3c/d8_kt2gd6n5857w_5x4gccrc0000gn/T/ipykernel_28019/4254589737.py:1: FutureWarning: The error_bad_lines argument 
has been deprecated and will be removed in a future version. Use on_bad_lines in the future.\n", + "\n", + "\n", + " df = pd.read_csv('https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz', compression='gzip',\n", + "/var/folders/3c/d8_kt2gd6n5857w_5x4gccrc0000gn/T/ipykernel_28019/4254589737.py:1: DtypeWarning: Columns (0,2,4,5,6,7,8,9,10) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv('https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz', compression='gzip',\n" + ] + } + ], + "source": [ + "df = pd.read_csv('https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz', compression='gzip',\n", + " error_bad_lines=False)\n", + "#df.shape\n", + "#df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "da5b4b81", + "metadata": {}, + "outputs": [], + "source": [ + " # check if fields are the same\n", + "\n", + "\n", + "expected_columns = ['drug_rxnorn_id',\n", + " 'drug_concept_name',\n", + " 'condition_meddra_id',\n", + " 'condition_concept_name',\n", + " 'A',\n", + " 'B',\n", + " 'C',\n", + " 'D',\n", + " 'PRR',\n", + " 'PRR_error',\n", + " 'mean_reporting_frequency']\n", + "\n", + "assert df.columns.tolist() == expected_columns\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "8d016424", + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "Found duplicate rows in the dataframe", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[33], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m df\u001b[38;5;241m.\u001b[39mduplicated()\u001b[38;5;241m.\u001b[39msum(), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFound duplicate rows in the 
dataframe\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2\u001b[0m df\u001b[38;5;241m.\u001b[39mdrop_duplicates(inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "\u001b[0;31mAssertionError\u001b[0m: Found duplicate rows in the dataframe" + ] + } + ], + "source": [ + "assert not df.duplicated().sum(), \"Found duplicate rows in the dataframe\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f523b30a", + "metadata": {}, + "outputs": [], + "source": [ + "df.drop_duplicates(inplace=True)\n", + "# df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "43ed46ed", + "metadata": {}, + "outputs": [], + "source": [ + "fn_data_csv = \"data_clean.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a6230d38", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(fn_data_csv, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8b1da608", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 apoorvasrinivasan staff 279M Mar 14 18:22 data_clean.csv\r\n" + ] + } + ], + "source": [ + "!ls -lh {fn_data_csv}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d1509dca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drug_rxnorn_id,drug_concept_name,condition_meddra_id,condition_concept_name,A,B,C,D,PRR,PRR_error,mean_reporting_frequency\r\n", + "4024,\"ergoloid mesylates, USP\",10002034,Anaemia,6,126,21,1299,2.85714,0.45382,0.0454545\r\n", + "4024,\"ergoloid mesylates, USP\",10002965,Aplasia pure red cell,1,131,1,1319,10.0,1.41126,0.00757576\r\n", + "4024,\"ergoloid mesylates, USP\",10013442,Disseminated intravascular coagulation,1,131,6,1314,1.66667,1.07626,0.00757576\r\n", + "4024,\"ergoloid mesylates, USP\",10023126,Jaundice,2,130,7,1313,2.85714,0.79657,0.0151515\r\n" + ] + } + ], + "source": [ + "!head -n 5 
{fn_data_csv}" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f33da1dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
drug_rxnorn_iddrug_concept_namecondition_meddra_idcondition_concept_nameABCDPRRPRR_errormean_reporting_frequency
04024ergoloid mesylates, USP10002034Anaemia61262112992.857140.453820.045455
14024ergoloid mesylates, USP10002965Aplasia pure red cell11311131910.01.411260.007576
24024ergoloid mesylates, USP10013442Disseminated intravascular coagulation1131613141.666671.076260.007576
34024ergoloid mesylates, USP10023126Jaundice2130713132.857140.796570.015152
44024ergoloid mesylates, USP10016288Febrile neutropenia1131513152.01.091630.007576
\n", + "
" + ], + "text/plain": [ + " drug_rxnorn_id drug_concept_name condition_meddra_id \\\n", + "0 4024 ergoloid mesylates, USP 10002034 \n", + "1 4024 ergoloid mesylates, USP 10002965 \n", + "2 4024 ergoloid mesylates, USP 10013442 \n", + "3 4024 ergoloid mesylates, USP 10023126 \n", + "4 4024 ergoloid mesylates, USP 10016288 \n", + "\n", + " condition_concept_name A B C D PRR \\\n", + "0 Anaemia 6 126 21 1299 2.85714 \n", + "1 Aplasia pure red cell 1 131 1 1319 10.0 \n", + "2 Disseminated intravascular coagulation 1 131 6 1314 1.66667 \n", + "3 Jaundice 2 130 7 1313 2.85714 \n", + "4 Febrile neutropenia 1 131 5 1315 2.0 \n", + "\n", + " PRR_error mean_reporting_frequency \n", + "0 0.45382 0.045455 \n", + "1 1.41126 0.007576 \n", + "2 1.07626 0.007576 \n", + "3 0.79657 0.015152 \n", + "4 1.09163 0.007576 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "17bcca0d", + "metadata": {}, + "source": [ + "## Load from csv" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4d12adad", + "metadata": {}, + "outputs": [], + "source": [ + "fn_data_csv = \"data_clean.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ae4cbf36", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/3c/d8_kt2gd6n5857w_5x4gccrc0000gn/T/ipykernel_28019/2664504625.py:1: DtypeWarning: Columns (0,2,4,5,6,7,8,9,10) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(fn_data_csv)\n" + ] + } + ], + "source": [ + "df = pd.read_csv(fn_data_csv)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "214a2b81", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
drug_rxnorn_iddrug_concept_namecondition_meddra_idcondition_concept_nameABCDPRRPRR_errormean_reporting_frequency
04024ergoloid mesylates, USP10002034Anaemia61262112992.857140.453820.045455
14024ergoloid mesylates, USP10002965Aplasia pure red cell11311131910.01.411260.007576
24024ergoloid mesylates, USP10013442Disseminated intravascular coagulation1131613141.666671.076260.007576
34024ergoloid mesylates, USP10023126Jaundice2130713132.857140.796570.015152
44024ergoloid mesylates, USP10016288Febrile neutropenia1131513152.01.091630.007576
\n", + "
" + ], + "text/plain": [ + " drug_rxnorn_id drug_concept_name condition_meddra_id \\\n", + "0 4024 ergoloid mesylates, USP 10002034 \n", + "1 4024 ergoloid mesylates, USP 10002965 \n", + "2 4024 ergoloid mesylates, USP 10013442 \n", + "3 4024 ergoloid mesylates, USP 10023126 \n", + "4 4024 ergoloid mesylates, USP 10016288 \n", + "\n", + " condition_concept_name A B C D PRR \\\n", + "0 Anaemia 6 126 21 1299 2.85714 \n", + "1 Aplasia pure red cell 1 131 1 1319 10.0 \n", + "2 Disseminated intravascular coagulation 1 131 6 1314 1.66667 \n", + "3 Jaundice 2 130 7 1313 2.85714 \n", + "4 Febrile neutropenia 1 131 5 1315 2.0 \n", + "\n", + " PRR_error mean_reporting_frequency \n", + "0 0.45382 0.045455 \n", + "1 1.41126 0.007576 \n", + "2 1.07626 0.007576 \n", + "3 0.79657 0.015152 \n", + "4 1.09163 0.007576 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "854b807c", + "metadata": {}, + "source": [ + "## Meta YAML" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2d24e114", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "meta = {\n", + " \"name\": \"offsides\",\n", + " \"description\": \"OffSIDES is a database of individual drug side effect signals mined from the FDA's Adverse Event Reporting System. The innovation of OffSIDES is that a propensity score matching (PSM) model is used to identify control drugs and produce better PRR estimates. 
In OffSIDES we focus on drug safety signals that are not already established by being listed on the structured product label -- hence they are off-label drug side effects.\",\n", + " \"targets\": [\n", + " {\n", + " \"id\": \"PRR\",\n", + " \"description\": \"Proportional reporting ratio, PRR=(A/(A+B))/(C/(C+D))\",\n", + " \"type\": \"continuous\",\n", + " \"names\": [\"Proportional reporting ratio\"]\n", + " },\n", + " {\n", + " \"id\": \"PRR_error\",\n", + " \"description\": \"Standard error of the PRR estimate\",\n", + " \"type\": \"continuous\",\n", + " \"names\": [\"Proportional reporting ratio error\"]\n", + " },\n", + " {\n", + " \"id\": \"mean_reporting_frequency\",\n", + " \"description\": \"Proportion of reports for the drug that report the side effect, A/(A+B)\",\n", + " \"type\": \"continuous\",\n", + " \"names\": [\"mean reporting frequency\"]\n", + " }\n", + " ],\n", + " \"identifier\": [\n", + " {\n", + " \"id\": \"drug_concept_name\",\n", + " \"description\": \"RxNorm name string for the drug\",\n", + " \"type\": \"categorical\"\n", + " },\n", + " {\n", + " \"id\": \"condition_concept_name\",\n", + " \"description\": \"MedDRA identifier for the side effect\",\n", + " \"type\": \"categorical\"\n", + " }\n", + " ],\n", + " \"license\": \"CC BY 4.0\",\n", + " \"links\": [\n", + " {\n", + " \"url\": \"https://tatonettilab.org/resources/nsides/\",\n", + " \"description\": \"data source\"\n", + " },\n", + " {\n", + " \"url\": \"https://nsides.io/\",\n", + " \"description\": \"database website\"\n", + " }\n", + " ],\n", + " \"num_points\": len(df),\n", + " \"bibtex\": \"\"\"\n", + " @article{Tatonetti2012,\n", + " author = {Tatonetti, Nicholas P. and Ye, Peter P. 
and Daneshjou, Roxana and Altman, Russ B.},\n", + " title = {Data-driven prediction of drug effects and interactions},\n", + " journal = {Sci Transl Med},\n", + " volume = {4},\n", + " number = {125},\n", + " pages = {125ra31},\n", + " year = {2012},\n", + " doi = {10.1126/scitranslmed.3003377},\n", + " pmid = {22422992},\n", + " pmcid = {PMC3382018}\n", + " }\n", + " \"\"\"\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6e8aafee", + "metadata": {}, + "outputs": [], + "source": [ + "fn_meta = \"meta.yaml\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "6ff83544", + "metadata": {}, + "outputs": [], + "source": [ + "with open(fn_meta, \"w\") as f:\n", + " yaml.dump(meta, f, sort_keys=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d370342f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 apoorvasrinivasan staff 1.8K Mar 14 18:25 meta.yaml\r\n" + ] + } + ], + "source": [ + "!ls -lh {fn_meta}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "40548210", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: offsides\r\n", + "description: OffSIDES is a database of individual drug side effect signals mined from\r\n", + " the FDA's Adverse Event Reporting System. The innovation of OffSIDES is that a propensity\r\n", + " score matching (PSM) model is used to identify control drugs and produce better\r\n", + " PRR estimates. 
In OffSIDES we focus on drug safety signals that are not already\r\n", + " established by being listed on the structured product label -- hence they are off-label\r\n", + " drug side effects.\r\n", + "targets:\r\n", + "- id: PRR\r\n", + " description: Proportional reporting ratio, PRR=(A/(A+B))/(C/(C+D))\r\n", + " type: continuous\r\n", + " names:\r\n", + " - Proportional reporting ratio\r\n", + "- id: PRR_error\r\n", + " description: Standard error of the PRR estimate\r\n", + " type: continuous\r\n", + " names:\r\n", + " - Proportional reporting ratio error\r\n", + "- id: mean_reporting_frequency\r\n", + " description: Proportion of reports for the drug that report the side effect, A/(A+B)\r\n", + " type: continuous\r\n", + " names:\r\n", + " - mean reporting frequency\r\n", + "identifier:\r\n", + "- id: drug_concept_name\r\n", + " description: RxNorm name string for the drug\r\n", + " type: categorical\r\n", + "- id: condition_concept_name\r\n", + " description: MedDRA identifier for the side effect\r\n", + " type: categorical\r\n", + "license: CC BY 4.0\r\n", + "links:\r\n", + "- url: https://tatonettilab.org/resources/nsides/\r\n", + " description: data source\r\n", + "- url: https://nsides.io/\r\n", + " description: database website\r\n", + "num_points: 3206558\r\n", + "bibtex: \"\\n @article{Tatonetti2012,\\n author = {Tatonetti, Nicholas\\\r\n", + " \\ P. and Ye, Peter P. 
and Daneshjou, Roxana and Altman, Russ B.},\\n title\\\r\n", + " \\ = {Data-driven prediction of drug effects and interactions},\\n journal\\\r\n", + " \\ = {Sci Transl Med},\\n volume = {4},\\n number = {125},\\n pages\\\r\n", + " \\ = {125ra31},\\n year = {2012},\\n doi = {10.1126/scitranslmed.3003377},\\n\\\r\n", + " \\ pmid = {22422992},\\n pmcid = {PMC3382018}\\n }\\n \"\r\n" + ] + } + ], + "source": [ + "!cat {fn_meta}" + ] + }, + { + "cell_type": "markdown", + "id": "0ff77293", + "metadata": {}, + "source": [ + "## Create transform.py" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "38e8a677", + "metadata": {}, + "outputs": [], + "source": [ + "path_file = \"transform.py\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e2f46f61", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting transform.py\n" + ] + } + ], + "source": [ + "%%writefile $path_file\n", + "import pandas as pd\n", + "import requests\n", + "import yaml\n", + "\n", + "\n", + "def get_and_transform_data():\n", + " # load data\n", + " df = pd.read_csv('https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz', compression='gzip',\n", + " error_bad_lines=False)\n", + "\n", + " # check if fields are the same\n", + " expected_columns = ['drug_rxnorn_id',\n", + " 'drug_concept_name',\n", + " 'condition_meddra_id',\n", + " 'condition_concept_name',\n", + " 'A',\n", + " 'B',\n", + " 'C',\n", + " 'D',\n", + " 'PRR',\n", + " 'PRR_error',\n", + " 'mean_reporting_frequency']\n", + "\n", + " assert df.columns.tolist() == expected_columns\n", + " \n", + " # remove duplicates\n", + " df.drop_duplicates(inplace=True)\n", + " # check duplicates\n", + " assert not df.duplicated().sum(), \"Found duplicate rows in the dataframe\"\n", + " \n", + " \n", + "\n", + " # save to csv\n", + " fn_data_csv = \"data_clean.csv\"\n", + " df.to_csv(fn_data_csv, index=False)\n", + "\n", + " # create meta yaml\n", + " meta = 
{\n", + " \"name\": \"offsides\",\n", + " \"description\": \"OffSIDES is a database of individual drug side effect signals mined from the FDA's Adverse Event Reporting System. The innovation of OffSIDES is that a propensity score matching (PSM) model is used to identify control drugs and produce better PRR estimates. In OffSIDES we focus on drug safety signals that are not already established by being listed on the structured product label -- hence they are off-label drug side effects.\",\n", + " \"targets\": [\n", + " {\n", + " \"id\": \"PRR\",\n", + " \"description\": \"Proportional reporting ratio, PRR=(A/(A+B))/(C/(C+D))\",\n", + " \"type\": \"continuous\",\n", + " \"names\": [\"Proportional reporting ratio\"]\n", + " },\n", + " {\n", + " \"id\": \"PRR_error\",\n", + " \"description\": \"Standard error of the PRR estimate\",\n", + " \"type\": \"continuous\",\n", + " \"names\": [\"Proportional reporting ratio error\"]\n", + " },\n", + " {\n", + " \"id\": \"mean_reporting_frequency\",\n", + " \"description\": \"Proportion of reports for the drug that report the side effect, A/(A+B)\",\n", + " \"type\": \"continuous\",\n", + " \"names\": [\"mean reporting frequency\"]\n", + " }\n", + " ],\n", + " \"identifier\": [\n", + " {\n", + " \"id\": \"drug_concept_name\",\n", + " \"description\": \"RxNorm name string for the drug\",\n", + " \"type\": \"categorical\"\n", + " },\n", + " {\n", + " \"id\": \"condition_concept_name\",\n", + " \"description\": \"MedDRA identifier for the side effect\",\n", + " \"type\": \"categorical\"\n", + " }\n", + " ],\n", + " \"license\": \"CC BY 4.0\",\n", + " \"links\": [\n", + " {\n", + " \"url\": \"https://tatonettilab.org/resources/nsides/\",\n", + " \"description\": \"data source\"\n", + " },\n", + " {\n", + " \"url\": \"https://nsides.io/\",\n", + " \"description\": \"database website\"\n", + " }\n", + " ],\n", + " \"num_points\": len(df),\n", + " \"bibtex\": \"\"\"\n", + " @article{Tatonetti2012,\n", + " author = {Tatonetti, Nicholas 
P. and Ye, Peter P. and Daneshjou, Roxana and Altman, Russ B.},\n", + " title = {Data-driven prediction of drug effects and interactions},\n", + " journal = {Sci Transl Med},\n", + " volume = {4},\n", + " number = {125},\n", + " pages = {125ra31},\n", + " year = {2012},\n", + " doi = {10.1126/scitranslmed.3003377},\n", + " pmid = {22422992},\n", + " pmcid = {PMC3382018}\n", + " }\n", + " \"\"\"\n", + " }\n", + "\n", + " def str_presenter(dumper, data):\n", + " \"\"\"configures yaml for dumping multiline strings\n", + " Ref: https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data\n", + " \"\"\"\n", + " if data.count(\"\\n\") > 0: # check for multiline string\n", + " return dumper.represent_scalar(\"tag:yaml.org,2002:str\", data, style=\"|\")\n", + " return dumper.represent_scalar(\"tag:yaml.org,2002:str\", data)\n", + "\n", + " yaml.add_representer(str, str_presenter)\n", + " yaml.representer.SafeRepresenter.add_representer(\n", + " str, str_presenter\n", + " ) # to use with safe_dum\n", + " fn_meta = \"meta.yaml\"\n", + " with open(fn_meta, \"w\") as f:\n", + " yaml.dump(meta, f, sort_keys=False)\n", + "\n", + " print(f\"Finished processing {meta['name']} dataset!\")\n", + "\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " get_and_transform_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1ac51787", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "transform.py:8: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.\n", + "\n", + "\n", + " df = pd.read_csv('https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz', compression='gzip',\n", + "transform.py:8: DtypeWarning: Columns (0,2,4,5,6,7,8,9,10) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv('https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz', compression='gzip',\n", + "Finished processing offsides dataset!\n" + ] + } + ], + "source": [ + "!python3 transform.py" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/nsides/offsides/transform.py b/data/nsides/offsides/transform.py new file mode 100644 index 000000000..af5ee116f --- /dev/null +++ b/data/nsides/offsides/transform.py @@ -0,0 +1,124 @@ +import pandas as pd +import yaml + + +def get_and_transform_data(): + # load data + df = pd.read_csv( + "https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz", + compression="gzip", + error_bad_lines=False, + ) + + # check if fields are the same + expected_columns = [ + "drug_rxnorn_id", + "drug_concept_name", + "condition_meddra_id", + "condition_concept_name", + "A", + "B", + "C", + "D", + "PRR", + "PRR_error", + "mean_reporting_frequency", + ] + + assert df.columns.tolist() == expected_columns + # remove duplicates + df.drop_duplicates(inplace=True) + # check duplicates + assert not df.duplicated().sum(), "Found duplicate rows in the dataframe" + # save to csv + fn_data_csv = "data_clean.csv" + df.to_csv(fn_data_csv, index=False) + + # create meta yaml + meta = { + "name": "offsides", + "description": ( + "OffSIDES is a database of individual drug side effect signals mined from the FDA's " + "Adverse Event Reporting System. 
The innovation of OffSIDES is that a propensity score " + "matching (PSM) model is used to identify control drugs and produce better PRR estimates. " + "In OffSIDES we focus on drug safety signals that are not already established by being " + "listed on the structured product label -- hence they are off-label drug side effects." + ), + "targets": [ + { + "id": "PRR", + "description": "Proportional reporting ratio, PRR=(A/(A+B))/(C/(C+D))", + "type": "continuous", + "names": ["Proportional reporting ratio"], + }, + { + "id": "PRR_error", + "description": "Standard error of the PRR estimate", + "type": "continuous", + "names": ["Proportional reporting ratio error"], + }, + { + "id": "mean_reporting_frequency", + "description": "Proportion of reports for the drug that report the side effect, A/(A+B)", + "type": "continuous", + "names": ["mean reporting frequency"], + }, + ], + "identifier": [ + { + "id": "drug_concept_name", + "description": "RxNorm name string for the drug", + "type": "categorical", + }, + { + "id": "condition_concept_name", + "description": "MedDRA identifier for the side effect", + "type": "categorical", + }, + ], + "license": "CC BY 4.0", + "links": [ + { + "url": "https://tatonettilab.org/resources/nsides/", + "description": "data source", + }, + {"url": "https://nsides.io/", "description": "database website"}, + ], + "num_points": len(df), + "bibtex": """ + @article{Tatonetti2012, + author = {Tatonetti, Nicholas P. and Ye, Peter P. 
and Daneshjou, Roxana and Altman, Russ B.}, + title = {Data-driven prediction of drug effects and interactions}, + journal = {Sci Transl Med}, + volume = {4}, + number = {125}, + pages = {125ra31}, + year = {2012}, + doi = {10.1126/scitranslmed.3003377}, + pmid = {22422992}, + pmcid = {PMC3382018} + } + """, + } + + def str_presenter(dumper, data): + """configures yaml for dumping multiline strings + Ref: https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data + """ + if data.count("\n") > 0: # check for multiline string + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + yaml.add_representer(str, str_presenter) + yaml.representer.SafeRepresenter.add_representer( + str, str_presenter + ) # to use with safe_dum + fn_meta = "meta.yaml" + with open(fn_meta, "w") as f: + yaml.dump(meta, f, sort_keys=False) + + print(f"Finished processing {meta['name']} dataset!") + + +if __name__ == "__main__": + get_and_transform_data() From dfcd06d3eff38dde7dff2be31266464012676e00 Mon Sep 17 00:00:00 2001 From: Apoorva Srinivasan Date: Wed, 29 Mar 2023 09:51:59 -0700 Subject: [PATCH 2/9] updating changes --- data/nsides/offsides/transform.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/data/nsides/offsides/transform.py b/data/nsides/offsides/transform.py index af5ee116f..2b75cc44e 100644 --- a/data/nsides/offsides/transform.py +++ b/data/nsides/offsides/transform.py @@ -26,6 +26,10 @@ def get_and_transform_data(): ] assert df.columns.tolist() == expected_columns + + # drop columns + # drop A, B, C, D + df.drop(columns=["A", "B", "C", "D"], inplace=True) # remove duplicates df.drop_duplicates(inplace=True) # check duplicates @@ -47,7 +51,7 @@ def get_and_transform_data(): "targets": [ { "id": "PRR", - "description": "Proportional reporting ratio, PRR=(A/(A+B))/(C/(C+D))", + "description": "Proportional reporting ratio", 
"type": "continuous", "names": ["Proportional reporting ratio"], }, @@ -59,7 +63,7 @@ def get_and_transform_data(): }, { "id": "mean_reporting_frequency", - "description": "Proportion of reports for the drug that report the side effect, A/(A+B)", + "description": "Proportion of reports for the drug that report the side effect", "type": "continuous", "names": ["mean reporting frequency"], }, From b3fbbcacc0b60ee5d9e79903d1c4847f7c62ce04 Mon Sep 17 00:00:00 2001 From: Apoorva Srinivasan Date: Wed, 29 Mar 2023 23:14:43 -0700 Subject: [PATCH 3/9] updating meta.yaml --- data/nsides/offsides/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/nsides/offsides/meta.yaml b/data/nsides/offsides/meta.yaml index 2849ce626..9e3933093 100644 --- a/data/nsides/offsides/meta.yaml +++ b/data/nsides/offsides/meta.yaml @@ -5,7 +5,7 @@ description: OffSIDES is a database of individual drug side effect signals mined signals that are not already established by being listed on the structured product label -- hence they are off-label drug side effects. 
targets: - id: PRR - description: Proportional reporting ratio, PRR=(A/(A+B))/(C/(C+D)) + description: Proportional reporting ratio) type: continuous names: - Proportional reporting ratio @@ -15,7 +15,7 @@ targets: names: - Proportional reporting ratio error - id: mean_reporting_frequency - description: Proportion of reports for the drug that report the side effect, A/(A+B) + description: Proportion of reports for the drug that report the side effect type: continuous names: - mean reporting frequency From 102536be0af033b01d6d1ea55feb5746eaae7507 Mon Sep 17 00:00:00 2001 From: Apoorva Srinivasan <43023448+apoorvasrinivasan26@users.noreply.github.com> Date: Mon, 3 Apr 2023 09:13:36 -0700 Subject: [PATCH 4/9] Update data/nsides/offsides/meta.yaml Co-authored-by: Kevin M Jablonka <32935233+kjappelbaum@users.noreply.github.com> --- data/nsides/offsides/meta.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/data/nsides/offsides/meta.yaml b/data/nsides/offsides/meta.yaml index 9e3933093..0559f5664 100644 --- a/data/nsides/offsides/meta.yaml +++ b/data/nsides/offsides/meta.yaml @@ -9,11 +9,6 @@ targets: type: continuous names: - Proportional reporting ratio - - id: PRR_error - description: Standard error of the PRR estimate - type: continuous - names: - - Proportional reporting ratio error - id: mean_reporting_frequency description: Proportion of reports for the drug that report the side effect type: continuous From cb00a026c0b50703aa286bd83a5c06753d009a7d Mon Sep 17 00:00:00 2001 From: Apoorva Srinivasan <43023448+apoorvasrinivasan26@users.noreply.github.com> Date: Mon, 3 Apr 2023 09:13:51 -0700 Subject: [PATCH 5/9] Update data/nsides/offsides/meta.yaml Co-authored-by: Kevin M Jablonka <32935233+kjappelbaum@users.noreply.github.com> --- data/nsides/offsides/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/nsides/offsides/meta.yaml b/data/nsides/offsides/meta.yaml index 0559f5664..4fb28b949 100644 --- 
a/data/nsides/offsides/meta.yaml +++ b/data/nsides/offsides/meta.yaml @@ -5,7 +5,7 @@ description: OffSIDES is a database of individual drug side effect signals mined signals that are not already established by being listed on the structured product label -- hence they are off-label drug side effects. targets: - id: PRR - description: Proportional reporting ratio) + description: Proportional reporting ratio type: continuous names: - Proportional reporting ratio From ae9e9287f84f0acd93a274145f17f0b67eb7e3b3 Mon Sep 17 00:00:00 2001 From: Michael Pieler Date: Wed, 3 May 2023 16:18:54 +0200 Subject: [PATCH 6/9] feat: fix small issues and move dir --- data/nsides/offsides/meta.yaml | 34 ------------ data/offsides/meta.yaml | 53 +++++++++++++++++++ .../offsides/offsides_data_prep.ipynb | 0 data/{nsides => }/offsides/transform.py | 43 ++++++++------- 4 files changed, 74 insertions(+), 56 deletions(-) delete mode 100644 data/nsides/offsides/meta.yaml create mode 100644 data/offsides/meta.yaml rename data/{nsides => }/offsides/offsides_data_prep.ipynb (100%) rename data/{nsides => }/offsides/transform.py (75%) diff --git a/data/nsides/offsides/meta.yaml b/data/nsides/offsides/meta.yaml deleted file mode 100644 index 4fb28b949..000000000 --- a/data/nsides/offsides/meta.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -name: offsides -description: OffSIDES is a database of individual drug side effect signals mined from the FDA's Adverse Event Reporting System. The innovation of OffSIDES - is that a propensity score matching (PSM) model is used to identify control drugs and produce better PRR estimates. In OffSIDES we focus on drug safety - signals that are not already established by being listed on the structured product label -- hence they are off-label drug side effects. 
-targets: - - id: PRR - description: Proportional reporting ratio - type: continuous - names: - - Proportional reporting ratio - - id: mean_reporting_frequency - description: Proportion of reports for the drug that report the side effect - type: continuous - names: - - mean reporting frequency -identifier: - - id: drug_concept_name - description: RxNorm name string for the drug - type: categorical - - id: condition_concept_name - description: MedDRA identifier for the side effect - type: categorical -license: CC BY 4.0 -links: - - url: https://tatonettilab.org/resources/nsides/ - description: data source - - url: https://nsides.io/ - description: database website -num_points: 3042873 -bibtex: "\n @article{Tatonetti2012,\n author = {Tatonetti, Nicholas P. and Ye, Peter P. and Daneshjou, Roxana and Altman, Russ B.},\n \ - \ title = {Data-driven prediction of drug effects and interactions},\n journal = {Sci Transl Med},\n volume = {4},\n number\ - \ = {125},\n pages = {125ra31},\n year = {2012},\n doi = {10.1126/scitranslmed.3003377},\n pmid = {22422992},\n pmcid\ - \ = {PMC3382018}\n }\n " diff --git a/data/offsides/meta.yaml b/data/offsides/meta.yaml new file mode 100644 index 000000000..ef1d9bb77 --- /dev/null +++ b/data/offsides/meta.yaml @@ -0,0 +1,53 @@ +--- +name: offsides +description: |- + OffSIDES is a database of individual drug side effect + signals mined from the FDA's. Adverse Event Reporting System. The + innovation of OffSIDES is that a propensity score matching (PSM) model + is used to identify control drugs and produce better PRR estimates. In + OffSIDES we focus on drug safety signals that are not already + established by being listed on the structured product label - hence + they are off-label drug side effects. 
+targets: + - id: PRR + description: Proportional reporting ratio + type: continuous + names: + - Proportional reporting ratio + - id: PRR_error + description: Standard error of the PRR estimate + type: continuous + names: + - Proportional reporting ratio error + - id: mean_reporting_frequency + description: Proportion of reports for the drug that report the side effect + type: continuous + names: + - mean reporting frequency +identifier: + - id: drug_concept_name + description: RxNorm name string for the drug + type: categorical + - id: condition_concept_name + description: MedDRA identifier for the side effect + type: categorical +license: CC BY 4.0 +links: + - url: https://tatonettilab.org/resources/nsides/ + description: data source + - url: https://nsides.io/ + description: database website +num_points: 2977338 +bibtex: |- + @article{Tatonetti2012, + author = {Tatonetti, Nicholas P. and Ye, Peter P. and Daneshjou, Roxana and Altman, Russ B.}, + title = {Data-driven prediction of drug effects and interactions}, + journal = {Sci Transl Med}, + volume = {4}, + number = {125}, + pages = {125ra31}, + year = {2012}, + doi = {10.1126/scitranslmed.3003377}, + pmid = {22422992}, + pmcid = {PMC3382018} + } diff --git a/data/nsides/offsides/offsides_data_prep.ipynb b/data/offsides/offsides_data_prep.ipynb similarity index 100% rename from data/nsides/offsides/offsides_data_prep.ipynb rename to data/offsides/offsides_data_prep.ipynb diff --git a/data/nsides/offsides/transform.py b/data/offsides/transform.py similarity index 75% rename from data/nsides/offsides/transform.py rename to data/offsides/transform.py index 2b75cc44e..66220b631 100644 --- a/data/nsides/offsides/transform.py +++ b/data/offsides/transform.py @@ -7,7 +7,8 @@ def get_and_transform_data(): df = pd.read_csv( "https://tatonettilab.org/resources/nsides/OFFSIDES.csv.gz", compression="gzip", - error_bad_lines=False, + on_bad_lines="skip", + low_memory=False, ) # check if fields are the same @@ -41,13 +42,13 
@@ def get_and_transform_data(): # create meta yaml meta = { "name": "offsides", - "description": ( - "OffSIDES is a database of individual drug side effect signals mined from the FDA's " - "Adverse Event Reporting System. The innovation of OffSIDES is that a propensity score " - "matching (PSM) model is used to identify control drugs and produce better PRR estimates. " - "In OffSIDES we focus on drug safety signals that are not already established by being " - "listed on the structured product label -- hence they are off-label drug side effects." - ), + "description": """OffSIDES is a database of individual drug side effect +signals mined from the FDA's. Adverse Event Reporting System. The +innovation of OffSIDES is that a propensity score matching (PSM) model +is used to identify control drugs and produce better PRR estimates. In +OffSIDES we focus on drug safety signals that are not already +established by being listed on the structured product label - hence +they are off-label drug side effects.""", "targets": [ { "id": "PRR", @@ -89,20 +90,18 @@ def get_and_transform_data(): {"url": "https://nsides.io/", "description": "database website"}, ], "num_points": len(df), - "bibtex": """ - @article{Tatonetti2012, - author = {Tatonetti, Nicholas P. and Ye, Peter P. and Daneshjou, Roxana and Altman, Russ B.}, - title = {Data-driven prediction of drug effects and interactions}, - journal = {Sci Transl Med}, - volume = {4}, - number = {125}, - pages = {125ra31}, - year = {2012}, - doi = {10.1126/scitranslmed.3003377}, - pmid = {22422992}, - pmcid = {PMC3382018} - } - """, + "bibtex": """@article{Tatonetti2012, +author = {Tatonetti, Nicholas P. and Ye, Peter P. 
and Daneshjou, Roxana and Altman, Russ B.}, +title = {Data-driven prediction of drug effects and interactions}, +journal = {Sci Transl Med}, +volume = {4}, +number = {125}, +pages = {125ra31}, +year = {2012}, +doi = {10.1126/scitranslmed.3003377}, +pmid = {22422992}, +pmcid = {PMC3382018} +}""", } def str_presenter(dumper, data): From 2dfd8a6490cdfc04b4e27dc6509a2098ffa5a787 Mon Sep 17 00:00:00 2001 From: Kevin M Jablonka <32935233+kjappelbaum@users.noreply.github.com> Date: Fri, 5 May 2023 13:19:58 +0200 Subject: [PATCH 7/9] Update data/offsides/meta.yaml --- data/offsides/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/data/offsides/meta.yaml b/data/offsides/meta.yaml index ef1d9bb77..f8ae8dbe4 100644 --- a/data/offsides/meta.yaml +++ b/data/offsides/meta.yaml @@ -17,6 +17,7 @@ targets: - id: PRR_error description: Standard error of the PRR estimate type: continuous + sample: false names: - Proportional reporting ratio error - id: mean_reporting_frequency From 568e777f87da96a9642c20bde74773cba0a928e8 Mon Sep 17 00:00:00 2001 From: Kevin M Jablonka <32935233+kjappelbaum@users.noreply.github.com> Date: Fri, 5 May 2023 13:20:39 +0200 Subject: [PATCH 8/9] Update data/offsides/transform.py --- data/offsides/transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/data/offsides/transform.py b/data/offsides/transform.py index 66220b631..ea538e655 100644 --- a/data/offsides/transform.py +++ b/data/offsides/transform.py @@ -60,6 +60,7 @@ def get_and_transform_data(): "id": "PRR_error", "description": "Standard error of the PRR estimate", "type": "continuous", + "sample": False, "names": ["Proportional reporting ratio error"], }, { From 2ffa75c6f07fad27d55f270f71b7458bd542708d Mon Sep 17 00:00:00 2001 From: Michael Pieler Date: Fri, 5 May 2023 14:17:26 +0200 Subject: [PATCH 9/9] feat: minor text changes --- data/offsides/meta.yaml | 12 ++++++------ data/offsides/transform.py | 15 +++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) diff 
--git a/data/offsides/meta.yaml b/data/offsides/meta.yaml index f8ae8dbe4..3f9270c58 100644 --- a/data/offsides/meta.yaml +++ b/data/offsides/meta.yaml @@ -2,7 +2,7 @@ name: offsides description: |- OffSIDES is a database of individual drug side effect - signals mined from the FDA's. Adverse Event Reporting System. The + signals mined from the FDA's Adverse Event Reporting System. The innovation of OffSIDES is that a propensity score matching (PSM) model is used to identify control drugs and produce better PRR estimates. In OffSIDES we focus on drug safety signals that are not already @@ -10,18 +10,18 @@ description: |- they are off-label drug side effects. targets: - id: PRR - description: Proportional reporting ratio + description: proportional reporting ratio type: continuous names: - - Proportional reporting ratio + - proportional reporting ratio - id: PRR_error - description: Standard error of the PRR estimate + description: standard error of the PRR estimate type: continuous sample: false names: - - Proportional reporting ratio error + - standard error of the proportional reporting ratio error - id: mean_reporting_frequency - description: Proportion of reports for the drug that report the side effect + description: mean reporting frequency for the drug type: continuous names: - mean reporting frequency diff --git a/data/offsides/transform.py b/data/offsides/transform.py index ea538e655..4ea0ed0fb 100644 --- a/data/offsides/transform.py +++ b/data/offsides/transform.py @@ -28,8 +28,7 @@ def get_and_transform_data(): assert df.columns.tolist() == expected_columns - # drop columns - # drop A, B, C, D + # drop columns A, B, C, D df.drop(columns=["A", "B", "C", "D"], inplace=True) # remove duplicates df.drop_duplicates(inplace=True) @@ -43,7 +42,7 @@ def get_and_transform_data(): meta = { "name": "offsides", "description": """OffSIDES is a database of individual drug side effect -signals mined from the FDA's. Adverse Event Reporting System. 
The +signals mined from the FDA's Adverse Event Reporting System. The innovation of OffSIDES is that a propensity score matching (PSM) model is used to identify control drugs and produce better PRR estimates. In OffSIDES we focus on drug safety signals that are not already @@ -52,20 +51,20 @@ def get_and_transform_data(): "targets": [ { "id": "PRR", - "description": "Proportional reporting ratio", + "description": "proportional reporting ratio", "type": "continuous", - "names": ["Proportional reporting ratio"], + "names": ["proportional reporting ratio"], }, { "id": "PRR_error", - "description": "Standard error of the PRR estimate", + "description": "standard error of the PRR estimate", "type": "continuous", "sample": False, - "names": ["Proportional reporting ratio error"], + "names": ["standard error of the proportional reporting ratio error"], }, { "id": "mean_reporting_frequency", - "description": "Proportion of reports for the drug that report the side effect", + "description": "mean reporting frequency for the drug", "type": "continuous", "names": ["mean reporting frequency"], },