diff --git a/DESCRIPTION b/DESCRIPTION index dc87bd4..34d7982 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: MND Type: Package Title: A package for epidemiology study about Motor neuron disease (MND) in the NEUROGEN Version: 0.1.0 -Depends: data.table, SCCS, lubridate -Imports: data.table, SCCS, lubridate +Depends: data.table, SCCS, lubridate, readxl, ggplot2, incidence2 +Imports: data.table, SCCS, lubridate, readxl, ggplot2, incidence2 Suggests: Author: c(person("FAN", "Min", email = "minfan@connect.hku.hk", role =c("aut", "cre"))) Maintainer: FAN Min diff --git a/NAMESPACE b/NAMESPACE index 57d2c25..1218d56 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,10 @@ # Generated by roxygen2: do not edit by hand S3method(print,mndsccs) +export(clean_4_survival) export(get_DT_Exposure_Endpoint) export(get_DT_SCCS) -export(run) +export(plot_inci) +export(run_incidence) +export(run_sccs) import(data.table) diff --git a/R/clean.R b/R/clean.R index 80a500e..fd4fcf1 100644 --- a/R/clean.R +++ b/R/clean.R @@ -19,7 +19,7 @@ shrink_interval <- function(data,st,ed,gap=1){ setorder(data,id,st) output <- data[,indx:=c(0, cumsum(as.numeric(shift(st,type="lead"))> cummax(as.numeric(ed)+gap))[-.N]),id - ][,.(st=first(st),ed=last(ed)),.(id,indx)] + ][,.(st=min(st),ed=max(ed)),.(id,indx)] setnames(output,c("st","ed"),c(st,ed)) return(output) } @@ -32,6 +32,7 @@ shrink_interval <- function(data,st,ed,gap=1){ #' @param demo the dataset with demographic information, including id, dob, dod, sex, onset_date. Pls check the data shall. #' @param ip the dataset with all in-hospitalization records, including id, date of adminssion, date of discharge, type of records (setting, IP, OP, AE). Pls check the data shall. #' @param rx the dataset with all prescription records, including id, drug name, date of prescription start and end, type of presciption (IP, OP, AE, Discharge). Pls check the data shall. +#' @param riluzole_name the name in your database stands for riluzole. #' @return a combined data set in data table format. One patients may have multiple rows of records. #' @import data.table #' @export @@ -49,12 +50,16 @@ shrink_interval <- function(data,st,ed,gap=1){ #' 2665: 9903734 F 1955-07-02 2018-11-28 2017-11-28 2018-07-25 2018-11-20 2017-11-27 2017-11-28 #' 2666: 9903734 F 1955-07-02 2018-11-28 2017-11-28 2018-07-25 2018-11-20 2018-07-30 2018-08-10 #' 2667: 9903734 F 1955-07-02 2018-11-28 2017-11-28 2018-07-25 2018-11-20 2018-10-31 2018-11-02 -get_DT_Exposure_Endpoint <- function(demo, rx, ip){ - rx_riluzole<- shrink_interval(rx[grepl('riluzole|riluteck',drug_name,ignore.case = T) & +get_DT_Exposure_Endpoint <- function(demo, rx, ip,riluzole_name='riluzole|riluteck',...){ + rx_riluzole<- shrink_interval(rx[grepl(riluzole_name,drug_name,ignore.case = T) & !setting %in% c("I")],"date_rx_st","date_rx_end") + rx_earliest <- rx_riluzole[,.(earliest_rx=min(date_rx_st)),id][,unique(.SD)] + ip_riluzole<- shrink_interval(ip[id %in% demo$id & id %in% rx_riluzole$id],"date_adm","date_dsg",gap = 7) rx_ip <- merge(rx_riluzole[,indx:=NULL], ip_riluzole[,indx:=NULL],by="id",allow.cartesian = T) + + rx_ip <- merge(rx_ip,rx_earliest,by="id",all.x=T) demo_rx_ip<-merge(demo,rx_ip,by="id") return(demo_rx_ip) } @@ -70,9 +75,13 @@ get_DT_Exposure_Endpoint <- function(demo, rx, ip){ #' @export #' #' @examples no example -get_DT_SCCS <- function(data,obst="2001-08-24"){ - df_mnd <- data[,`:=`(obst=pmax(onset_date,lubridate::ymd(obst),na.rm = T), - obed=pmin(dod,onset_date %m+% years(2),na.rm=T)) +get_DT_SCCS <- function(data,...){ + df_mnd <- data[,`:=`(obst=pmax(pmin(onset_date %m-% years(1), + earliest_rx %m-% years(1),na.rm=T), + lubridate::ymd(obst),na.rm = T), + obed=pmin(dod, + pmin(onset_date , + earliest_rx,na.rm=T) %m+% years(2),obed,na.rm=T)) ][,`:=`(obst=as.numeric(obst-dob), obed=as.numeric(obed-dob), event=as.numeric(date_adm-dob), @@ -81,6 +90,7 @@ get_DT_SCCS <- function(data,obst="2001-08-24"){ df_mnd <- df_mnd[,`:=`(strx=as.numeric(date_rx_st-dob), edrx=as.numeric(date_rx_end-dob))] + df_mnd <- df_mnd[strx>=obst & strx<= obed & event >=obst & event <= obed] # for more than one rx periods: last_rx_time <- unique(df_mnd[,.(id,strx,edrx)])[,last_rx_ed:=as.numeric(shift(edrx,n = 1,fill = NA,type = "lag")),.(id)] @@ -140,3 +150,67 @@ get_DT_SCCS <- function(data,obst="2001-08-24"){ return(df_mnd) } + +get_subtype <- function(data,icd_subtypes_temp){ + apply(icd_subtypes_temp,1,function(x) data[,(paste0("subtype.",x[["abbr"]])):=fifelse(grepl(x[["grepl"]],codes),T,F)]) +} + + + +#' Create a dataset for survival and incidence calculation +#' +#' @param dx +#' @param demo +#' @param codes_sys can be "icd9", "icd10", or "readcodes" +#' +#' @return +#' @export +#' +#' @examples +clean_4_survival <- function(demo,dx,codes_sys,...){ + + if(codes_sys=="icd9"){ + codes_defined <- "^335.2$|^335.2[01249]" + }else if(codes_sys=="icd10"){ + codes_defined <- "^G12.2" + }else if(codes_sys=="readcode"){ + stop("Don't worry. Fm is working on Readcode now.") + } + + icd_subtypes <- as.data.table(readxl::read_excel("data/codes_mnd.xlsx",sheet = "subtype")) + icd_subtypes_temp <- icd_subtypes[,.(Dx, abbr, grepl=get(codes_sys))] + temp_dx <- copy(dx) + setorder(temp_dx,"id","ref_date") + temp_dx <- temp_dx[grepl(codes_defined,codes),.SD[ref_date==min(ref_date)],id + ][,unique(.SD)][,.(id,onset_date=ref_date,codes=codes)] + temp_dx_codes <- dcast(temp_dx,id+onset_date~.,value.var = "codes",fun.aggregate = function(x) paste(unique(x),collapse = ",")) + setnames(temp_dx_codes,".","codes") + + get_subtype(temp_dx,icd_subtypes_temp) + temp_dx[,codes:=NULL] + temp_dx <- dcast(temp_dx, + id+onset_date~., + value.var = c("subtype.als","subtype.pma","subtype.pbp","subtype.pls","subtype.others"), + fun.aggregate = function(x) any(x)) + temp_dx <- merge(temp_dx_codes,temp_dx,by=c("id","onset_date")) + + + df_surv <- merge(temp_dx, + demo[,.(id,sex,dob,dod)], + by="id",all.x=T) + + df_surv[,outcome:=fifelse(!is.na(dod),1,0)] + df_surv[,obs.deadline:=fifelse(is.na(dod),ymd('20191231'),dod)] + df_surv[,age_adm:=as.numeric((onset_date-dob)/365.25)] + df_surv[,age_group:=cut(age_adm, breaks = c(0,6,13,20,48,65,80,Inf), + include.lowest = T,right=FALSE)] + df_surv[,age_group_std:=cut(age_adm, breaks = c(seq(0,85,5),+Inf), + include.lowest = T,right=FALSE,labels=c("0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", + "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", + "70-74", "75-79", "80-84", "85+"))] + df_surv[,year_onset:=year(onset_date)] + df_surv[,time_to_event:=as.numeric(obs.deadline-onset_date)] + df_surv <- df_surv[time_to_event>=0 & onset_date>=ymd("1994-01-01")] + return(df_surv) +} + diff --git a/R/run.R b/R/run.R index eee717e..fefde7c 100644 --- a/R/run.R +++ b/R/run.R @@ -1,20 +1,28 @@ #' sccs for mnd study -#' +#' @param demo +#' @param rx +#' @param ip +#' @param target_drugs the drug name of riluzole. In hong kong, there are two type of drug names in HK: riluzole and riluteck +#' @param obst the defined study observation date. Choose the earliest avaiable date with *Riluzole* in the hospital or approved date by FDA. For instance, 2001-8-24 in Hong Kong. +#' @param obed the defined study observation date. Default : 2019-12-31 #' @return #' @export #' -#' @examples run() -run <- function(){ +#' @examples run_sccs() +run_sccs <- function(demo, rx, ip, target_drugs='riluzole|riluteck', + obst="2001-08-24", obed="2018-12-31", ...){ message("merge exposure and outcome datasets") - dt_combined <- get_DT_Exposure_Endpoint(demo,rx,ip) - message("SCCS data cleanning") - dt_sccs <- get_DT_SCCS(dt_combined) + message(nrow(demo)," in the cohort","\n========================\n\n") + dt_combined <- get_DT_Exposure_Endpoint(demo,rx,ip,...) + dt_sccs <- get_DT_SCCS(dt_combined,...) + message("After cleanning, count of participants for sccs:\n", dt_sccs[,uniqueN(id)],"\n========================\n\n") ageq <- floor(seq(20,90,10)*365) dt_sccs$id <- as.numeric(dt_sccs$id) - dt_sccs$dob_dmy <- as.numeric(format(dt_sccs$dob,"%d%m%Y")) -# dob_dmy_model <- dt_sccs[,.(id,event,dob_dmy)][,unique(.SD)][,dob_dmy] # this is the dob for previous SCCS package, with version less than 1.3 + setorder(dt_sccs,id,event,date_rx_st) + dt_sccs$dob_dmy <- as.numeric(format(dt_sccs$dob,"%d%m%Y")) # this is for version 1.4 or above + dob_dmy_model <- dt_sccs[,.(id,event,dob_dmy)][,unique(.SD)][,dob_dmy] # this is the dob for previous SCCS package, with version less than 1.3 result <- sccs(event ~ strx_30b + strx_0a + strx_30a + strx_60a + strx_90a+ strx_120a + strx_150a +strx_180a + age + season, @@ -25,10 +33,12 @@ run <- function(){ adrug = list(strx_30b,strx_0a, strx_30a, strx_60a,strx_90a,strx_120a, strx_150a, strx_180a), - aedrug = list(edrx_30b,edrx_0a,edrx_30a,edrx_60a,edrx_90a, - edrx_120a, edrx_150a, edrx_180a), + aedrug = list(edrx_30b,edrx_0a,edrx_30a, + edrx_60a,edrx_90a,edrx_120a, + edrx_150a, edrx_180a), dataformat = "stack", agegrp = ageq, seasongrp = c(0103,0105,0109,0111), - dob=dob_dmy, + # dob=dob_dmy, # for sccs version 1.5 or above + dob=dob_dmy_model, # for sccs version 1.3 only data = as.data.frame(dt_sccs)) return(result) } @@ -96,3 +106,104 @@ sccs <- function(fml,...){ print.mndsccs <- function(x){ print(x$res) } + +show_digit<- function(x){ + return(sprintf(as.numeric(x),fmt="%#.2f")) +} + +#' Inci Ci calculation +#' +#' @param x +#' +#' @return +#' +#' @examples +get_inci_CI <- function(x){ + temp <- poisson.test(as.numeric(x[["stdN"]]),as.numeric(x[["pop_raw"]])) + est <- temp[["estimate"]]*100000 + est_l <- temp$conf.int[2]*100000 + est_h <- temp$conf.int[1]*100000 + est_cb <- paste0(show_digit(est)," (", + show_digit(est_l),"-", + show_digit(est_h),")") + return(data.frame(est,est_l,est_h,est_cb)) +} + + +#' run analysis for incidence +#' +#' @param demo +#' @param dx +#' @param region +#' @param codes_sys +#' +#' @return +#' @export +#' +#' @examples +run_incidence <- function(demo, dx, region="hk",codes_sys = "icd9"){ + dx_inci <- clean_4_survival(demo=demo,dx=dx,codes_sys) + + raw_pop <- setDT(read_xlsx("./data/codes_mnd.xlsx", sheet=paste0(region,"_pop"))) + raw_pop <- melt(raw_pop,id.vars = "Age") + setnames(raw_pop,c("variable","value"),c("year_onset","pop_raw")) + + std_pop <- setDT(read_xlsx("./data/codes_mnd.xlsx",sheet="stdpop")) + std_pop$Age<-factor(std_pop$Age) + + + incident_raw <- dx_inci[,.(id,year_onset,age_group_std)][,.N,by=.(age_group_std,year_onset)] + setnames(incident_raw,"age_group_std","Age") + incident_raw$year_onset <- as.factor(incident_raw$year_onset) + setorder(incident_raw,Age,year_onset) + + incident_raw <- merge(incident_raw, + raw_pop, + by=c("Age","year_onset"),all.y=T) + incident_raw <- merge(incident_raw, + std_pop[,.(Age,pop_std=`Standard For SEER*Stat`,pop_std_ratio=`WHO std pop`)], + by=c("Age")) + incident_raw[is.na(N),N:=0] + incident_std <- incident_raw[,std_risk:=N/pop_raw*pop_std_ratio*100000 + ][,.(std_risk=sum(std_risk),pop_raw=sum(pop_raw)),year_onset + ][,stdN:=round(std_risk*pop_raw/100000)] + + + + incident_std <- cbind(incident_std,rbindlist(apply(incident_std,1,get_inci_CI))) + incident_std[,`:=`(est=as.numeric(est), + est_l=as.numeric(est_l), + est_h=as.numeric(est_h), + year_onset=factor(year_onset))] + return(list(std_inci=incident_std, raw_dt=dx_inci)) +} + +#' Plot the figure of incidence +#' +#' @param data +#' @param region +#' +#' @return +#' @export +#' +#' @examples +p_inci <- function(data,region="Hong Kong"){ + ggplot(data,aes(x=year_onset,y=est,group=1))+ + geom_line()+theme_light()+ + xlab("Onset year")+ + ylab("Age standardized MND incidence \nby year per 100,000 population")+ + ggtitle(region)+ + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1,size=18), + axis.text.y = element_text(size=18), + plot.title = element_text(size=22), + axis.title = element_text(size=18) + ) +} + + +p_inci_sg <- function(x){ + iw <- incidence(x, interval = "6 months", date_index = onset_date, groups = sex) + plot(iw, fill = "sex", color = "white",border="grey",title = "Hong Kong") +} + + diff --git a/README.md b/README.md index a38b430..457f889 100644 --- a/README.md +++ b/README.md @@ -71,5 +71,3 @@ ```r run_sccs(demo,rx,ip) ``` - - diff --git a/data/codes_mnd.xlsx b/data/codes_mnd.xlsx new file mode 100644 index 0000000..5d65496 Binary files /dev/null and b/data/codes_mnd.xlsx differ diff --git a/example.rda b/example.rda new file mode 100644 index 0000000..447b697 Binary files /dev/null and b/example.rda differ diff --git a/man/clean_4_survival.Rd b/man/clean_4_survival.Rd new file mode 100644 index 0000000..6f06d44 --- /dev/null +++ b/man/clean_4_survival.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean.R +\name{clean_4_survival} +\alias{clean_4_survival} +\title{Create a dataset for survival and incidence calculation} +\usage{ +clean_4_survival(demo, dx, codes_sys, ...) +} +\arguments{ +\item{codes_sys}{can be "icd9", "icd10", or "readcodes"} +} +\value{ + +} +\description{ +Create a dataset for survival and incidence calculation +} diff --git a/man/get_DT_Exposure_Endpoint.Rd b/man/get_DT_Exposure_Endpoint.Rd index 9c19184..6242d08 100644 --- a/man/get_DT_Exposure_Endpoint.Rd +++ b/man/get_DT_Exposure_Endpoint.Rd @@ -4,7 +4,13 @@ \alias{get_DT_Exposure_Endpoint} \title{Combine the datasets with exposure and outcomes} \usage{ -get_DT_Exposure_Endpoint(demo, rx, ip) +get_DT_Exposure_Endpoint( + demo, + rx, + ip, + riluzole_name = "riluzole|riluteck", + ... +) } \arguments{ \item{demo}{the dataset with demographic information, including id, dob, dod, sex, onset_date. Pls check the data shall.} @@ -12,6 +18,8 @@ get_DT_Exposure_Endpoint(demo, rx, ip) \item{rx}{the dataset with all prescription records, including id, drug name, date of prescription start and end, type of presciption (IP, OP, AE, Discharge). Pls check the data shall.} \item{ip}{the dataset with all in-hospitalization records, including id, date of adminssion, date of discharge, type of records (setting, IP, OP, AE). Pls check the data shall.} + +\item{riluzole_name}{the name in your database stands for riluzole.} } \value{ a combined data set in data table format. One patients may have multiple rows of records. diff --git a/man/get_DT_SCCS.Rd b/man/get_DT_SCCS.Rd index 8f35cda..4571ecd 100644 --- a/man/get_DT_SCCS.Rd +++ b/man/get_DT_SCCS.Rd @@ -4,7 +4,7 @@ \alias{get_DT_SCCS} \title{Genrate the dataset and Cleaning for SCCS} \usage{ -get_DT_SCCS(data, obst = "2001-08-24") +get_DT_SCCS(data, ...) } \arguments{ \item{data}{the dataset including exposure and endpoint information} diff --git a/man/get_inci_CI.Rd b/man/get_inci_CI.Rd new file mode 100644 index 0000000..772e7f7 --- /dev/null +++ b/man/get_inci_CI.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run.R +\name{get_inci_CI} +\alias{get_inci_CI} +\title{Inci Ci calculation} +\usage{ +get_inci_CI(x) +} +\arguments{ +\item{x}{} +} +\value{ + +} +\description{ +Inci Ci calculation +} diff --git a/man/plot_inci.Rd b/man/plot_inci.Rd new file mode 100644 index 0000000..3f43853 --- /dev/null +++ b/man/plot_inci.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run.R +\name{plot_inci} +\alias{plot_inci} +\title{Plot the figure of incidence} +\usage{ +plot_inci(data, region = "Hong Kong") +} +\arguments{ +\item{region}{} +} +\value{ + +} +\description{ +Plot the figure of incidence +} diff --git a/man/run.Rd b/man/run.Rd deleted file mode 100644 index 40c9dc7..0000000 --- a/man/run.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run.R -\name{run} -\alias{run} -\title{sccs for mnd study} -\usage{ -run() -} -\value{ - -} -\description{ -sccs for mnd study -} -\examples{ -run() -} diff --git a/man/run_incidence.Rd b/man/run_incidence.Rd new file mode 100644 index 0000000..db233ba --- /dev/null +++ b/man/run_incidence.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run.R +\name{run_incidence} +\alias{run_incidence} +\title{run analysis for incidence} +\usage{ +run_incidence(demo, dx, region = "hk", codes_sys = "icd9") +} +\arguments{ +\item{codes_sys}{} +} +\value{ + +} +\description{ +run analysis for incidence +} diff --git a/man/run_sccs.Rd b/man/run_sccs.Rd new file mode 100644 index 0000000..f084cfb --- /dev/null +++ b/man/run_sccs.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run.R +\name{run_sccs} +\alias{run_sccs} +\title{sccs for mnd study} +\usage{ +run_sccs( + demo, + rx, + ip, + target_drugs = "riluzole|riluteck", + obst = "2001-08-24", + obed = "2018-12-31", + ... +) +} +\arguments{ +\item{target_drugs}{the drug name of riluzole. In hong kong, there are two type of drug names in HK: riluzole and riluteck} + +\item{obst}{the defined study observation date. Choose the earliest avaiable date with *Riluzole* in the hospital or approved date by FDA. For instance, 2001-8-24 in Hong Kong.} + +\item{obed}{the defined study observation date. Default : 2019-12-31} +} +\value{ + +} +\description{ +sccs for mnd study +} +\examples{ +run_sccs() +}