Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Package: MND
Type: Package
Title: A package for epidemiology study about Motor neuron disease (MND) in the NEUROGEN
Version: 0.1.0
Depends: data.table, SCCS, lubridate
Imports: data.table, SCCS, lubridate
Depends: data.table, SCCS, lubridate, readxl, ggplot2, incidence2
Imports: data.table, SCCS, lubridate, readxl, ggplot2, incidence2
Suggests:
Author: c(person("FAN", "Min", email = "minfan@connect.hku.hk", role =c("aut", "cre")))
Maintainer: FAN Min <minfan@connect.hku.hk>
Expand Down
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Generated by roxygen2: do not edit by hand

S3method(print,mndsccs)
export(clean_4_survival)
export(get_DT_Exposure_Endpoint)
export(get_DT_SCCS)
export(run)
export(plot_inci)
export(run_incidence)
export(run_sccs)
import(data.table)
86 changes: 80 additions & 6 deletions R/clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ shrink_interval <- function(data,st,ed,gap=1){
setorder(data,id,st)
output <- data[,indx:=c(0, cumsum(as.numeric(shift(st,type="lead"))>
cummax(as.numeric(ed)+gap))[-.N]),id
][,.(st=first(st),ed=last(ed)),.(id,indx)]
][,.(st=min(st),ed=max(ed)),.(id,indx)]
setnames(output,c("st","ed"),c(st,ed))
return(output)
}
Expand All @@ -32,6 +32,7 @@ shrink_interval <- function(data,st,ed,gap=1){
#' @param demo the dataset with demographic information, including id, dob, dod, sex, onset_date. Pls check the data shall.
#' @param ip the dataset with all in-hospitalization records, including id, date of adminssion, date of discharge, type of records (setting, IP, OP, AE). Pls check the data shall.
#' @param rx the dataset with all prescription records, including id, drug name, date of prescription start and end, type of presciption (IP, OP, AE, Discharge). Pls check the data shall.
#' @param riluzole_name the name in your database stands for riluzole.
#' @return a combined data set in data table format. One patients may have multiple rows of records.
#' @import data.table
#' @export
Expand All @@ -49,12 +50,16 @@ shrink_interval <- function(data,st,ed,gap=1){
#' 2665: 9903734 F 1955-07-02 2018-11-28 2017-11-28 2018-07-25 2018-11-20 2017-11-27 2017-11-28
#' 2666: 9903734 F 1955-07-02 2018-11-28 2017-11-28 2018-07-25 2018-11-20 2018-07-30 2018-08-10
#' 2667: 9903734 F 1955-07-02 2018-11-28 2017-11-28 2018-07-25 2018-11-20 2018-10-31 2018-11-02
get_DT_Exposure_Endpoint <- function(demo, rx, ip){
rx_riluzole<- shrink_interval(rx[grepl('riluzole|riluteck',drug_name,ignore.case = T) &
get_DT_Exposure_Endpoint <- function(demo, rx, ip,riluzole_name='riluzole|riluteck',...){
rx_riluzole<- shrink_interval(rx[grepl(riluzole_name,drug_name,ignore.case = T) &
!setting %in% c("I")],"date_rx_st","date_rx_end")
rx_earliest <- rx_riluzole[,.(earliest_rx=min(date_rx_st)),id][,unique(.SD)]

ip_riluzole<- shrink_interval(ip[id %in% demo$id & id %in% rx_riluzole$id],"date_adm","date_dsg",gap = 7)
rx_ip <- merge(rx_riluzole[,indx:=NULL],
ip_riluzole[,indx:=NULL],by="id",allow.cartesian = T)

rx_ip <- merge(rx_ip,rx_earliest,by="id",all.x=T)
demo_rx_ip<-merge(demo,rx_ip,by="id")
return(demo_rx_ip)
}
Expand All @@ -70,9 +75,13 @@ get_DT_Exposure_Endpoint <- function(demo, rx, ip){
#' @export
#'
#' @examples no example
get_DT_SCCS <- function(data,obst="2001-08-24"){
df_mnd <- data[,`:=`(obst=pmax(onset_date,lubridate::ymd(obst),na.rm = T),
obed=pmin(dod,onset_date %m+% years(2),na.rm=T))
get_DT_SCCS <- function(data,...){
df_mnd <- data[,`:=`(obst=pmax(pmin(onset_date %m-% years(1),
earliest_rx %m-% years(1),na.rm=T),
lubridate::ymd(obst),na.rm = T),
obed=pmin(dod,
pmin(onset_date ,
earliest_rx,na.rm=T) %m+% years(2),obed,na.rm=T))
][,`:=`(obst=as.numeric(obst-dob),
obed=as.numeric(obed-dob),
event=as.numeric(date_adm-dob),
Expand All @@ -81,6 +90,7 @@ get_DT_SCCS <- function(data,obst="2001-08-24"){
df_mnd <- df_mnd[,`:=`(strx=as.numeric(date_rx_st-dob),
edrx=as.numeric(date_rx_end-dob))]

df_mnd <- df_mnd[strx>=obst & strx<= obed & event >=obst & event <= obed]
# for more than one rx periods:
last_rx_time <- unique(df_mnd[,.(id,strx,edrx)])[,last_rx_ed:=as.numeric(shift(edrx,n = 1,fill = NA,type = "lag")),.(id)]

Expand Down Expand Up @@ -140,3 +150,67 @@ get_DT_SCCS <- function(data,obst="2001-08-24"){
return(df_mnd)
}


get_subtype <- function(data,icd_subtypes_temp){
apply(icd_subtypes_temp,1,function(x) data[,(paste0("subtype.",x[["abbr"]])):=fifelse(grepl(x[["grepl"]],codes),T,F)])
}



#' Create a dataset for survival and incidence calculation
#'
#' @param dx
#' @param demo
#' @param codes_sys can be "icd9", "icd10", or "readcodes"
#'
#' @return
#' @export
#'
#' @examples
clean_4_survival <- function(demo,dx,codes_sys,...){

if(codes_sys=="icd9"){
codes_defined <- "^335.2$|^335.2[01249]"
}else if(codes_sys=="icd10"){
codes_defined <- "^G12.2"
}else if(codes_sys=="readcode"){
stop("Don't worry. Fm is working on Readcode now.")
}

icd_subtypes <- as.data.table(readxl::read_excel("data/codes_mnd.xlsx",sheet = "subtype"))
icd_subtypes_temp <- icd_subtypes[,.(Dx, abbr, grepl=get(codes_sys))]
temp_dx <- copy(dx)
setorder(temp_dx,"id","ref_date")
temp_dx <- temp_dx[grepl(codes_defined,codes),.SD[ref_date==min(ref_date)],id
][,unique(.SD)][,.(id,onset_date=ref_date,codes=codes)]
temp_dx_codes <- dcast(temp_dx,id+onset_date~.,value.var = "codes",fun.aggregate = function(x) paste(unique(x),collapse = ","))
setnames(temp_dx_codes,".","codes")

get_subtype(temp_dx,icd_subtypes_temp)
temp_dx[,codes:=NULL]
temp_dx <- dcast(temp_dx,
id+onset_date~.,
value.var = c("subtype.als","subtype.pma","subtype.pbp","subtype.pls","subtype.others"),
fun.aggregate = function(x) any(x))
temp_dx <- merge(temp_dx_codes,temp_dx,by=c("id","onset_date"))


df_surv <- merge(temp_dx,
demo[,.(id,sex,dob,dod)],
by="id",all.x=T)

df_surv[,outcome:=fifelse(!is.na(dod),1,0)]
df_surv[,obs.deadline:=fifelse(is.na(dod),ymd('20191231'),dod)]
df_surv[,age_adm:=as.numeric((onset_date-dob)/365.25)]
df_surv[,age_group:=cut(age_adm, breaks = c(0,6,13,20,48,65,80,Inf),
include.lowest = T,right=FALSE)]
df_surv[,age_group_std:=cut(age_adm, breaks = c(seq(0,85,5),+Inf),
include.lowest = T,right=FALSE,labels=c("0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34",
"35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69",
"70-74", "75-79", "80-84", "85+"))]
df_surv[,year_onset:=year(onset_date)]
df_surv[,time_to_event:=as.numeric(obs.deadline-onset_date)]
df_surv <- df_surv[time_to_event>=0 & onset_date>=ymd("1994-01-01")]
return(df_surv)
}

133 changes: 122 additions & 11 deletions R/run.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@


#' sccs for mnd study
#'
#' @param demo
#' @param rx
#' @param ip
#' @param target_drugs the drug name of riluzole. In hong kong, there are two type of drug names in HK: riluzole and riluteck
#' @param obst the defined study observation date. Choose the earliest avaiable date with *Riluzole* in the hospital or approved date by FDA. For instance, 2001-8-24 in Hong Kong.
#' @param obed the defined study observation date. Default : 2019-12-31
#' @return
#' @export
#'
#' @examples run()
run <- function(){
#' @examples run_sccs()
run_sccs <- function(demo, rx, ip, target_drugs='riluzole|riluteck',
obst="2001-08-24", obed="2018-12-31", ...){
message("merge exposure and outcome datasets")
dt_combined <- get_DT_Exposure_Endpoint(demo,rx,ip)
message("SCCS data cleanning")
dt_sccs <- get_DT_SCCS(dt_combined)
message(nrow(demo)," in the cohort","\n========================\n\n")
dt_combined <- get_DT_Exposure_Endpoint(demo,rx,ip,...)
dt_sccs <- get_DT_SCCS(dt_combined,...)
message("After cleanning, count of participants for sccs:\n", dt_sccs[,uniqueN(id)],"\n========================\n\n")
ageq <- floor(seq(20,90,10)*365)
dt_sccs$id <- as.numeric(dt_sccs$id)
dt_sccs$dob_dmy <- as.numeric(format(dt_sccs$dob,"%d%m%Y"))
# dob_dmy_model <- dt_sccs[,.(id,event,dob_dmy)][,unique(.SD)][,dob_dmy] # this is the dob for previous SCCS package, with version less than 1.3
setorder(dt_sccs,id,event,date_rx_st)
dt_sccs$dob_dmy <- as.numeric(format(dt_sccs$dob,"%d%m%Y")) # this is for version 1.4 or above
dob_dmy_model <- dt_sccs[,.(id,event,dob_dmy)][,unique(.SD)][,dob_dmy] # this is the dob for previous SCCS package, with version less than 1.3
result <- sccs(event ~ strx_30b + strx_0a + strx_30a + strx_60a + strx_90a+ strx_120a +
strx_150a +strx_180a +
age + season,
Expand All @@ -25,10 +33,12 @@ run <- function(){
adrug = list(strx_30b,strx_0a, strx_30a,
strx_60a,strx_90a,strx_120a,
strx_150a, strx_180a),
aedrug = list(edrx_30b,edrx_0a,edrx_30a,edrx_60a,edrx_90a,
edrx_120a, edrx_150a, edrx_180a),
aedrug = list(edrx_30b,edrx_0a,edrx_30a,
edrx_60a,edrx_90a,edrx_120a,
edrx_150a, edrx_180a),
dataformat = "stack", agegrp = ageq, seasongrp = c(0103,0105,0109,0111),
dob=dob_dmy,
# dob=dob_dmy, # for sccs version 1.5 or above
dob=dob_dmy_model, # for sccs version 1.3 only
data = as.data.frame(dt_sccs))
return(result)
}
Expand Down Expand Up @@ -96,3 +106,104 @@ sccs <- function(fml,...){
print.mndsccs <- function(x){
print(x$res)
}

show_digit<- function(x){
return(sprintf(as.numeric(x),fmt="%#.2f"))
}

#' Inci Ci calculation
#'
#' @param x
#'
#' @return
#'
#' @examples
get_inci_CI <- function(x){
temp <- poisson.test(as.numeric(x[["stdN"]]),as.numeric(x[["pop_raw"]]))
est <- temp[["estimate"]]*100000
est_l <- temp$conf.int[2]*100000
est_h <- temp$conf.int[1]*100000
est_cb <- paste0(show_digit(est)," (",
show_digit(est_l),"-",
show_digit(est_h),")")
return(data.frame(est,est_l,est_h,est_cb))
}


#' run analysis for incidence
#'
#' @param demo
#' @param dx
#' @param region
#' @param codes_sys
#'
#' @return
#' @export
#'
#' @examples
run_incidence <- function(demo, dx, region="hk",codes_sys = "icd9"){
dx_inci <- clean_4_survival(demo=demo,dx=dx,codes_sys)

raw_pop <- setDT(read_xlsx("./data/codes_mnd.xlsx", sheet=paste0(region,"_pop")))
raw_pop <- melt(raw_pop,id.vars = "Age")
setnames(raw_pop,c("variable","value"),c("year_onset","pop_raw"))

std_pop <- setDT(read_xlsx("./data/codes_mnd.xlsx",sheet="stdpop"))
std_pop$Age<-factor(std_pop$Age)


incident_raw <- dx_inci[,.(id,year_onset,age_group_std)][,.N,by=.(age_group_std,year_onset)]
setnames(incident_raw,"age_group_std","Age")
incident_raw$year_onset <- as.factor(incident_raw$year_onset)
setorder(incident_raw,Age,year_onset)

incident_raw <- merge(incident_raw,
raw_pop,
by=c("Age","year_onset"),all.y=T)
incident_raw <- merge(incident_raw,
std_pop[,.(Age,pop_std=`Standard For SEER*Stat`,pop_std_ratio=`WHO std pop`)],
by=c("Age"))
incident_raw[is.na(N),N:=0]
incident_std <- incident_raw[,std_risk:=N/pop_raw*pop_std_ratio*100000
][,.(std_risk=sum(std_risk),pop_raw=sum(pop_raw)),year_onset
][,stdN:=round(std_risk*pop_raw/100000)]



incident_std <- cbind(incident_std,rbindlist(apply(incident_std,1,get_inci_CI)))
incident_std[,`:=`(est=as.numeric(est),
est_l=as.numeric(est_l),
est_h=as.numeric(est_h),
year_onset=factor(year_onset))]
return(list(std_inci=incident_std, raw_dt=dx_inci))
}

#' Plot the figure of incidence
#'
#' @param data
#' @param region
#'
#' @return
#' @export
#'
#' @examples
p_inci <- function(data,region="Hong Kong"){
ggplot(data,aes(x=year_onset,y=est,group=1))+
geom_line()+theme_light()+
xlab("Onset year")+
ylab("Age standardized MND incidence \nby year per 100,000 population")+
ggtitle(region)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1,size=18),
axis.text.y = element_text(size=18),
plot.title = element_text(size=22),
axis.title = element_text(size=18)
)
}


p_inci_sg <- function(x){
iw <- incidence(x, interval = "6 months", date_index = onset_date, groups = sex)
plot(iw, fill = "sex", color = "white",border="grey",title = "Hong Kong")
}


2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,3 @@
```r
run_sccs(demo,rx,ip)
```


Binary file added data/codes_mnd.xlsx
Binary file not shown.
Binary file added example.rda
Binary file not shown.
17 changes: 17 additions & 0 deletions man/clean_4_survival.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion man/get_DT_Exposure_Endpoint.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_DT_SCCS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/get_inci_CI.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading