diff --git a/analysis/hyp_reg_measures.py b/analysis/hyp_reg_measures.py
new file mode 100644
index 0000000..9427cc9
--- /dev/null
+++ b/analysis/hyp_reg_measures.py
@@ -0,0 +1,96 @@
+from ehrql import INTERVAL, case, create_measures, months, when, codelist_from_csv, create_dataset
+from ehrql.tables.tpp import (
+    clinical_events,
+    patients,
+    practice_registrations,
+)
+
+# Hypertension codes
+hyp_cod = codelist_from_csv(
+    "codelists/nhsd-primary-care-domain-refsets-hyp_cod.csv",
+    column="code"
+)
+# Hypertension resolved codes
+hypres_cod = codelist_from_csv(
+    "codelists/nhsd-primary-care-domain-refsets-hypres_cod.csv",
+    column="code"
+)
+
+index_date = "2024-03-31"
+measures = create_measures()
+
+### variables ###
+
+# Only consider events recorded on or before the end of the measured interval
+selected_events = clinical_events.where(
+    clinical_events.date.is_on_or_before(INTERVAL.end_date)
+)
+
+# date of the most recent hypertension diagnosis up to and including the achievement date
+hyplat_dat = (
+    selected_events.where(selected_events.snomedct_code.is_in(hyp_cod))
+    .sort_by(selected_events.date)
+    .last_for_patient()
+    .date
+)
+
+# date of the most recent hypertension resolved code up to and including the
+# achievement date; it is NOT restricted to codes recorded after the diagnosis,
+# so the rules below compare it with hyplat_dat
+hypres_dat = (
+    selected_events.where(selected_events.snomedct_code.is_in(hypres_cod))
+    .sort_by(selected_events.date)
+    .last_for_patient()
+    .date
+)
+
+# Rule for patients with a hypertension code that has not been resolved:
+# diagnosed AND (never resolved OR diagnosed again after the latest resolved code).
+# The brackets are explicit because `&` binds more tightly than `|`.
+hyp_reg_r1 = hyplat_dat.is_not_null() & (
+    hypres_dat.is_null() | (hyplat_dat > hypres_dat)
+)
+
+# Rule for patients whose hypertension has been resolved: a resolved code on or
+# after the latest diagnosis. Stated explicitly (rather than negating
+# hyp_reg_r1) so patients with no hypertension code are never counted here.
+hyp_resolved = (
+    hyplat_dat.is_not_null()
+    & hypres_dat.is_not_null()
+    & (hypres_dat >= hyplat_dat)
+)
+
+age = patients.age_on(INTERVAL.start_date)
+age_band = case(
+    when((age >= 0) & (age < 20)).then("0-19"),
+    when((age >= 20) & (age < 40)).then("20-39"),
+    when((age >= 40) & (age < 60)).then("40-59"),
+    when((age >= 60) & (age < 80)).then("60-79"),
+    when(age >= 80).then("80+"),
+)
+
+registration = practice_registrations.for_patient_on(INTERVAL.end_date)
+
+# Measure defining the rate of patients with unresolved hypertension per age band and sex
+measures.define_measure(
+    name="diagnosis_by_age",
+    numerator=hyp_reg_r1,
+    denominator=registration.exists_for_patient(),
+    group_by={
+        "age_band": age_band,
+        "sex": patients.sex,
+    },
+    intervals=months(12).starting_on("2023-04-01"),
+)
+
+# Measure defining the rate of patients with resolved hypertension per age band and sex
+measures.define_measure(
+    name="resolved_by_age",
+    numerator=hyp_resolved,
+    denominator=registration.exists_for_patient(),
+    group_by={
+        "age_band": age_band,
+        "sex": patients.sex,
+    },
+    intervals=months(12).starting_on("2023-04-01"),
+)
diff --git a/analysis/visualise_measures.R b/analysis/visualise_measures.R
new file mode 100644
index 0000000..bf87f69
--- /dev/null
+++ b/analysis/visualise_measures.R
@@ -0,0 +1,73 @@
+library(tidyverse)
+library(patchwork)
+library(here)
+
+# Load data
+df_measures <- read_csv(
+  here("output", "hyp", "hyp001_measures.csv")) %>%
+  replace_na(list(age_band = "(Missing)")) %>%
+  mutate(
+    start_date = as.Date(interval_start, format = "%Y-%m-%d"),
+    end_date = as.Date(interval_end, format = "%Y-%m-%d"),
+    age_band = factor(
+      age_band,
+      levels = c("0-19", "20-39", "40-59", "60-79", "80+", "(Missing)"),
+      labels = c("0-19", "20-39", "40-59", "60-79", "80+", "(Missing)")
+    )
+  )
+
+plot_hypres <- df_measures %>%
+  filter(measure == "resolved_by_age") %>%
+  ggplot(aes(
+    x = end_date,
+    y = ratio,
+    colour = age_band,
+  )) +
+  geom_point() +
+  geom_line(alpha = .5) +
+  labs(
+    title = NULL,
+    x = NULL,
+    y = "Patients with hypertension resolved code",
+    colour = "Age Band"
+  ) +
+  scale_y_continuous(
+    labels = scales::label_percent(),
+    limits = c(0,1)
+  ) +
+  facet_wrap(~ factor(
+    sex,
+    levels = c("female", "male", "intersex", "unknown"),
+    labels = c("Female", "Male", "Intersex", "Unknown")
+  ))
+
+plot_hyp <- df_measures %>%
+  filter(measure == "diagnosis_by_age") %>%
+  ggplot(aes(
+    x = end_date,
+    y = ratio,
+    colour = age_band,
+  )) +
+  geom_point() +
+  geom_line(alpha = .5) +
+  labs(
+    title = NULL,
+    x = NULL,
+    y = "Patients with hypertension code",
+    colour = "Age Band"
+  ) +
+  scale_y_continuous(
+    labels = scales::label_percent(),
+    limits = c(0,1)
+  ) +
+  facet_wrap(~ factor(
+    sex,
+    levels = c("female", "male", "intersex", "unknown"),
+    labels = c("Female", "Male", "Intersex", "Unknown")
+  ))
+
+plot_hyp <- (plot_hyp / plot_hypres) +
+  plot_layout(guides = "collect")
+
+# Pass the combined plot explicitly instead of relying on last_plot()
+ggsave("output/hyp/hyp_plot.png", plot = plot_hyp, width = 8, height = 8)
diff --git a/project.yaml b/project.yaml
index 1658493..420497d 100644
--- a/project.yaml
+++ b/project.yaml
@@ -39,3 +39,21 @@ actions:
     outputs:
       highly_sensitive:
         cohort: output/dm/dm017_milan.csv.gz
+
+  generate_hyp001_measures:
+    run: >
+      ehrql:v1 generate-measures analysis/hyp_reg_measures.py
+      --output output/hyp/hyp001_measures.csv
+    outputs:
+      moderately_sensitive:
+        measure: output/hyp/hyp001_measures.csv
+
+  generate_hyp001_plots:
+    run: >
+      r:latest
+      analysis/visualise_measures.R
+      --output output/hyp/hyp_plot.png
+    needs: [generate_hyp001_measures]
+    outputs:
+      moderately_sensitive:
+        hyp_plot: output/hyp/hyp_plot.png