Mae hwn yn fersiwn HTML o atodiad i'r cais Rhyddid Gwybodaeth 'Calculations of expected vaccine deaths in PHS COVID-19 statistical report from 6 June 2021'.

#This script is used to calculate expected deaths in the 28 days following a Covid-19 vaccination.

#load packages
library(epitools)
library(survival)
library(tidyverse)

#read in NRS mid-year population estimates from 2014 to 2019
population.data <- read.csv("Z:\\JP\\vaccine safety\\data\\pop_est_2014_2019.csv")

#read in mortality data from 2014 to 2019
death.data <- read.csv("Z:\\JP\\vaccine safety\\data\\all_deaths_dec2014_nov2019.csv")

#Calculate background day rate and stratify by age, gender and month
z1 <- population.data
z2 <- death.data
z3 <- data.frame(mnth=1:12,no_days=c(31,28,31,30,31,30,31,31,30,31,30,31))
z2 <- merge(z2,z3)
z2$Year <- substr(z2$yrmnth_death,1,4)
z2 <- merge(z2, z1, by.x=c("age_band","SEX","Year"), by.y=c("age_band","Sex","Year"))
z2$rate_day <- z2$deaths/z2$pop/z2$no_days
z.agg <- aggregate(z2[,c("rate_day")], list(z2$SEX,z2$age_band,z2$mnth),mean)
names(z.agg)[1:3] <- c("SEX","age_band","mnth")
z.agg$SEX <- ifelse(z.agg$SEX==1,"MALE", "FEMALE")
exp_rate <- z.agg



#read in a vaccine uptake extract containing date of 1st dose, date of 2nd dose, age, gender and date of death (if applicable)
Vaccine_death <- readRDS("stage one mortality analysis\\cleaned_vaccine_death_May.rds")
z <- Vaccine_death
z <- z %>%
mutate(age_1st_vacc= floor(as.numeric(date_vacc_1-patient_date_of_birth)/365.25))%>%
filter(age_1st_vacc>15)
z$agegp2 <- cut(z$age_1st_vacc, breaks=c(15,seq(19,79,5),max(z$age_1st_vacc,na.rm=TRUE)),label=unique(population.data$age_band))



#End follow up (persondays) 27 days post vacc and at date of death whichever comes first
a_end <- max(z$DATE_OF_DEATH,na.rm=TRUE)
z<- subset(z, date_vacc_1<=a_end)
a_start <- as.Date("2020-12-08")#date vaccination start
z$end <- z$date_vacc_1 + 27
z[z$DATE_OF_DEATH<=z$end & !is.na(z$DATE_OF_DEATH),"end"] <- z[z$DATE_OF_DEATH<=z$end& !is.na(z$DATE_OF_DEATH),"DATE_OF_DEATH"]
z[z$end>a_end,"end"] <- a_end



#event variable - all cause death 0-27 days post 1st dose
z$days_1 <- as.numeric(z$end- z$date_vacc_1 )
z$days_2 <- as.numeric(z$DATE_OF_DEATH- z$date_vacc_1 )
z$days_28_event =ifelse(z$days_2 <= 27 & !is.na(z$DATE_OF_DEATH) , 1,0)
z$days_28_event[is.na(z$days_28_event)] <- 0


#persondays censored at the date of second dose
z[z$date_vacc_2 <=z$end & !is.na(z$vacc_type_2), "days_28_event"] <- 0
z[z$date_vacc_2 <=z$end& !is.na(z$vacc_type_2), "end"] <- z[z$date_vacc_2 <=z$end& !is.na(z$vacc_type_2), "date_vacc_2"]
z$days_1 <- as.numeric(z$end- z$date_vacc_1 )


#stratify persondays by month for each individual
z$p2date <- as.Date("2020-12-31")
z$p3date <- as.Date("2021-01-31")
z$p4date <- as.Date("2021-02-28")
z$p5date <- as.Date("2021-03-31")
z$p6date <- as.Date("2021-04-30")
z1 <- tmerge(z, z, id=ID, endpt=event(end,days_28_event),tstart=date_vacc_1-1,tstop=end)
z1 <- tmerge(z1,z, id=ID, m1=tdc(p2date))
z1 <- tmerge(z1,z, id=ID, m2=tdc(p3date))
z1 <- tmerge(z1,z, id=ID, m3=tdc(p4date))
z1 <- tmerge(z1,z, id=ID, m4=tdc(p5date))
z1 <- tmerge(z1,z, id=ID, m5=tdc(p6date))


z1$month <- z1$m1+z1$m2+z1$m3+z1$m4+z1$m5
z1$month <- factor(z1$month, levels=0:5,labels=c("12","1", "2", "3", "4","5"))
z1$tstart1 <- as.numeric(z1$tstart-a_start)+2#+2 to adjust for <0, both tstart1 and tstart2 + 2 so the length of persondays not change
z1$tstop1 <- as.numeric(z1$tstop-a_start)+2
z.surv <- z1

#count obeserved number and persondays by age gender and month
z.agg <- pyears(Surv(tstart1,tstop1,endpt) ~ agegp2+patient_sex+month, data=z1 , scale=1, data.frame=TRUE)
z.res <- z.agg$data

#merge over background rate to the persondays by age gender and month
z.res$month <- as.numeric(as.character(z.res$month))
z.res <- merge(z.res, exp_rate[,c("age_band","SEX","mnth","rate_day")], by.x=c("agegp2", "patient_sex", "month"), by.y=c("age_band","SEX","mnth"))


#cacluated the expected number of deaths in each age/gender/month combinition
z.res$exp_event <- z.res$pyears * z.res$rate_day

#set up reporting age group
z.res$agegp <- z.res$agegp2
levels(z.res$agegp) <- c(rep("<50",7),rep("50-69",4),rep("70-79",2),"80+")


#sum up the observed number of death, persondays and expeced number of death by reporting age group for 0-27 days post 1st dose
z.agg22 <- aggregate(z.res[,c("event","pyears", "exp_event")], list(z.res$agegp),sum)


#sum up the observed number of death, persondays and expeced number of death for 0-27 days post 1st dose
z.agg23 <- apply(z.res[,c("event","pyears", "exp_event")], 2,sum)
z.agg23 <- data.frame(event=z.agg23[1],pyears=z.agg23[2],exp_event=z.agg23[3])

#the calculation of the expected number of death post second dose are very similar
#- just change the date of 1st dose to the date of the second dose in the above code
#and omit the code to censor the persondays at the date of second dose