#=============================================================
# Fit an ASF model to observed data using ABC SMC, with the EasyABC package
library(EasyABC); library(HDInterval); library(reshape2); library(dplyr); library(survival); library(ggplot2)
#=============================================================
# Source the model: a stochastic SEIR simulated with a Gillespie routine

SEIR.onestep <- function (x, params) {   # function to calculate one step of the stochastic SEIR
  t  <- x[1]    # local variable for time
  S  <- x[2]    # local variable for susceptibles
  E1 <- x[3]
  E2 <- x[4]
  E3 <- x[5]
  E4 <- x[6]
  I1 <- x[7]
  I2 <- x[8]
  I3 <- x[9]
  I4 <- x[10]
  R  <- x[11]
  N  <- sum(x[2:10])     # population used in the transmission term (the R class is excluded)
  beta  <- params[1]
  sigma <- params[2]*4   # x4 because the latent period is split into 4 sub-stages (Erlang-distributed)
  gamma <- params[3]*4   # x4 because the infectious period is split into 4 sub-stages
  rates <- c(beta*S*(I1+I2+I3+I4)/N,
             sigma*E1, sigma*E2, sigma*E3, sigma*E4,
             gamma*I1, gamma*I2, gamma*I3, gamma*I4)
  changes <- matrix(c(-1, 1, 0, 0, 0, 0, 0, 0, 0, 0,   # beta*S*I
                       0,-1, 1, 0, 0, 0, 0, 0, 0, 0,   # sigma*E1
                       0, 0,-1, 1, 0, 0, 0, 0, 0, 0,   # sigma*E2
                       0, 0, 0,-1, 1, 0, 0, 0, 0, 0,   # sigma*E3
                       0, 0, 0, 0,-1, 1, 0, 0, 0, 0,   # sigma*E4
                       0, 0, 0, 0, 0,-1, 1, 0, 0, 0,   # gamma*I1
                       0, 0, 0, 0, 0, 0,-1, 1, 0, 0,   # gamma*I2
                       0, 0, 0, 0, 0, 0, 0,-1, 1, 0,   # gamma*I3
                       0, 0, 0, 0, 0, 0, 0, 0,-1, 1),  # gamma*I4
                    ncol = 10, byrow = TRUE)
  tau <- rexp(n = 1, rate = sum(rates))              # exponential waiting time to the next event
  U   <- runif(1)                                    # uniform random deviate
  m   <- min(which(cumsum(rates) >= U*sum(rates)))   # pick which event occurs
  x   <- x[2:11] + changes[m,]
  t   <- t + tau
  return(c(t, x))
}

# function for simulating a whole epidemic
SEIR.stoc.model.I4 <- function (xstart, params, tfinal) {   # function to simulate the stochastic SEIR
  output <- array(dim = c(1, 11))                           # set up array to store results
  colnames(output) <- c("time", "S", "E1", "E2", "E3", "E4",
                        "I1", "I2", "I3", "I4", "R")        # name variables
  output[1,] <- xstart                                      # first record of output is the initial condition
  k <- 1
  # step the Gillespie routine until tfinal is reached or no exposed/infectious animals remain
  while (as.logical(output[k, 1] < tfinal) & sum(output[k, 3:10]) > 0) {
    output <- rbind(output, SEIR.onestep(output[k,], params))
    k <- k + 1
  }
  return(as.data.frame(output))
}
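#=============================================================
# The SMC routine below needs the observed data, summary statistics, model
# wrapper, priors, proposal function and SMC settings to be defined first.
# What follows is an illustrative sketch of those objects only: the names
# (df.obs, myTarget, ssMax, ssMaxTime, ssResids, myModel, myPriors,
# proposal_func, n.particles, epsilon.steps, epsilon.t) match the names used
# later in the script, but the definitions, prior ranges, tolerances,
# particle numbers and the placeholder data are assumptions, not the
# fitted ASF inputs.
#=============================================================
# model wrapper: simulate one epidemic for a proposed parameter set theta;
# the initial state (16 susceptibles, 1 infectious) and the 60-day horizon
# are taken from the posterior re-simulation section at the end of the script
myModel <- function(theta){
  init <- c(time = 0, S = 16, E1 = 0, E2 = 0, E3 = 0, E4 = 0,
            I1 = 1, I2 = 0, I3 = 0, I4 = 0, R = 0)
  sim <- SEIR.stoc.model.I4(init, as.numeric(theta), 60)
  sim$I <- rowSums(sim[, c("I1", "I2", "I3", "I4")])   # total infectious over time
  sim
}

# observed data: a data frame with columns 'time' (days) and 'I' (count of
# infectious animals); here a single simulated trajectory with assumed
# parameter values stands in so that the sketch runs end to end -
# replace it with the real observed ASF counts
set.seed(1)
df.obs <- myModel(c(0.6, 1/4, 1/5))[, c("time", "I")]
while(max(df.obs$I) < 3){   # redraw if the placeholder outbreak fizzles out immediately
  df.obs <- myModel(c(0.6, 1/4, 1/5))[, c("time", "I")]
}

# summary statistics of a simulated trajectory x (assumed definitions)
ssMax     <- function(x){ max(x$I) }                 # peak number infectious
ssMaxTime <- function(x){ x$time[which.max(x$I)] }   # time of the peak
ssResids  <- function(x){                            # distance to the observed curve
  idx <- sapply(df.obs$time, function(tt) which.min(abs(x$time - tt)))
  sqrt(sum((x$I[idx] - df.obs$I)^2))
}
# targets: the observed summary statistics (0 for the residual statistic)
myTarget <- c(max(df.obs$I), df.obs$time[which.max(df.obs$I)], 0)

# priors: list of c(distribution, lower, upper); the ranges here are assumed
myPriors <- list(c("unif", 0.05, 3),    # beta,  transmission rate (per day)
                 c("unif", 0.05, 1),    # sigma, 1/latent period (per day)
                 c("unif", 0.05, 1))    # gamma, 1/infectious period (per day)

# draw one parameter set from the priors
proposal_func <- function(priors){
  sapply(priors, function(p) runif(1, as.numeric(p[2]), as.numeric(p[3])))
}

# SMC settings (assumed values); epsilon.t holds one row of tolerances per
# step and one column per summary statistic - only the first (loosest) row
# needs to be set, later rows are filled from the previous step's distances
n.particles   <- 1000
epsilon.steps <- 10
epsilon.t     <- matrix(NA, epsilon.steps, 3)
epsilon.t[1,] <- c(5, 20, 15)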
# prior probability of a proposed parameter set (1 if inside the uniform prior bounds, 0 otherwise)
prior_pr_func <- function(theta.proposed, priors){
  ret <- vector()
  for(p in 1:length(priors)){
    if(priors[[p]][1] == "unif"){
      ret[p] <- ifelse(theta.proposed[p] >= as.numeric(priors[[p]][2]) &
                       theta.proposed[p] <= as.numeric(priors[[p]][3]), 1, 0)
    }
    # other prior options can go here
  }
  return(ret)
}
prior_pr_func(proposal_func(myPriors), myPriors)   # quick check on a draw from the prior

# matrix to store weights per particle per SMC step
weights <- matrix(1/n.particles, epsilon.steps, n.particles)
# array to store inferred parameter sets per particle per step
theta.a <- array(NA, dim = c(epsilon.steps, n.particles, length(myPriors)))
# matrices to store weighted estimates of theta mean and variance per step
tau.mu.a  <- matrix(NA, epsilon.steps, length(myPriors))
tau.var.a <- matrix(NA, epsilon.steps, length(myPriors))
# matrices to store distances - per tolerance metric, per particle, per step
d1 <- matrix(NA, epsilon.steps, n.particles)
d2 <- matrix(NA, epsilon.steps, n.particles)
d3 <- matrix(NA, epsilon.steps, n.particles)
# distance function, applied to each summary statistic separately (absolute difference)
dfunc <- function(x, y){ abs(x - y) }
# list to store simulated outputs per particle per step - for subsequent posterior sampling
x.a <- vector("list", epsilon.steps)
for(i in 1:epsilon.steps){
  x.a[[i]] <- vector("list", n.particles)
}
# matrix for calculating acceptance percentages per step
j.it.s <- matrix(NA, epsilon.steps, n.particles)

################################################################
# ABC SMC steps
# Note: the next loop takes several hours to run.
# For example purposes, run fewer steps.
# In practice, it is important to run until there is diminishing gain.
for(s in 1:epsilon.steps){

  if(s > 1){
    # set this step's tolerances from the 75th percentile of the previous step's distances
    epsilon.t[s,] <- c(as.numeric(quantile(d1[s-1,], .75)),
                       as.numeric(quantile(d2[s-1,], .75)),
                       as.numeric(quantile(d3[s-1,], .75)))
    for(i in 1:length(myPriors)){
      # weighted empirical mean of the step t-1 thetas
      tau.mu.a[s,i] <- sum(theta.a[s-1,,i] * weights[s-1,])
      # weighted empirical variance of the step t-1 thetas
      tau.var.a[s,i] <- sum((theta.a[s-1,,i] - tau.mu.a[s,i])^2 * weights[s-1,])
    }
  }

  for(j in 1:n.particles){
    j.it <- 0
    d1[s,j] <- epsilon.t[s,1] + 1   # initiate with distance > epsilon.t[s] so the while loop starts
    d2[s,j] <- epsilon.t[s,2] + 1
    d3[s,j] <- epsilon.t[s,3] + 1
    while((d1[s,j] > epsilon.t[s,1]) | (d2[s,j] > epsilon.t[s,2]) | (d3[s,j] > epsilon.t[s,3])){
      j.it <- j.it + 1
      if(s == 1){
        # draw from the prior
        theta.proposed <- proposal_func(myPriors)
      }
      if(s > 1){
        # take one weighted sample of theta.star from the step t-1 particles
        sample.particle <- sample(n.particles, 1, prob = weights[s-1,])
        # propose based on the sampled particle
        theta.star <- theta.a[s-1, sample.particle, ]
        # perturb around the proposed particle (kernel from Beaumont 2009);
        # with uniform priors, resample until the proposal falls inside the prior bounds
        for(i in 1:length(myPriors)){
          theta.proposed[i] <- as.numeric(myPriors[[i]][2]) - 1   # start outside the bounds to force a draw
          while((theta.proposed[i] < as.numeric(myPriors[[i]][2])) |
                (theta.proposed[i] > as.numeric(myPriors[[i]][3]))){
            theta.proposed[i] <- rnorm(1, theta.star[i], sqrt(2*tau.var.a[s,i]))
          }
        }
      }
      # forwards process (the model)
      x <- myModel(theta.proposed)
      # calculate the distance statistics for this particle
      d1[s,j] <- dfunc(ssMax(x),     myTarget[1])
      d2[s,j] <- dfunc(ssMaxTime(x), myTarget[2])
      d3[s,j] <- dfunc(ssResids(x),  myTarget[3])
    }
    # once the tolerances are met, store the particle components
    theta.a[s,j,] <- as.numeric(theta.proposed)
    # store the simulated data for posterior sampling
    x.a[[s]][[j]] <- x
    # store the iteration counter for calculating acceptance rates
    j.it.s[s,j] <- j.it
    if(s == 1){ }   # weights remain unchanged, i.e. all = 1/N
    if(s > 1){
      # numerator = prior probability of theta.proposed
      weight.j.numerator <- prod(prior_pr_func(theta.proposed, myPriors))
      # denominator = weighted sum of likelihoods from the perturbation kernel
      tmp <- 0
      for(i in 1:length(myPriors)){
        tmp <- tmp + dnorm(theta.proposed[i], theta.a[s-1,,i], sqrt(2*tau.var.a[s,i]), log = TRUE)
      }
      weight.j.denom <- sum(weights[s-1,] * exp(tmp))
      weights[s,j] <- weight.j.numerator / weight.j.denom
    }
  } # close particle loop (j)
  # normalise the weights to sum to 1
  weights[s,] <- weights[s,] / sum(weights[s,])
}

#####################################################################################
# collate the outputs from the approximate posterior probability distribution
out.post <- as.data.frame(theta.a[s,,])
# calculated output R0
R0.s <- matrix(NA, n.particles, epsilon.steps)
for(s in 1:epsilon.steps){
  R0.s[,s] <- theta.a[s,,1] / theta.a[s,,3]   # R0 = beta / gamma
}
out.post <- cbind(out.post, R0.s[,s])
names(out.post) <- c('beta', 'sigma', 'gamma', 'R0')
# write.csv(out.post, 'posteriors.csv', row.names = F)

# summary statistics of the marginal posteriors (median and 95% highest density interval)
(ro.ss <- c('R0',      round(c(median(out.post$R0),      hdi(out.post$R0,      credMass = .95)), 1)))
(b.ss  <- c('beta',    round(c(median(out.post$beta),    hdi(out.post$beta,    credMass = .95)), 1)))
(s.ss  <- c('1/sigma', round(c(median(1/out.post$sigma), hdi(1/out.post$sigma, credMass = .95)), 1)))
(g.ss  <- c('1/gamma', round(c(median(1/out.post$gamma), hdi(1/out.post$gamma, credMass = .95)), 1)))
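# Possible diagnostics (a sketch, not required for the fit): j.it.s holds the
# number of proposals needed for each accepted particle, so the per-step
# acceptance rate and the tolerance schedule that was actually used can be
# summarised as follows.
acc.rate <- n.particles / rowSums(j.it.s)   # accepted particles / total proposals, per SMC step
round(acc.rate, 3)
round(epsilon.t, 2)                         # tolerances per step (one column per summary statistic)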
#=================================================================================
# plot median and envelopes directly from the accepted particles' simulations.
# As the number of particles goes to infinity, the set of final-step particles is
# an asymptotic estimator of the posterior. Which particles are accepted involves
# some randomness, depending on the model, but with a large enough particle
# population the ones that belong in the posterior will be included.

# create a matrix on the observed time grid by taking the nearest simulated value
# for each observed time point
sims <- x.a[[s]]   # accepted simulations from the final SMC step
out  <- matrix(NA, nrow = n.particles, ncol = length(df.obs$time))
for(p in 1:n.particles){
  tmp  <- sims[[p]]
  t.id <- vector()
  for(i in 1:length(df.obs$time)){
    t.id[i] <- which.min(abs(tmp$time - df.obs$time[i]))
  }
  out[p,] <- tmp$I[t.id]
}
plot(df.obs$time, df.obs$I, type = 'o')
for(p in 1:n.particles){
  lines(df.obs$time, out[p,], col = 'grey90')
}

#=================================================================================
# plot median and envelopes by re-simulating from posterior samples
nsim <- 2000
out  <- list()
for(i in 1:nsim){
  init.state <- c(time = 0, S = 16, E1 = 0, E2 = 0, E3 = 0, E4 = 0,
                  I1 = 1, I2 = 0, I3 = 0, I4 = 0, R = 0)
  index      <- sample(1:nrow(out.post), 1)
  SIR.par    <- unlist(c(out.post[index,]))   # only the first three elements (beta, sigma, gamma) are used by the model
  out[[i]]   <- SEIR.stoc.model.I4(init.state, SIR.par, 60)
  out[[i]]$iter <- i
}
dat <- do.call('rbind', out)
# variable for all infectious classes combined
dat$I <- rowSums(dat[, c(7:10)])

# create a data frame with constant time steps and get the nearest simulated value for each iteration
t.limit  <- seq(0, 60, 1)
mean.dat <- data.frame(time = rep(t.limit, nsim), iter = rep(c(1:nsim), each = length(t.limit)))
indx1 <- neardate(mean.dat$iter, dat$iter, mean.dat$time, dat$time)
indx2 <- neardate(mean.dat$iter, dat$iter, mean.dat$time, dat$time, best = "prior")
mean.dat$var <- dat$I[ifelse(is.na(indx1), indx2,    # none after, take the one before
                      ifelse(is.na(indx2), indx1,    # none before, take the one after
                      ifelse(abs(dat$time[indx2] - mean.dat$time) <
                             abs(dat$time[indx1] - mean.dat$time), indx2, indx1)))]

tdat <- mean.dat %>%
  group_by(time) %>%
  summarise(q2.5   = quantile(var, probs = 0.025),
            q25    = quantile(var, probs = 0.25),
            median = quantile(var, probs = 0.5),
            q75    = quantile(var, probs = 0.75),
            q97.5  = quantile(var, probs = 0.975))

ggplot(data = tdat, aes(x = time)) +
  ylab('Count') +
  xlab('Time (days)') +
  xlim(0, 60) +
  ylim(0, 8) +
  theme_bw() +
  geom_ribbon(aes(ymin = q2.5, ymax = q97.5), alpha = 0.3, fill = 'gray') +
  geom_ribbon(aes(ymin = q25,  ymax = q75),   alpha = 0.5, fill = 'gray') +
  geom_line(data = df.obs, aes(time, I), size = 1, color = 'black') +
  geom_line(aes(y = median), color = 'black', size = 1, alpha = 0.25)
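#=================================================================================
# Possible additional figure (a sketch): histograms of the marginal posterior
# samples in out.post (beta, sigma, gamma, R0), using the reshape2 and ggplot2
# packages already loaded above; the bin count and layout are arbitrary choices.
post.long <- melt(out.post, measure.vars = c('beta', 'sigma', 'gamma', 'R0'),
                  variable.name = 'parameter')
ggplot(post.long, aes(x = value)) +
  geom_histogram(bins = 30, fill = 'grey70', colour = 'black') +
  facet_wrap(~ parameter, scales = 'free') +
  theme_bw() +
  xlab('Posterior sample value') +
  ylab('Count')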