if (FALSE) {
## In this script we will create 5km resolution pixellated grid over Kenya,
## and generate tables of estimated (both target and general) population
## totals at the area (e.g. Admin-1) and subarea (e.g. Admin-2) levels. Then
## we will use that to simulate populations of
# download Kenya GADM shapefiles from SUMMERdata github repository
githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
"kenyaMaps.rda?raw=true")
tempDirectory = "~/"
mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
if(!file.exists(mapsFilename)) {
download.file(githubURL,mapsFilename)
}
# load it in
out = load(mapsFilename)
out
adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
# some Admin-2 areas have the same name
adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") &
(adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") &
(adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") &
(adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") &
(adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
# The spatial area of unknown 8 is so small, it causes problems unless its removed or
# unioned with another subarea. Union it with neighboring Kakeguria:
newadm2 = adm2
unknown8I = which(newadm2$NAME_2 == "unknown 8")
newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <-
"Kapenguria + unknown 8"
admin2.IDs <- newadm2$NAME_2
newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
newadm2@data$NAME_2OLD = newadm2@data$NAME_2
newadm2@data$NAME_2 = admin2.IDs
newadm2$NAME_2 = admin2.IDs
temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
library(sf)
temp <- sf::st_as_sf(temp)
temp <- sf::as_Spatial(temp)
tempData = newadm2@data[-unknown8I,]
tempData = tempData[order(tempData$NAME_2),]
newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
adm2 = newadm2
# download 2014 Kenya population density TIF file
githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
"Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
if(!file.exists(popTIFFilename)) {
download.file(githubURL,popTIFFilename)
}
# load it in
pop = terra::rast(popTIFFilename)
eastLim = c(-110.6405, 832.4544)
northLim = c(-555.1739, 608.7130)
## Construct poppsubKenya, a table of urban/rural general population totals
## in each subarea. Technically, this is not necessary since we can load in
## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate
## the areas in km^2 of the areas and subareas
# use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
midLon = mean(adm1@bbox[1,])
midLat = mean(adm1@bbox[2,])
p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon,
" +lat_0=", midLat, " +units=km")
adm1_sf = st_as_sf(adm1)
adm1proj_sf = st_transform(adm1_sf, p4s)
adm1proj = as(adm1proj_sf, "Spatial")
adm2_sf = st_as_sf(adm2)
adm2proj_sf = st_transform(adm2_sf, p4s)
adm2proj = as(adm2proj_sf, "Spatial")
# now calculate spatial area in km^2
admin1Areas = as.numeric(st_area(adm1proj_sf))
admin2Areas = as.numeric(st_area(adm2proj_sf))
areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2,
spatialArea=admin2Areas)
# Calculate general population totals at the subarea (Admin-2) x urban/rural
# level and using 1km resolution population grid. Assign urbanicity by
# thresholding population density based on estimated proportion population
# urban/rural, making sure total area (Admin-1) urban/rural populations in
# each area matches poppaKenya.
# NOTE: the following function will typically take ~15-20 minutes. Can speed up
# by setting kmRes to be higher, but we recommend fine resolution for
# this step, since it only needs to be done once.
system.time(poppsubKenya <- getPoppsub(
kmRes=1, pop=pop, domainMapDat=adm0,
eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya,
areaMapDat=adm1, subareaMapDat=adm2,
areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
# Now generate a general population integration table at 5km resolution,
# based on subarea (Admin-2) x urban/rural population totals. This takes
# ~1 minute
system.time(popMatKenya <- makePopIntegrationTab(
kmRes=5, pop=pop, domainMapDat=adm0,
eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
poppa = poppaKenya, poppsub=poppsubKenya,
areaMapDat = adm1, subareaMapDat = adm2,
areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
## Adjust popMat to be target (neonatal) rather than general population
## density. First create the target population frame
## (these numbers are based on IPUMS microcensus data)
mothersPerHouseholdUrb = 0.3497151
childrenPerMotherUrb = 1.295917
mothersPerHouseholdRur = 0.4787696
childrenPerMotherRur = 1.455222
targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb *
childrenPerMotherUrb
targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur *
childrenPerMotherRur
easpaKenyaNeonatal = easpaKenya
easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
easpaKenyaNeonatal$popRur = targetPopPerStratumRural
easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb +
easpaKenyaNeonatal$popRur
easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb /
easpaKenyaNeonatal$popTotal
easpaKenyaNeonatal$pctTotal =
100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
# Generate the target population density by scaling the current
# population density grid at the Admin1 x urban/rural level
popMatKenyaNeonatal = adjustPopMat(popMatKenya, easpaKenyaNeonatal)
# Generate neonatal population table from the neonatal population integration
# matrix. This is technically not necessary for population simulation purposes,
# but is here for illustrative purposes
poppsubKenyaNeonatal = poppRegionFromPopMat(popMatKenyaNeonatal,
popMatKenyaNeonatal$subarea)
poppsubKenyaNeonatal =
cbind(subarea=poppsubKenyaNeonatal$region,
area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region, adm2@data$NAME_2)],
poppsubKenyaNeonatal[,-1])
print(head(poppsubKenyaNeonatal))
## Now we're ready to simulate neonatal populations along with neonatal
## mortality risks and prevalences
# use the following model to simulate the neonatal population based roughly
# on Paige et al. (2020) neonatal mortality modeling for Kenya.
beta0=-2.9 # intercept
gamma=-1 # urban effect
rho=(1/3)^2 # spatial variance
effRange = 400 # effective spatial range in km
sigmaEpsilon=sqrt(1/2.5) # cluster (nugget) effect standard deviation
# Run a simulation! This produces multiple dense nEA x nsim and nPixel x nsim
# matrices. In the future sparse matrices and chunk by chunk computations
# may be incorporated.
simPop = simPopSPDE(nsim=1, easpa=easpaKenyaNeonatal,
popMat=popMatKenya, targetPopMat=popMatKenyaNeonatal,
poppsub=poppsubKenya, spdeMesh=kenyaMesh,
margVar=rho, sigmaEpsilon=sigmaEpsilon,
gamma=gamma, effRange=effRange, beta0=beta0,
seed=12, inla.seed=12, nHHSampled=25,
stratifyByUrban=TRUE, subareaLevel=TRUE,
doFineScaleRisk=TRUE, doSmoothRisk=TRUE,
min1PerSubarea=TRUE)
# get average absolute percent error relative to fine scale prevalence at Admin-2 level
tempDat = simPop$subareaPop$aggregationResults[c("region", "pFineScalePrevalence",
"pFineScaleRisk", "pSmoothRisk")]
100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pFineScaleRisk) /
tempDat$pFineScalePrevalence)
100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pSmoothRisk) /
tempDat$pFineScalePrevalence)
100*mean(abs(tempDat$pFineScaleRisk - tempDat$pSmoothRisk) /
tempDat$pFineScalePrevalence)
# verify number of EAs per area and subarea
cbind(aggregate(simPop$eaPop$eaSamples[,1], by=list(area=popMatKenya$area), FUN=sum),
trueNumEAs=easpaKenya$EATotal[order(easpaKenya$area)])
aggregate(simPop$eaPop$eaSamples[,1], by=list(area=popMatKenya$subarea), FUN=sum)
## plot simulated population
# directory for plotting
# (mapPlot takes longer when it doesn't save to a file)
tempDirectory = "~/"
# pixel level plots. Some pixels have no simulated EAs, in which case they will be
# plotted as white. Expected noisy looking plots of fine scale risk and prevalence
# due to EAs being discrete, as compared to a very smooth plot of smooth risk.
zlim = c(0, quantile(probs=.995, c(simPop$pixelPop$pFineScalePrevalence,
simPop$pixelPop$pFineScaleRisk,
simPop$pixelPop$pSmoothRisk), na.rm=TRUE))
pdf(file=paste0(tempDirectory, "simPopSPDEPixel.pdf"), width=8, height=8)
par(mfrow=c(2,2))
plot(adm2, asp=1)
points(simPop$eaPop$eaDatList[[1]]$lon, simPop$eaPop$eaDatList[[1]]$lat, pch=".", col="blue")
plot(adm2, asp=1)
quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pFineScalePrevalence,
zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
plot(adm2, asp=1)
quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pFineScaleRisk,
zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pSmoothRisk,
zlim=zlim, FUN=function(x) {mean(x, na.rm=TRUE)}, asp=1)
plot(adm2, add=TRUE)
dev.off()
range(simPop$eaPop$eaDatList[[1]]$N)
# areal (Admin-1) level: these results should look essentially identical
tempDat = simPop$areaPop$aggregationResults[c("region", "pFineScalePrevalence",
"pFineScaleRisk", "pSmoothRisk")]
pdf(file=paste0(tempDirectory, "simPopSPDEAdmin-1.pdf"), width=7, height=7)
mapPlot(tempDat,
variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"),
geo=adm1, by.geo="NAME_1", by.data="region", is.long=FALSE)
dev.off()
# subareal (Admin-2) level: these results should look subtley different
# depending on the type of prevalence/risk considered
tempDat = simPop$subareaPop$aggregationResults[c("region", "pFineScalePrevalence",
"pFineScaleRisk", "pSmoothRisk")]
pdf(file=paste0(tempDirectory, "simPopSPDEAdmin-2.pdf"), width=7, height=7)
mapPlot(tempDat,
variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"),
geo=adm2, by.geo="NAME_2", by.data="region", is.long=FALSE)
dev.off()
}
Run the code above in your browser using DataLab