9.1 Data & data preparation

9.1.1 data

library(geoR)
data(gambia)
head(gambia)
##             x       y pos  age netuse treated green phc
## 1850 349631.3 1458055   1 1783      0       0 40.85   1
## 1851 349631.3 1458055   0  404      1       0 40.85   1
## 1852 349631.3 1458055   0  452      1       0 40.85   1
## 1853 349631.3 1458055   1  566      1       0 40.85   1
## 1854 349631.3 1458055   0  598      1       0 40.85   1
## 1855 349631.3 1458055   1  590      1       0 40.85   1
dim(gambia)
## [1] 2035    8

The analysis is done at the village level by aggregating the malaria tests by village.

library(tidyverse)

gambia %>%
  count(x,y) %>%
  head()
##          x       y  n
## 1 349631.3 1458055 33
## 2 358543.1 1460112 63
## 3 360308.1 1460026 17
## 4 363795.7 1496919 24
## 5 366400.5 1460248 26
## 6 366687.5 1463002 18

9.1.2 Prevalence

Grouping coordinates found 65 locations, or villages were the test were conducted.

gambia %>%
  count(x,y) %>%
  dim()
## [1] 65  3
d <- gambia %>%
  group_by(x,y) %>%
  summarise(total = n(),
            positive = sum(pos),
            prev = positive / total,.groups = "drop")
head(d)
## # A tibble: 6 × 5
##         x       y total positive  prev
##     <dbl>   <dbl> <int>    <dbl> <dbl>
## 1 349631. 1458055    33       17 0.515
## 2 358543. 1460112    63       19 0.302
## 3 360308. 1460026    17        7 0.412
## 4 363796. 1496919    24        8 0.333
## 5 366400. 1460248    26       10 0.385
## 6 366688. 1463002    18        7 0.389

9.1.3 Transforming coordinates

library(sp)
library(rgdal)
sps <- SpatialPoints(d[, c("x", "y")],
  proj4string = CRS("+proj=utm +zone=28")
)
spst <- spTransform(sps, CRS("+proj=longlat +datum=WGS84"))
library(sf)

sps <- d %>%
  st_as_sf(coords=c(1,2),
           crs = "+proj=utm +zone=28")

sps %>%head()
## Simple feature collection with 6 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 349631.3 ymin: 1458055 xmax: 366687.5 ymax: 1496919
## CRS:           +proj=utm +zone=28
## # A tibble: 6 × 4
##   total positive  prev           geometry
##   <int>    <dbl> <dbl>        <POINT [m]>
## 1    33       17 0.515 (349631.3 1458055)
## 2    63       19 0.302 (358543.1 1460112)
## 3    17        7 0.412 (360308.1 1460026)
## 4    24        8 0.333 (363795.7 1496919)
## 5    26       10 0.385 (366400.5 1460248)
## 6    18        7 0.389 (366687.5 1463002)
spst <- sps %>%
  st_transform(crs = "+proj=longlat +datum=WGS84")
spst %>%head()
## Simple feature collection with 6 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -16.38755 ymin: 13.18541 xmax: -16.23041 ymax: 13.53742
## CRS:           +proj=longlat +datum=WGS84
## # A tibble: 6 × 4
##   total positive  prev             geometry
##   <int>    <dbl> <dbl>          <POINT [°]>
## 1    33       17 0.515 (-16.38755 13.18541)
## 2    63       19 0.302 (-16.30543 13.20444)
## 3    17        7 0.412 (-16.28914 13.20374)
## 4    24        8 0.333 (-16.25869 13.53742)
## 5    26       10 0.385 (-16.23294 13.20603)
## 6    18        7 0.389 (-16.23041 13.23094)
d1 <- spst %>%
  st_coordinates() %>%
  bind_cols(d) %>%
  rename(long=X,lat=Y)

d1 %>% head()
## # A tibble: 6 × 7
##    long   lat       x       y total positive  prev
##   <dbl> <dbl>   <dbl>   <dbl> <int>    <dbl> <dbl>
## 1 -16.4  13.2 349631. 1458055    33       17 0.515
## 2 -16.3  13.2 358543. 1460112    63       19 0.302
## 3 -16.3  13.2 360308. 1460026    17        7 0.412
## 4 -16.3  13.5 363796. 1496919    24        8 0.333
## 5 -16.2  13.2 366400. 1460248    26       10 0.385
## 6 -16.2  13.2 366688. 1463002    18        7 0.389

9.1.4 Mapping prevalence

library(leaflet)
library(viridis)

pal <- colorBin("viridis", 
                bins = c(0, 0.25, 0.5, 0.75, 1))

d1 %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addCircles(lng = ~long, lat = ~lat, 
             color = ~ pal(prev)) %>%
  addLegend("bottomright",
            pal = pal, values = ~prev,
            title = "Prev.") %>%
  addScaleBar(position = c("bottomleft"))

9.1.5 Environmental covariates

Model malaria prevalence using a covariate that indicates the altitude in The Gambia:

library(raster)
r <- getData(name = "alt", 
             country = "GMB", 
             path = "data",
             mask = TRUE)
r
## class      : RasterLayer 
## dimensions : 108, 384, 41472  (nrow, ncol, ncell)
## resolution : 0.008333333, 0.008333333  (x, y)
## extent     : -16.9, -13.7, 13, 13.9  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : GMB_msk_alt.grd 
## names      : GMB_msk_alt 
## values     : -2, 63  (min, max)
pal <- colorNumeric("viridis", 
                    values(r),
                    na.color = "transparent")

leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addRasterImage(r, 
                 colors = pal, 
                 opacity = 0.5) %>%
  addLegend("bottomright",
            pal = pal, 
            values = values(r),
            title = "Altitude"
            ) %>%
  addScaleBar(position = c("bottomleft"))

Add the altitude:

# ?raster::extract

d2 <- d1 %>%
  mutate(alt = raster::extract(r,
                               d1[, c("long","lat")]))

d2 %>% head()
## # A tibble: 6 × 8
##    long   lat       x       y total positive  prev   alt
##   <dbl> <dbl>   <dbl>   <dbl> <int>    <dbl> <dbl> <dbl>
## 1 -16.4  13.2 349631. 1458055    33       17 0.515    14
## 2 -16.3  13.2 358543. 1460112    63       19 0.302    30
## 3 -16.3  13.2 360308. 1460026    17        7 0.412    32
## 4 -16.3  13.5 363796. 1496919    24        8 0.333    20
## 5 -16.2  13.2 366400. 1460248    26       10 0.385    28
## 6 -16.2  13.2 366688. 1463002    18        7 0.389    17