Case study 2: predict a model response from three inputs
Rcpp is smoking fast for agent based models in data frames by Gary Weissman, MD, MSHP.
Starts with this code:
vacc1a <- function(age, female, ily) {
p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily
p <- p * if (female) 1.25 else 0.75
p <- max(0, p)
p <- min(1, p)
p
}
R code with a for loop:
vacc1 <- function(age, female, ily) {
n <- length(age)
out <- numeric(n)
for (i in seq_len(n)) {
out[i] <- vacc1a(age[i], female[i], ily[i])
}
out
}
Vectorized R code:
vacc2 <- function(age, female, ily) {
p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily
p <- p * ifelse(female, 1.25, 0.75)
p <- pmax(0, p)
p <- pmin(1, p)
p
}
C++:
#include <Rcpp.h>
using namespace Rcpp;
double vacc3a(double age, bool female, bool ily){
double p = 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily;
p = p * (female ? 1.25 : 0.75);
p = std::max(p, 0.0);
p = std::min(p, 1.0);
return p;
}
// [[Rcpp::export]]
NumericVector vacc3(NumericVector age, LogicalVector female,
LogicalVector ily) {
int n = age.size();
NumericVector out(n);
for(int i = 0; i < n; ++i) {
out[i] = vacc3a(age[i], female[i], ily[i]);
}
return out;
}
Sample data:
n <- 1000
age <- rnorm(n, mean = 50, sd = 10)
female <- sample(c(T, F), n, rep = TRUE)
ily <- sample(c(T, F), n, prob = c(0.8, 0.2), rep = TRUE)
stopifnot(
all.equal(vacc1(age, female, ily), vacc2(age, female, ily)),
all.equal(vacc1(age, female, ily), vacc3(age, female, ily))
)

bench::mark(
vacc1 = vacc1(age, female, ily),
vacc2 = vacc2(age, female, ily),
vacc3 = vacc3(age, female, ily)
)
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 vacc1 1.45ms 1.49ms 664. 7.86KB 15.5
#> 2 vacc2 43.15µs 45.24µs 21465. 147.33KB 30.4
#> 3 vacc3 12.48µs 12.84µs 75352. 12.63KB 15.1