profvis
# Three small nested functions used as a profiling target.
# f() pauses, then calls g() and h(); g() pauses, then calls h();
# h() only pauses.
# NOTE(review): `pause()` is not defined in this file; presumably
# profvis::pause(), which needs profvis attached - confirm.
f <- function() {
  pause(0.1)
  g()
  h()
}

g <- function() {
  pause(0.1)
  h()
}

h <- function() {
  pause(0.1)
}
# Profile a single call to f() with profvis.
profvis::profvis(f())
R.framework/Versions/Current
directory alias directly using ln -s
# Deliberately grow `x` one element at a time: each c() call builds a new,
# longer vector, which makes this loop a handy memory-profiling example.
# (Kept inefficient on purpose; do not preallocate here.)
x <- integer()
for (i in 1:1e4) {
  x <- c(x, i)
}
# Lazy-evaluation demo: i() is written as the argument of j(), but the
# argument is only evaluated when j() first uses `x`.
# NOTE(review): `pause()` is not defined in this file; presumably
# profvis::pause() - confirm.
i <- function() {
  pause(0.1)
  10
}

j <- function(x) {
  x + 10
}

j(i())
"...profiling would make it seem like
i()
was called by j()
because the argument isn't evaluated until it's needed by j()
."
# Profile one of the example apps that ships with shiny.
# shiny is attached because runExample() is called unqualified; profvis is
# called with its namespace so it does not need to be attached.
library(shiny)

profvis::profvis({
  runExample(example = "06_tabsets", display.mode = "normal")
})
"a deep understanding of subatomic physics is not very helpful when baking"
bench
package
x <- runif(100)
(lb <- bench::mark(
  sqrt(x),
  x^0.5
))
## # A tibble: 2 x 6
##   expression      min   median `itr/sec` mem_alloc `gc/sec`
##   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
## 1 sqrt(x)       334ns    700ns   232862.      848B        0
## 2 x^0.5         2.1µs   2.25µs   269744.      848B        0
median is probably the best metric to use
# Plot the benchmark result stored in `lb`.
plot(lb)
"Four" techniques:
Write a function for each approach:
# Two candidate implementations of the arithmetic mean to benchmark.
# mean1() delegates to base mean(); mean2() computes sum / length directly.
mean1 <- function(x) mean(x)
mean2 <- function(x) sum(x) / length(x)
Generate representative test cases:
# Test input: 1e5 draws from runif().
x <- runif(1e5)
Precisely compare the variants (and include unit tests (not included))
# Benchmark both variants on `x` and keep only the listed summary columns.
bench::mark( mean1(x), mean2(x))[c("expression", "min", "median", "itr/sec", "n_gc")]
## # A tibble: 2 x 4
##   expression      min   median `itr/sec`
##   <bch:expr> <bch:tm> <bch:tm>     <dbl>
## 1 mean1(x)    153.9µs  155.6µs     6133.
## 2 mean2(x)     76.2µs   76.9µs    12133.
[R]
rowSums()
, colSums()
, rowMeans()
, and colMeans()
are faster than equivalent invocations that use apply()
because they are vectorised.
vapply()
is faster than sapply()
because it pre-specifies the output type.
any(x == 10)
is much faster than 10 %in% x
because testing equality is simpler than testing set inclusion.
apply()
read.csv()
: specify known column types or use readr::read_csv()
or data.table::fread()
factor()
: specify known levels
cut()
: use labels = FALSE
or findInterval()
unlist(x, use.names = FALSE)
is faster than unlist(x)
interaction()
: use drop = TRUE
if you can
x <- runif(1e2)
bench::mark(
  mean(x),
  mean.default(x)
)[c("expression", "min", "median", "itr/sec", "n_gc")]
## # A tibble: 2 x 4
##   expression           min   median `itr/sec`
##   <bch:expr>      <bch:tm> <bch:tm>     <dbl>
## 1 mean(x)           2.39µs    3.5µs   251432.
## 2 mean.default(x)    1.2µs   1.39µs   533535.
# Repeat the mean() vs mean.default() comparison on a larger input (1e4
# values) and keep only the listed summary columns.
x <- runif(1e4)
bench::mark(
  mean(x),
  mean.default(x)
)[c("expression", "min", "median", "itr/sec", "n_gc")]
## # A tibble: 2 x 4
##   expression           min   median `itr/sec`
##   <bch:expr>      <bch:tm> <bch:tm>     <dbl>
## 1 mean(x)           17.6µs   18.8µs    50802.
## 2 mean.default(x)   16.3µs   16.6µs    56833.
as.data.frame()
is slow because it coerces each element into a data frame.
You could, instead, store your data in a named list of equal-length vectors:
# Turn a list into a data frame by setting the class and row names directly,
# with no validation or coercion of the elements.
#
# l: a list; the number of rows is taken from the length of its first
#    element. The caller is responsible for all elements having that length.
# Returns `l` with class "data.frame" and compact integer row names.
quickdf <- function(l) {
  class(l) <- "data.frame"
  attr(l, "row.names") <- .set_row_names(length(l[[1]]))
  l
}

# Example input: 26 numeric vectors of length 1e3, named a-z.
l <- lapply(1:26, function(i) runif(1e3))
names(l) <- letters
dplyr::glimpse(l[1:6])
## List of 6
##  $ a: num [1:1000] 0.3726 0.9029 0.8664 0.0337 0.8816 ...
##  $ b: num [1:1000] 0.946 0.109 0.767 0.237 0.614 ...
##  $ c: num [1:1000] 0.659 0.938 0.317 0.414 0.152 ...
##  $ d: num [1:1000] 0.559 0.888 0.872 0.917 0.669 ...
##  $ e: num [1:1000] 0.933 0.923 0.757 0.407 0.272 ...
##  $ f: num [1:1000] 0.452 0.533 0.915 0.198 0.259 ...
# Benchmark as.data.frame() against quickdf() on the list `l` and keep only
# the listed summary columns.
bench::mark( as.data.frame = as.data.frame(l), quick_df = quickdf(l))[c("expression", "min", "median", "itr/sec", "n_gc")]
## # A tibble: 2 x 4
##   expression         min   median `itr/sec`
##   <bch:expr>    <bch:tm> <bch:tm>     <dbl>
## 1 as.data.frame 961.29µs    1.1ms      861.
## 2 quick_df        7.07µs   7.78µs   106802.
This approach requires carefully reading through source code!
rowSums()
, colSums()
, rowMeans()
, and colMeans()
cut()
and findInterval()
for converting continuous variables to categorical
cumsum()
and diff()
c()
, append()
, cbind()
, rbind()
, paste()
# Build one random 50-character string from lowercase letters.
random_string <- function() {
  paste(sample(letters, 50, replace = TRUE), collapse = "")
}

# Inputs of two sizes so the scaling of each approach is visible.
strings10 <- replicate(10, random_string())
strings100 <- replicate(100, random_string())

# Concatenate the elements of `xs` by appending one at a time in a loop.
# Kept as a loop on purpose: it is the slow variant being benchmarked
# against the vectorised paste(..., collapse = "").
collapse <- function(xs) {
  out <- ""
  for (x in xs) {
    out <- paste0(out, x)
  }
  out
}

# check = FALSE because the four expressions return different strings.
bench::mark(
  loop10 = collapse(strings10),
  loop100 = collapse(strings100),
  vec10 = paste(strings10, collapse = ""),
  vec100 = paste(strings100, collapse = ""),
  check = FALSE
)[c("expression", "min", "median", "itr/sec", "n_gc")]
## # A tibble: 4 x 4
##   expression      min   median `itr/sec`
##   <bch:expr> <bch:tm> <bch:tm>     <dbl>
## 1 loop10      29.01µs  30.66µs    29660.
## 2 loop100    612.86µs 644.28µs     1488.
## 3 vec10        5.37µs   5.71µs   162555.
## 4 vec100      28.17µs  29.06µs    32390.
# Simulated data for the t-test case study: m rows, each with n
# observations drawn from N(10, 3), split into two equal-sized groups.
m <- 1000
n <- 50
X <- matrix(rnorm(m * n, mean = 10, sd = 3), nrow = m)
grp <- rep(1:2, each = n / 2)
Formula interface:
# Time one t.test() per row of X using the formula interface.
system.time(
  for (i in seq_len(m)) {
    t.test(X[i, ] ~ grp)$statistic
  }
)
##    user  system elapsed 
##   0.601   0.014   0.640
Provide two vectors
# Time one t.test() per row of X, passing the two groups as separate
# vectors instead of a formula.
system.time(
  for (i in seq_len(m)) {
    t.test(X[i, grp == 1], X[i, grp == 2])$statistic
  }
)
##    user  system elapsed 
##   0.140   0.002   0.143
# t-statistic for row `i` of the global matrix X, comparing the columns
# where grp == 1 against those where grp == 2.
# Reads `X` and `grp` from the enclosing environment.
compT <- function(i) {
  t.test(X[i, grp == 1], X[i, grp == 2])$statistic
}

# Apply compT() to every row and collect the statistics as a double vector.
system.time(t1 <- purrr::map_dbl(seq_len(m), compT))
##    user  system elapsed 
##   0.140   0.002   0.143
# Hand-rolled two-sample t-statistic with per-group variances, skipping
# everything else t.test() does.
#
# x:   numeric vector of observations.
# grp: vector the same length as `x`; 1 marks the first group, 2 the second.
# Returns (mean of group 1 - mean of group 2) / combined standard error.
my_t <- function(x, grp) {
  # Mean, size and sample variance (n - 1 denominator) of one group.
  t_stat <- function(x) {
    m <- mean(x)
    n <- length(x)
    var <- sum((x - m)^2) / (n - 1)

    list(m = m, n = n, var = var)
  }

  g1 <- t_stat(x[grp == 1])
  g2 <- t_stat(x[grp == 2])

  se_total <- sqrt(g1$var / g1$n + g2$var / g2$n)
  (g1$m - g2$m) / se_total
}

# Apply my_t() to every row of X.
system.time(t2 <- purrr::map_dbl(seq_len(m), ~ my_t(X[., ], grp)))
##    user  system elapsed 
##   0.038   0.015   0.053
# Stop with an error unless t1 and t2 agree according to all.equal().
stopifnot(all.equal(t1, t2))
# Vectorised version: compute the two-sample t-statistic for every row of a
# matrix at once using rowMeans()/rowSums().
#
# X:   numeric matrix, one test per row.
# grp: vector with one entry per column of `X`; 1 marks the first group,
#      2 the second.
# Returns a numeric vector with one t-statistic per row of `X`.
rowtstat <- function(X, grp) {
  # Row-wise mean, group size and sample variance of one group's columns.
  t_stat <- function(X) {
    m <- rowMeans(X)
    n <- ncol(X)
    # `m` has one entry per row, so `X - m` recycles down the columns and
    # centres each row by its own mean.
    var <- rowSums((X - m)^2) / (n - 1)

    list(m = m, n = n, var = var)
  }

  # drop = FALSE keeps a matrix even when a group has a single column, so
  # rowMeans()/ncol() still work.
  g1 <- t_stat(X[, grp == 1, drop = FALSE])
  g2 <- t_stat(X[, grp == 2, drop = FALSE])

  se_total <- sqrt(g1$var / g1$n + g2$var / g2$n)
  (g1$m - g2$m) / se_total
}

system.time(t3 <- rowtstat(X, grp))
##    user  system elapsed 
##   7.515   0.893   0.014
# Stop with an error unless t1 and t3 agree according to all.equal().
stopifnot(all.equal(t1, t3))
This slide doesn't have a logo
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |