Seattle library parquet files
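- Apache Hive-style “self-describing” partitioning: each directory is named `key=value` (e.g. `CheckoutYear=2005`) and holds the parquet file(s) for that partition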
# Inventory of the files found under `pq_path`: one row per file, giving its
# path relative to `pq_path` and its size on disk (bytes / 1024^2).
# Wrapped in local() so the helper names below do not leak into the session.
local({
  # recursive = TRUE descends into the sub-directories of `pq_path`.
  rel_paths <- list.files(pq_path, recursive = TRUE)

  # file.size() needs paths that resolve from the working directory, so the
  # relative paths are re-anchored at `pq_path` before measuring.
  bytes <- file.size(file.path(pq_path, rel_paths))

  tibble(
    files = rel_paths,
    size_MB = bytes / 1024^2
  )
})
#> # A tibble: 18 × 2
#> files size_MB
#> <chr> <dbl>
#> 1 CheckoutYear=2005/part-0.parquet 109.
#> 2 CheckoutYear=2006/part-0.parquet 164.
#> 3 CheckoutYear=2007/part-0.parquet 178.
#> 4 CheckoutYear=2008/part-0.parquet 195.
#> 5 CheckoutYear=2009/part-0.parquet 214.
#> 6 CheckoutYear=2010/part-0.parquet 222.
#> # ℹ 12 more rows