2.4 Reading and writing data

First download the compGenomRData package from GitHub if you haven’t done so yet. We are using this as a companion for the course!

# Install the companion data package from GitHub only when it is not already
# available, so re-running these notes does not trigger a needless reinstall.
if (!requireNamespace("compGenomRData", quietly = TRUE)) {
  devtools::install_github("compgenomr/compGenomRData")
}

Read files in base R

# Locate the example BED files shipped with the compGenomRData package.
# mustWork = TRUE makes system.file() raise an error when the package or the
# file is missing; without it the call silently returns "" and the problem
# only shows up later, when read.table() is handed an empty path.
cpgiFilePath <- system.file("extdata",
                            "subset.cpgi.hg18.bed",
                            package = "compGenomRData",
                            mustWork = TRUE)

# read CpG island BED file (no header line in the file)
cpgi.df <- read.table(cpgiFilePath, header = FALSE)

enhancerFilePath <- system.file("extdata",
                                "subset.enhancers.hg18.bed",
                                package = "compGenomRData",
                                mustWork = TRUE)

# read enhancer marker BED file (no header line in the file)
enh.df <- read.table(enhancerFilePath, header = FALSE)

# check the first rows to see what the data looks like
head(enh.df)
##      V1     V2     V3 V4   V5 V6    V7      V8 V9
## 1 chr20 266275 267925  . 1000  .  9.11 13.1693 -1
## 2 chr20 287400 294500  . 1000  . 10.53 13.0231 -1
## 3 chr20 300500 302500  . 1000  .  9.10 13.3935 -1
## 4 chr20 330400 331800  . 1000  .  6.39 13.5105 -1
## 5 chr20 341425 343400  . 1000  .  6.20 12.9852 -1
## 6 chr20 437975 439900  . 1000  .  6.31 13.5184 -1

The key function here is:

read.table()

Reading large tables with readr.

# Attach readr to get read_table(), its reader for whitespace-separated files.
library(readr)

# The file has no header row, so col_names = FALSE lets readr generate the
# column names (X1, X2, ...) itself.
df.f2 <- read_table(enhancerFilePath, col_names = FALSE)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character(),
##   X2 = col_double(),
##   X3 = col_double(),
##   X4 = col_character(),
##   X5 = col_double(),
##   X6 = col_character(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
df.f2
## # A tibble: 50,416 × 9
##    X1        X2     X3 X4       X5 X6       X7    X8    X9
##    <chr>  <dbl>  <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
##  1 chr20 266275 267925 .      1000 .      9.11  13.2    -1
##  2 chr20 287400 294500 .      1000 .     10.5   13.0    -1
##  3 chr20 300500 302500 .      1000 .      9.1   13.4    -1
##  4 chr20 330400 331800 .      1000 .      6.39  13.5    -1
##  5 chr20 341425 343400 .      1000 .      6.2   13.0    -1
##  6 chr20 437975 439900 .      1000 .      6.31  13.5    -1
##  7 chr20 516650 518525 .      1000 .     12.5   13.7    -1
##  8 chr20 519100 521475 .      1000 .      7.1   13.1    -1
##  9 chr20 543800 545775 .      1000 .      9.52  13.0    -1
## 10 chr20 573550 574975 .      1000 .      7.71  13.7    -1
## # ℹ 50,406 more rows

Readr is part of the tidyverse (tidyverse is life).

Writing data with write.table

# Write the enhancer table as tab-separated text: strings are not quoted, and
# neither row names nor a header line are written.
write.table(
  enh.df,
  file = "enh.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)
  • The type of file it is saved as depends on the sep argument (the call above saved a tab-separated file, i.e. a .tsv)

Saving/loading R objects directly into/from a file

# save() writes any number of objects, of any class, into a single file;
# load() restores them into the session under their original names
save(cpgi.df, enh.df, file = "mydata.RData")
load("mydata.RData")

# saveRDS() stores exactly one object at a time
saveRDS(cpgi.df, file = "cpgi.rds")

# readRDS() returns the object instead of recreating it by name, so its result
# must be assigned to a variable in the session
x <- readRDS("cpgi.rds")
head(x)