11.5 Random graphs galore
One problem is that we are basing our analysis on a single random graph. Because edges are drawn randomly, there is a lot of variation in the structure of random graphs, especially when the number of nodes in the graph is small (less than one thousand).
What we really want is a distribution of random graphs and their triad censuses, against which our own could be compared. So let’s generate one hundred random graphs, and create a distribution of random graph triad censuses and see where our graph lies on that distribution
<- vector("list", 100) # this creates a list with 100 spaces to store things. We will store each result here.
trial
<- lapply(seq(100), function(x){
trial <- erdos.renyi.game(n = net59_n, p.or.m = net59_density, directed = TRUE)
random_graph triad.census(random_graph)
})
<- do.call("rbind", trial) # We can use the do.call and "rbind" functions together to combine all of the results into a matrix, where each row is one of our trials
trial_df
colnames(trial_df) <- c("003", "012", "102", "021D", "021U", "021C", "111D", "111U", "030T", "030C", "201", "120D", "120U", "120C", "210", "300") # It is worth naming the columns too.
<- rbind(trial_df, as.numeric(triad.census(net59))) # add in the observed results trial_df_w_observed
# First, standardize all of the columns by dividing each of their values by the largest value in that column, so that each will be on a similar scale (0 to 1), we can visualize them meaningfully
<- as.data.frame(trial_df_w_observed)
trial_df_w_observed
1:ncol(trial_df_w_observed)] <- sapply(trial_df_w_observed[,1:length(trial_df_w_observed)], function(x) x/max(x))
trial_df_w_observed[,
# Then split the observed from the simulation results
<- as.data.frame(trial_df_w_observed[1:100,])
trial_df <- as.numeric(trial_df_w_observed[101,])
observed
# Summarize the simulation results and add the observed data set back in for comparison
<- data.frame(TriadType = colnames(trial_df),
summarized_stats Means = sapply(trial_df, mean),
LowerCI = sapply(trial_df, function(x) quantile(x, 0.05)),
UpperCI = sapply(trial_df, function(x) quantile(x, 0.95)),
Observed = observed)
summarized_stats
## TriadType Means LowerCI UpperCI Observed
## 003 003 0.9948345802 0.994071010 0.9954347900 1.0000000
## 012 012 0.9628859418 0.942326115 0.9887844619 0.5875339
## 102 102 0.0108590651 0.004875651 0.0158361233 1.0000000
## 021D 021D 0.9279801959 0.887611797 0.9815694208 0.3855409
## 021U 021U 0.9275492388 0.879782817 0.9872671138 0.4604493
## 021C 021C 0.9200247969 0.872451725 0.9788962752 0.2725018
## 111D 111D 0.0187313803 0.008676763 0.0300769613 1.0000000
## 111U 111U 0.0208897485 0.009671180 0.0326333241 1.0000000
## 030T 030T 0.1688000000 0.135555556 0.2023333333 1.0000000
## 030C 030C 0.6794871795 0.461538462 0.9000000000 0.5897436
## 201 201 0.0001570167 0.000000000 0.0009813543 1.0000000
## 120D 120D 0.0004761905 0.000000000 0.0034013605 1.0000000
## 120U 120U 0.0004062500 0.000000000 0.0031250000 1.0000000
## 120C 120C 0.0032846715 0.000000000 0.0076642336 1.0000000
## 210 210 0.0000000000 0.000000000 0.0000000000 1.0000000
## 300 300 0.0000000000 0.000000000 0.0000000000 1.0000000
ggplot(summarized_stats) +
geom_point(aes(x=TriadType, y=Means, colour=TriadType)) +
geom_errorbar(aes(x = TriadType, ymin=LowerCI, ymax=UpperCI, colour=TriadType), width=.1) +
geom_point(aes(x=TriadType, y=Observed, colour="Observed")) +
coord_flip()