8.1

Load libraries.

library(igraph)
library(tidyverse)

We begin by reading in the data from the GSS network module 2004.

# Location of the GSS 2004 network-module extract (a CSV file hosted on GitHub).
gss_url <- "https://raw.githubusercontent.com/mahoffman/stanford_networks/main/data/gss_local_nets.csv"
# Download and parse the CSV; column types are left for readr to guess.
gss <- gss_url %>%
  read_csv()
## Rows: 1426 Columns: 41
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): sex, race, partyid, relig, educ2, educ3, educ4, educ5, relig1, rel...
## dbl (28): age, numgiven, close12, close13, close14, close15, close23, close2...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Let’s have a broad overview of the data

# Visual overview of the whole data set; sort_type = FALSE is passed so that
# vis_dat() does not regroup the columns by type.
gss %>%
  visdat::vis_dat(sort_type = FALSE) +
  labs(
    title = "GSS data",
    subtitle = glue::glue("#rows = {nrow(gss)}, #columns = {ncol(gss)} ")
  )

The first five columns describe the attributes of a given respondent:

  • sex

  • age

  • race

  • partyid

  • relig (religion).

# Number of leading columns that describe the respondent.
atrr_n <- 5
# Same overview plot, restricted to those respondent columns.
gss[, seq_len(atrr_n)] %>%
  visdat::vis_dat(sort_type = FALSE) +
  labs(
    title = "GSS data: attributes of a given respondent",
    subtitle = glue::glue("#rows = {nrow(gss[,1:atrr_n])}, #columns = {ncol(gss[,1:atrr_n])} ")
  )

The basic idea of the module was to ask people about up to five others with whom they discussed “important matters” in the past six months. For each respondent, the remaining columns record how many such people they named, how close those people are to one another, and what their attributes are:

  • numgiven: the number of others with whom the respondent discussed important matters.

  • “close” columns: The relationship between others (e.g., close12 is the closeness of person 1 to person 2, for each respondent).

  • “sex, race, age, educ, relig” columns: attributes of each of the (up to five) others in the ego network, one column per attribute and other (e.g., educ2, relig1), i.e. 5 × 5 = 25 columns.

# Index of the last column of the data set (it has 41 columns in total), so
# columns (atrr_n + 1) to net_n are everything after the respondent attributes.
net_n <- 41
# Overview plot of the ego-network columns only.
# The title previously read "''netwok' part" (typo and unbalanced quotes).
visdat::vis_dat(gss[, (atrr_n + 1):net_n], sort_type = FALSE) +
  labs(
    title = "GSS data: 'network' part",
    subtitle = glue::glue("#rows = {nrow(gss[,(atrr_n+1):net_n])}, #columns = {ncol(gss[,(atrr_n+1):net_n])} ")
  )

To build an ego network for each respondent, we first have to turn the variables close12 through close45 into an edge list, one for each respondent.

# Keep only the columns whose names start with "close"
# (the closeness ratings between pairs of others).
ties <- dplyr::select(gss, starts_with("close"))
# Preview the first rows.
head(ties)
## # A tibble: 6 × 10
##   close12 close13 close14 close15 close23 close24 close25 close34 close35
##     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1      NA      NA      NA      NA      NA      NA      NA      NA      NA
## 2      NA      NA      NA      NA      NA      NA      NA      NA      NA
## 3       1       2       0      NA       2       2      NA       1      NA
## 4       2       0       2      NA       2       2      NA       2      NA
## 5      NA      NA      NA      NA      NA      NA      NA      NA      NA
## 6       0       2       1       1       1       1       1       2       2
## # ℹ 1 more variable: close45 <dbl>

Next, we define a function that turns any row in the ties data set into an ego network; we then apply that function to every row in the data.

#' Build an ego network from one respondent's closeness ratings
#'
#' @param tie The ten pairwise closeness values of one respondent, in the
#'   order close12, close13, close14, close15, close23, close24, close25,
#'   close34, close35, close45. Anything `as.numeric()` can turn into a
#'   length-10 numeric vector is accepted.
#' @return An undirected, weighted igraph graph. Others for whom every rating
#'   is missing are left out, so the remaining vertices are renumbered from 1
#'   and no longer carry the original 1..5 identity.
make_ego_nets <- function(tie) {
  n_others <- 5
  n_pairs <- choose(n_others, 2)

  ratings <- as.numeric(tie)
  # Without this check a wrong-length input is silently recycled into the
  # triangle (or fails with an obscure replacement-length error).
  if (length(ratings) != n_pairs) {
    stop(
      "`tie` must hold ", n_pairs, " closeness values, not ", length(ratings),
      call. = FALSE
    )
  }

  mat <- matrix(nrow = n_others, ncol = n_others)
  # lower.tri() is filled column by column -- (2,1), (3,1), (4,1), (5,1),
  # (3,2), ... -- which matches the close12, close13, ..., close45 order.
  mat[lower.tri(mat)] <- ratings
  # Mirror the lower triangle so the matrix is symmetric.
  mat[upper.tri(mat)] <- t(mat)[upper.tri(mat)]

  # The diagonal is still NA here, so a row made up of NAs only belongs to
  # someone with no rating at all.
  has_rating <- rowSums(is.na(mat)) < nrow(mat)
  # Subsetting with an all-TRUE mask is a no-op, so no `if` is needed;
  # drop = FALSE guarantees the result stays a matrix.
  mat <- mat[has_rating, has_rating, drop = FALSE]
  diag(mat) <- 0

  # NOTE(review): NA ratings between two retained others are handed to igraph
  # unchanged; how igraph treats them (NA-weighted edge vs. error) appears to
  # depend on the igraph version -- confirm the intended handling.
  # graph_from_adjacency_matrix() is the current name of graph.adjacency().
  graph_from_adjacency_matrix(mat, mode = "undirected", weighted = TRUE)
}

A simpler approach

#' Build an ego network directly from the names of the closeness columns
#'
#' @param tie A named vector (or one-row data frame) of closeness values whose
#'   names end in the two one-digit ids of the pair, e.g. `close12` for
#'   others 1 and 2.
#' @return An undirected igraph graph with one edge per non-missing, non-zero
#'   value. Vertices are named after the ids found in the column names and the
#'   closeness value is stored in the edge attribute `tie`. An input without
#'   any such value gives a graph with no vertices.
make_ego_nets_simple <- function(tie) {
  # Flatten to a named vector and keep only real links: neither missing nor
  # zero. `FALSE & NA` is FALSE, so missing values never leak into the mask.
  tie <- unlist(tie)
  tie <- tie[!is.na(tie) & tie != 0]

  # The digits in each name identify the linked pair ("close12" -> "12").
  pair_ids <- str_extract(names(tie), "[0-9]+")
  # Each id must be exactly one digit; anything else used to be split into
  # the wrong number of columns without any warning.
  if (length(pair_ids) != length(tie) ||
    anyNA(pair_ids) ||
    any(nchar(pair_ids) != 2)) {
    stop(
      "names of `tie` must contain exactly two digits, e.g. \"close12\"",
      call. = FALSE
    )
  }

  # Build the edge list with fixed columns so that it keeps its two endpoint
  # columns even when no link is left (splitting an empty character vector
  # yields a data frame without columns).
  others_link <- data.frame(
    V1 = substr(pair_ids, 1, 1),
    V2 = substr(pair_ids, 2, 2),
    tie = unname(tie),
    stringsAsFactors = FALSE
  )
  graph_from_data_frame(others_link, directed = FALSE)
}

Clean ties before creating the networks

# Treat a closeness of zero like a missing link.
ties[ties == 0] <- NA
# Flag respondents for whom no link between others is left.
others_missing <- rowSums(!is.na(ties)) == 0

# Drop the flagged respondents.
ties <- ties[!others_missing, ]

Compare the two functions

# Build one ego network per respondent with each of the two functions.
# apply() works on a matrix, so every row reaches the function as a named
# numeric vector.
ego_nets <- apply(as.matrix(ties), MARGIN = 1, FUN = make_ego_nets)
ego_nets_simple <- apply(as.matrix(ties), MARGIN = 1, FUN = make_ego_nets_simple)

# First respondent, matrix-based construction.
plot(ego_nets[[1]])

# First respondent, edge-list-based construction.
plot(ego_nets_simple[[1]])

Not the same! Where did things go wrong?

# Inspect the cleaned ties of the first remaining respondent.
ties[1, ]
## # A tibble: 1 × 10
##   close12 close13 close14 close15 close23 close24 close25 close34 close35
##     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1       1       2      NA      NA       2       2      NA       1      NA
## # ℹ 1 more variable: close45 <dbl>