Graph mining

Packages

igraph
statnet
ggnet
ggnetwork
ggraph
visNetwork
networkD3
sigma
rgexf
threejs

Creating graph

library(igraph)
g <- graph_from_data_frame(d = #edge, vertices = #vertices, directed = #FALSE)
- Else:
  - g <- graph.edgelist(#df, directed = #FALSE) #df with 2 columns of relationship (edge list)
  - g <- set_vertex_attr(g, "#attributename", value = c(#value)) #Returns a modified copy of the graph, so assign the result back
    - vertex_attr(g) #View attribute
  - g <- set_edge_attr(g, "#attributename", value = c(#value)) #Returns a modified copy of the graph, so assign the result back
    - edge_attr(g) #View attribute
V(#g)[[#row]] #View vertices
E(#g) #View edges
- E(g)[[inc('#search')]] #Find all edges incident to (touching) the given vertex
delete_edges(#g, E(#g)[#col #< #criterion])
gsize(#g) # Count number of edges
gorder(#g) # Count number of vertices
is.directed(#g) #Check if graph is directed
is.weighted(#g) #Check if graph is weighted
#g['#vertex1', '#vertex2'] #Check if there is an edge
incident(#g, '#vertex1', mode = c("all / in / out")) #Show all edges to and from this vertex
table(head_of(#g, E(#g))) #Count the edges pointing to each vertex (head = vertex the edge points to); use tail_of() for the starting vertex of each edge

Random network

Used to compare with network graph in network randomization test

#gl <- vector('list', 1000)
for(i in 1:1000){
- #gl[[i]] <- erdos.renyi.game(n = gorder(#g), p.or.m = edge_density(#g), type = "gnp") }#Generate random graphs with same number of vertices and similar density
gl_avgpathlength <- unlist(lapply(#gl, average.path.length, directed = #FALSE)) #Calculate average path length of 1000 random graphs
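hist(#gl_avgpathlength, xlim = range(c(#1.5, #6))); abline(v = #g_avgpathlength, col = #"red", lty = #3, lwd = #2) #Two separate calls; #g_avgpathlength is the average path length of the observed graph, e.g. average.path.length(#g, directed = #FALSE)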
mean(#gl_avgpathlength < #g_avgpathlength)

Plot

V(#g)$color <- ifelse(V(#g)$#col == "criterion", "#red", "#white") #plot() picks up the vertex attribute named "color" automatically
plot(#g, vertex.label = #NA / #dist_var, vertex.label.color = '#black', vertex.size = #0 / sqrt(#vertex_measure)+1, vertex.label.cex = #0.6, edge.color = '#black', edge.width = E(#g)$#weight_col, main = #title, layout = #layout_randomly / layout_in_circle / layout_on_sphere / layout_with_fr / layout_with_kk / layout_with_lgl / layout_as_bipartite / layout.fruchterman.reingold(#g)) #Plot network graph #vertex_measure is a numeric per-vertex value such as degree or betweenness #Pass vertex.size only once

Path length (geodesic distance)

farthest_vertices(#g) #Pair of vertices that are farthest apart and the distance between them (the diameter: the longest of all shortest paths)
get_diameter(#g) #Show the sequence of vertices along the diameter path (returns only one path if several are tied)

mean_distance(#g, directed = #TRUE) #Average shortest path between all vertices in network
OR average.path.length(#g, directed = #TRUE)

ego(#g, #n_dist, '#vertex', mode = c('out')) #Identify vertices reachable in n dist
#g_ego <- make_ego_graph(#g, diameter(#g), nodes = '#vertex', mode = c(#"all"))[[1]] #second parameter: maximal number of connections for any vertex to be connected to point zero
#dist <- distances(#g_ego, "#vertex")
#colors <- c("black", "red", "orange", "blue", "dodgerblue", "cyan")
V(#g_ego)$color <- #colors[#dist + 1] #Plot vertices of same distance with same colour

Neighbours

neighbors(#g, "#vertex", mode = c('all')) #Identify neighbours
intersection(neighbors(#g, "#vertex1", mode = c('all')) , neighbors(#g, "#vertex2", mode = c('all')) ) #Find common connections though vertices are not directly connected

Centrality

g_out = degree(#g, mode = c("out")) #Calculate number of connections
table(g_out) #Summary of number of nodes with x degrees
hist(g_out) #Plot summary
which.max(g_out) #Identify node with highest degrees

edge_density(#g) #Interconnectedness: Proportion of edges wrt to all potential edges between every pair of vertices

betweenness(#g, directed = #TRUE, normalized = #TRUE) #Betweenness to see how important vertex is in the network #Set normalized = TRUE to get scores scaled between 0 and 1

g_ec <- eigen_centrality(#g)$vector #How well connected a vertex is to many others, especially to those who are highly connected to others too
which.max(g_ec) #Identify node with highest centrality

Transitivity

Triangles (Triads)

triangles(#g) #Identify closed triangles
count_triangles(#g, vids = '#node') #Count number of closed triangles for each vertex
#gl_transitivity <- unlist(lapply(#gl, transitivity, vids = '#node', type = 'local')) #Probability that the adjacent vertices of a vertex are themselves connected #Pass the function itself to lapply, with its extra arguments after it
summary(#gl_transitivity)

Cliques

#lc <- largest_cliques(#g) #Return maximum clique - number and id of vertices in largest interconnected triangles
- #g_sg1 <- as.undirected(induced_subgraph(#g, #lc[[1]])) #Subsetting to undirected subgraph (induced_subgraph replaces the deprecated subgraph)
#g_mc <- max_cliques(#g) #Return list of maximal cliques - number and id of vertices in largest interconnected triangles (may not be part of largest cliques)
table(unlist(lapply(#g_mc, length)))

Assortativity

Preferential attachment of vertices to other vertices that are similar to each other: Higher assortativity means more association between vertices similar to each other

#attr <- as.numeric(factor(V(#g)$#col))
assortativity(#g, #attr) #Connectivity between those of similar attributes #attr must be numeric (categorical var converted to factors)

#results <- vector('list', #1000)
for(i in 1:#1000){
#results[[i]] <- assortativity(#g, sample(#attr))} #Connectivity between those of similar attributes #attr must be numeric (categorical var converted to factors) #Randomization of attributes
hist(unlist(#results))
assortativity.degree(#g, directed = #FALSE) #Connectivity between highly connected individuals with other highly connected individuals

Reciprocity

Equal to proportion of edges that are symmetrical (both directions: outgoing edges have incoming edges)

reciprocity(#g)

Community

Modules / Groups / Clusters

#comm <- fastgreedy.community(#g) #Return vertices in community #Add vertices to community and assess modularity score (interconnected edges are within VS between communities) at each step
#comm <- edge.betweenness.community(#g) #Return vertices in community #Divide network into smaller parts until it finds edges that are bridges between communities
#comm <- leading.eigenvector.community(#g)

length(#comm) #Number of communities
sizes(#comm) #Number of vertices in each community
membership(#comm) #Membership of vertex in community
plot(#comm, #g) #Plot community

Interactive graph visualization

library(igraph)
library(threejs)
#g <- set_vertex_attr(#g, "color", value = "dodgerblue") #Set a vertex attribute called 'color' to 'dodgerblue'
#g_ec <- as.numeric(eigen_centrality(#g)$vector)

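#m <- membership(#comm) #membership() takes a community object (e.g. from fastgreedy.community(#g)), not the graph itself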
#g_a <- set_vertex_attr(#g, "color", value = c("yellow", "blue", "red")[#m])
graphjs(#g, vertex.size = #5*sqrt(#g_ec) )

Google Sites

Report abuse