I have the dataframe
test <- structure(list(
y2002 = c("freshman","freshman","freshman","sophomore","sophomore","senior"),
y2003 = c("freshman","junior","junior","sophomore","sophomore","senior"),
y2004 = c("junior","sophomore","sophomore","senior","senior",NA),
y2005 = c("senior","senior","senior",NA, NA, NA)),
.Names = c("2002","2003","2004","2005"),
row.names = c(c(1:6)),
class = "data.frame")
> test
2002 2003 2004 2005
1 freshman freshman junior senior
2 freshman junior sophomore senior
3 freshman junior sophomore senior
4 sophomore sophomore senior <NA>
5 sophomore sophomore senior <NA>
6 senior senior <NA> <NA>
and I want to create a graph that should resemble the ugly text art below:
freshman ---- junior ----------------------\
freshman ---- junior --- sophomore -------- senior
sophomore ================================/
senior ---------------------------------/
In other words, I need to show in a graph the possible paths to "senior", giving weights to edges according to the number of cases using that path.
First attempt
This code generates a graph, but not one similar to the text art above.
library(igraph)
elist <- lapply(seq_len(nrow(test)), function(i) {
x <- as.character(test[i,])
x <- unique(na.omit(x))
x <- rep(x, each=2)
x <- x[-1]
x <- x[-length(x)]
r <- matrix(x, ncol=2, byrow=TRUE)
if (nrow(r) > 0) { r <- cbind(r, i) } else { r <- cbind(r, numeric()) }
r
})
result <- as.data.frame(do.call(rbind, elist))
names(result) <- c("vertex","edge", "id")
categories <- data.frame(name=c("freshman","junior","sophomore","senior"))
g <- graph.data.frame(result,directed=T,vertices=categories)
g <- set.edge.attribute(g, "weight", value=runif(ecount(g))*10)
igraph.par("plot.layout", layout.reingold.tilford)
plot(g, vertex.label=categories$name, vertex.label.dist=7,
edge.width=get.edge.attribute(g,"weight"), edge.arrow.size=1.5)
Result (not what I wanted)
**This question is related to this post*
**And this post is a necessary step for solving this question*
I don't think you understand exactly what your up to, in your example sketch of the graph you have 8 nodes. But in the figure you produced you only have 4 nodes. This is because in the graph you create you only have 4 nodes. igraph
will treat nodes with the same name (e.g. two sophomores as the same node but with two edges)
However, once you have made the graph with multiple sophomore etc nodes. e.g. sophomore1 and sophomore2. You can then place the nodes where you want them with layout
as below
df<- read.table(text="vertex edge weight
freshman junior 2
junior senior 2
freshman2 junior2 2
junior2 sophomore 2
sophomore senior 2
sophomore2 senior 3
senior2 senior 2",header=TRUE)
categories <-data.frame(name=c("freshman","junior","sophomore","senior","freshman2",
"junior2","sophomore2","senior2"))
g <- graph.data.frame(df,directed=T,vertices=categories)
layOUT<-data.frame(x=c(1,2,3,4,1,2,1,1),y=c(4,4,3,3,3,3,2,1))
l<-as.matrix(layOUT)
plot(g,layout=l)
This is a completely worked out solution. One has to read the graph bottom up, bearing in mind that each individual is represented as a vertical line that represents their career path. Yes, I did ditch iGraph for this task. \o/
require(reshape2)
meltpath <- function(x){
require(data.table)
x <- melt(data = x, id.vars = 'id', measure.vars = names(x)[-1])
names(x) <- c('id','year','category')
x$year <- factor(x$year)
id <- unique(x$id)
idtable <- data.table(id = id, count = 1:length(id))
x <- x[order(x$id), ]
x <- merge(x, idtable, by='id')
return(x)
}
carpath <- function(datatable, max_x = max(datatable$count)){
require(ggplot2)
p = ggplot(datatable, aes(x = count, y = year, fill = category)) +
geom_tile() +
scale_y_discrete(name = "year\n",
breaks = rev(levels(datatable$year))) +
scale_x_continuous(name = "cumulative count",
limits = c(0,max_x)) +
guides(fill = guide_legend(title="Career stage\n",
reverse=TRUE)) +
theme(panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(vjust = 1.2, face="bold", size=20),
axis.title.y = element_text(size=15, face="bold"),
axis.text.y = element_text(size=15, colour="black"),
legend.title = element_text(size = 15),
legend.text = element_text(size = 15)) +
scale_fill_brewer(palette = "Dark2") +
ggtitle("Career path of individual Students by year")
p
}
test <- structure(list(
id = 1:6,
y2002 = c("freshman","freshman","freshman","sophomore","sophomore","senior"),
y2003 = c("freshman","junior","sophomore","sophomore","sophomore","senior"),
y2004 = c("junior","sophomore","sophomore","senior","senior",NA),
y2005 = c("senior","senior","senior",NA, NA, NA)),
.Names = c("id","2002","2003","2004","2005"),
row.names = c(c(1:6)),
class = "data.frame")
# Grow dataset
testg = data.frame()
for (i in rownames(test)) {
test0 <- test[rep(i, each=abs(floor(rnorm(1)*100))),]
testg <- rbind(testg, test0)
}
testg$id <- 1:nrow(testg)
# Munge
test0 <- testg
test1 <- melt(data = test0, id.vars = 'id', measure.vars = names(test0)[-1])
names(test1) <- c('id','year','category')
test1$category[test1$category == 'freshman'] <- 1
test1$category[test1$category == 'junior'] <- 2
test1$category[test1$category == 'sophomore'] <- 3
test1$category[test1$category == 'senior'] <- 4
test1$category <- factor(test1$category, levels=1:4, labels = c('1. freshman','2. junior','3. sophomore','4. senior'))
test1 <- test1[order(test1$category), ]
test1 <- dcast(test1, id ~ year)
test1 <- test1[order(test1$'2005',test1$'2004',test1$'2003',test1$'2002'), ]
test2 <- meltpath(test1)
carpath(test2)