# larisa

# Upload new file

# Switch to the project directory so that the relative input and output file
# names used by this script resolve there.
# NOTE(review): this is an absolute path on one user's machine, so the script
# stops here anywhere else; consider launching R from the project directory
# instead of calling setwd().
setwd("/Users/larisams/Documents/LCG/Sem_4/Bioinformatics/CarlosMendez")
# ggplot2 supplies the plotting functions (ggplot, geom_bar, ggsave, ...).
library(ggplot2)
# Automated analysis
# Read the sentence counts table and compute, for every row, the share of
# sentences that are gene-disease sentences.
sent.dat <- read.table("annotated-sentence-results.txt", header = TRUE)
sent.dat$Proportion <- sent.dat$GeneDisease / sent.dat$Total

# Label each row from the value in its third column:
#   25 -> "4", 46 -> "3", 17 -> "2", anything else -> "0".
# The labels are the keys of the fill palettes passed to scale_fill_manual().
#
# The lookup is done on the column itself instead of apply()-ing over rows:
# apply() converts the data frame to a matrix first, and as soon as one column
# is non-numeric every number is format()-ed into a padded string (" 25"),
# which makes comparisons such as x[3] == 25 miss silently.
# Missing values in column 3 fall into the "0" group rather than aborting.
# NOTE(review): column 3 is addressed by position, as before; confirm which
# column that is and refer to it by name.
constant.keys <- c(25, 46, 17)
constant.labels <- c("4", "3", "2")
sent.dat$constant <- constant.labels[match(sent.dat[[3]], constant.keys)]
sent.dat$constant[is.na(sent.dat$constant)] <- "0"
# Figure 3: Proportion per Cluster, one panel per Partition, bars filled
# according to the `constant` label; the legend is hidden.
# Only facet_grid() is specified: a ggplot keeps a single facet specification,
# so a facet_wrap() placed before it would be discarded without effect.
# geom_col() draws bars whose heights are the y values (stat = "identity") and
# inherits the x/y/fill mapping from ggplot().
g1 <- ggplot(sent.dat, aes(x = Cluster, y = Proportion, fill = constant)) +
  geom_col() +
  scale_fill_manual(
    "Constant Partitions",
    values = c("4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf")
  ) +
  facet_grid(~Partition, labeller = label_context) +
  theme(legend.position = "none")
ggsave(
  "Figure3.png", g1,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)
# Figure 2: GeneDisease against Total, one panel per Partition, bars filled
# according to the `constant` label; the legend is hidden.
# Only facet_grid() is specified: a ggplot keeps a single facet specification,
# so a facet_wrap() placed before it would be discarded without effect.
# geom_col() draws bars whose heights are the y values (stat = "identity") and
# inherits the x/y/fill mapping from ggplot().
g2 <- ggplot(sent.dat, aes(x = Total, y = GeneDisease, fill = constant)) +
  geom_col() +
  scale_fill_manual(
    "Constant Partitions",
    values = c("4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf")
  ) +
  facet_grid(~Partition, labeller = label_context) +
  xlab("Total sentences") +
  ylab("Gene disease sentences") +
  theme(legend.position = "none")
ggsave(
  "Figure2.png", g2,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)
# Manual analysis
# Read the word counts table; read.table() already returns a data frame.
words.dat <- read.table("word-counts.txt", header = TRUE)

# Replace the Fibrosis and TGF counts by their fraction of each row's Total.
words.dat$Fibrosis <- words.dat$Fibrosis / words.dat$Total
words.dat$TGF <- words.dat$TGF / words.dat$Total

# Reuse the labels computed for sent.dat, skipping its first two entries.
# NOTE(review): this presumes the rows of words.dat line up, in order, with
# rows 3 onward of sent.dat; nothing here checks that, so confirm it.
words.dat$constant <- sent.dat$constant[-(1:2)]
# Figure 7: Fibrosis fraction per Cluster, one panel per Partition, bars
# filled according to the `constant` label; the legend is hidden.
# Only facet_grid() is specified: a ggplot keeps a single facet specification,
# so a facet_wrap() placed before it would be discarded without effect.
# geom_col() draws bars whose heights are the y values (stat = "identity") and
# inherits the x/y/fill mapping from ggplot().
g3 <- ggplot(words.dat, aes(x = Cluster, y = Fibrosis, fill = constant)) +
  geom_col() +
  scale_fill_manual(
    "Constant Partitions",
    values = c("4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf")
  ) +
  facet_grid(~Partition, labeller = label_context) +
  theme(legend.position = "none")
ggsave(
  "Figure7.png", g3,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)
# Figure 8: TGF fraction per Cluster, one panel per Partition, bars filled
# according to the `constant` label; the legend is hidden.
# Only facet_grid() is specified: a ggplot keeps a single facet specification,
# so a facet_wrap() placed before it would be discarded without effect.
# geom_col() draws bars whose heights are the y values (stat = "identity") and
# inherits the x/y/fill mapping from ggplot().
g4 <- ggplot(words.dat, aes(x = Cluster, y = TGF, fill = constant)) +
  geom_col() +
  scale_fill_manual(
    "Constant Partitions",
    values = c("4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf")
  ) +
  facet_grid(~Partition, labeller = label_context) +
  theme(legend.position = "none")
ggsave(
  "Figure8.png", g4,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)