# graphics.R
# NOTE(review): the working directory is set to a machine-specific absolute
# path, so the relative file names passed to read.table() and ggsave() below
# resolve inside it. setwd() raises an error when the directory does not
# exist, so the script only runs on a machine that has this folder.
setwd("/Users/larisams/Documents/LCG/Sem_4/Bioinformatics/CarlosMendez")
# ggplot2 provides the plotting functions used below (ggplot, ggsave, ...).
library(ggplot2)
# Automated analysis
# Load the sentence counts. read.table() already returns a data.frame, so no
# extra conversion is needed.
sent.dat <- read.table("annotated-sentence-results.txt", header = TRUE)

# Fraction of each row's sentences that are gene-disease sentences.
sent.dat$Proportion <- sent.dat$GeneDisease / sent.dat$Total

# Label every row from the value in the third column of the data frame:
#   25 -> "4", 46 -> "3", 17 -> "2", anything else (including NA) -> "0".
# The lookup is vectorised on the column itself. Looping with apply() would
# first turn the data frame into a matrix; if any column is non-numeric the
# numbers become width-padded strings (e.g. " 25"), and comparing those with
# 25 silently fails.
# NOTE(review): the column is addressed by position; which variable that is
# depends on the header order of the input file -- confirm and switch to the
# column name.
constant_labels <- c("4", "3", "2")[match(sent.dat[[3]], c(25, 46, 17))]
constant_labels[is.na(constant_labels)] <- "0"
sent.dat$constant <- constant_labels
# Figure 3: Proportion per Cluster as bars, filled by the `constant` label,
# one panel per Partition, legend hidden.
g1 <- ggplot(sent.dat, aes(x = Cluster, y = Proportion, fill = constant)) +
  # x/y/fill are inherited from the plot-level mapping.
  geom_bar(stat = "identity") +
  scale_fill_manual(
    "Constant Partitions",
    values = c(
      "4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf"
    )
  ) +
  # A plot keeps a single facet specification, so the facet_wrap() call that
  # used to precede this facet_grid() was overridden and has been dropped.
  facet_grid(~Partition, labeller = label_context) +
  theme(legend.position = "none")
ggsave(
  "Figure3.png", g1,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)
# Figure 2: GeneDisease against Total as bars, filled by the `constant`
# label, one panel per Partition, legend hidden.
g2 <- ggplot(sent.dat, aes(x = Total, y = GeneDisease, fill = constant)) +
  # x/y/fill are inherited from the plot-level mapping.
  geom_bar(stat = "identity") +
  scale_fill_manual(
    "Constant Partitions",
    values = c(
      "4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf"
    )
  ) +
  # A plot keeps a single facet specification, so the facet_wrap() call that
  # used to precede this facet_grid() was overridden and has been dropped.
  facet_grid(~Partition, labeller = label_context) +
  xlab("Total sentences") +
  ylab("Gene disease sentences") +
  theme(legend.position = "none")
ggsave(
  "Figure2.png", g2,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)
# Manual analysis
# Load the word counts (read.table() yields a data.frame directly).
words.dat <- read.table("word-counts.txt", header = TRUE)

# Replace the Fibrosis and TGF columns by their value divided by Total.
words.dat$Fibrosis <- words.dat$Fibrosis / words.dat$Total
words.dat$TGF <- words.dat$TGF / words.dat$Total

# Reuse the labels computed for sent.dat, skipping its first two entries.
# NOTE(review): this presumes the remaining sent.dat rows line up one-to-one,
# in order, with the rows of words.dat -- confirm against both input files.
words.dat$constant <- sent.dat$constant[-(1:2)]
# Figure 7: Fibrosis per Cluster as bars, filled by the `constant` label,
# one panel per Partition, legend hidden.
g3 <- ggplot(words.dat, aes(x = Cluster, y = Fibrosis, fill = constant)) +
  # x/y/fill are inherited from the plot-level mapping.
  geom_bar(stat = "identity") +
  scale_fill_manual(
    "Constant Partitions",
    values = c(
      "4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf"
    )
  ) +
  # A plot keeps a single facet specification, so the facet_wrap() call that
  # used to precede this facet_grid() was overridden and has been dropped.
  facet_grid(~Partition, labeller = label_context) +
  theme(legend.position = "none")
ggsave(
  "Figure7.png", g3,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)
# Figure 8: TGF per Cluster as bars, filled by the `constant` label,
# one panel per Partition, legend hidden.
g4 <- ggplot(words.dat, aes(x = Cluster, y = TGF, fill = constant)) +
  # x/y/fill are inherited from the plot-level mapping.
  geom_bar(stat = "identity") +
  scale_fill_manual(
    "Constant Partitions",
    values = c(
      "4" = "#005b7a", "3" = "#DF0664", "2" = "#66CCCC", "0" = "#b2afaf"
    )
  ) +
  # A plot keeps a single facet specification, so the facet_wrap() call that
  # used to precede this facet_grid() was overridden and has been dropped.
  facet_grid(~Partition, labeller = label_context) +
  theme(legend.position = "none")
ggsave(
  "Figure8.png", g4,
  width = 7.5, height = 5, dpi = 300, units = "in", device = "png"
)