# Clustering_Analysis.R
library(methods)
library(cluster)
# Write the members of each cluster to a text file.
#
# For every element of `obj`, a "Cluster: <index>" header is appended to
# `filename`, followed by that element's entries (row name, then value,
# separated by a space).
#
# obj:      list of clusters, e.g. the value returned by rect.hclust().
# filename: path of the output file; content is always appended, so an
#           existing file is extended rather than overwritten.
# Returns `obj` invisibly.
print_cluster <- function(obj, filename) {
  # seq_along() gives an empty sequence for an empty list, whereas
  # 1:length(obj) would iterate over c(1, 0) and fail on obj[[1]].
  for (cl in seq_along(obj)) {
    write.table(
      paste("\nCluster: ", cl, "\n"),
      file = filename, append = TRUE, quote = FALSE,
      row.names = TRUE, col.names = FALSE
    )
    write.table(
      obj[[cl]],
      file = filename, append = TRUE, quote = FALSE,
      row.names = TRUE, col.names = FALSE, sep = " "
    )
  }
  invisible(obj)
}
###################################################################################################################################################
# Receive arguments: the first trailing command-line argument is the path of
# the input file; abort with a clear message when none is given.
arg <- commandArgs(trailingOnly = TRUE)
if (length(arg) == 0) {
  stop("Must supply input file.\n", call. = FALSE)
}
################################################# Run analysis ##################################################
# Load the vectors: space-separated, no header line, column 1 used as the row
# label, followed by 299 numeric columns.
vecs <- read.table(
  arg[1],
  header = FALSE,
  row.names = 1,
  sep = " ",
  colClasses = c("character", rep("numeric", 299))
)
# Hierarchical clustering on the pairwise distances between rows.
# NOTE(review): per ?hclust, "ward.D" only corresponds to Ward's criterion
# when given squared distances; confirm that "ward.D2" is not what is wanted.
senclus <- hclust(dist(vecs), method = "ward.D")
print("agglomerative coefficient: ")
print(coef.hclust(senclus))
# Draw the dendrogram of `tree` into a PNG file, optionally outlining a
# k-cluster partition.
#
# tree:   hclust result to plot.
# file:   path of the PNG file to create.
# height: image height in pixels (the width is fixed at 975).
# k:      number of clusters to outline with rect.hclust(); NULL draws the
#         plain dendrogram only.
# Returns, invisibly, the list produced by rect.hclust(), or NULL when `k`
# is NULL.
save_dendrogram <- function(tree, file, height, k = NULL) {
  png(file, height = height, width = 975)
  # Close the device on exit so it is released even if plotting fails.
  on.exit(dev.off(), add = TRUE)
  plot(tree, hang = -1)
  if (is.null(k)) {
    return(invisible(NULL))
  }
  invisible(rect.hclust(tree, k = k, border = 3:4))
}

# Plot the k-cluster partition of `tree` to "Dendogram_<k>clusters.png" and
# write its membership listing to "SentenceMembership_<k>clusters.txt".
#
# tree:   hclust result to cut.
# k:      number of clusters.
# height: image height in pixels.
# Returns the list of clusters produced by rect.hclust().
save_partition <- function(tree, k, height) {
  members <- save_dendrogram(
    tree,
    paste0("Dendogram_", k, "clusters.png"),
    height = height,
    k = k
  )
  print_cluster(members, paste0("SentenceMembership_", k, "clusters.txt"))
  members
}

# Save the image of the original dendrogram.
save_dendrogram(senclus, "Dendogram_ward.png", height = 608)

# Partitions into two to six clusters: one image and one membership file each.
# NOTE(review): the 5- and 6-cluster images are 600 px tall while the others
# are 608 px; kept as in the original -- confirm whether this is intentional.
cls2 <- save_partition(senclus, k = 2, height = 608)
cls3 <- save_partition(senclus, k = 3, height = 608)
cls4 <- save_partition(senclus, k = 4, height = 608)
cls5 <- save_partition(senclus, k = 5, height = 600)
cls6 <- save_partition(senclus, k = 6, height = 600)