# Clustering_Analysis.R
library(methods)
library(cluster)

# Write the clusters in `obj` to a text file.
#
# Args:
#   obj:      List with one element per cluster (for example the value
#             returned by rect.hclust()). Each element is passed to
#             write.table(), so its row names are written in the first column.
#   filename: Path of the output file. Output is appended; an existing file is
#             never truncated by this function.
#
# For each cluster a "Cluster: <index>" header is written, followed by the
# cluster's content, with columns separated by three spaces.
print_cluster <- function(obj, filename) {
  # seq_along() gives an empty sequence for an empty list, whereas
  # 1:length(obj) would iterate over c(1, 0) and fail on obj[[1]].
  for (cl in seq_along(obj)) {
    # Header line for this cluster.
    write.table(
      paste("\nCluster: ", cl, "\n"),
      file = filename, append = TRUE, quote = FALSE,
      row.names = TRUE, col.names = FALSE
    )
    # Members of this cluster.
    write.table(
      obj[[cl]],
      file = filename, append = TRUE, quote = FALSE,
      row.names = TRUE, col.names = FALSE, sep = "   "
    )
  }
}
###################################################################################################################################################
# Receive arguments: only the trailing command-line arguments are read, and
# the first one is used later as the path of the input file.
arg <- commandArgs(trailingOnly = TRUE)

# Abort early with a clean message (no call shown) when no input was given.
if (length(arg) == 0) {
  stop("Must supply input file.", call. = FALSE)
}

################################################# Run analysis ##################################################
# Read the input table: space-separated, no header line. The first column is
# read as character and used as row names; the next 299 columns are read as
# numeric.
vecs <- read.table(arg[1],
                   header = FALSE, row.names = 1, sep = " ",
                   colClasses = c("character", rep("numeric", 299)))

# Hierarchical clustering of the rows, using dist() with its default settings
# and the "ward.D" agglomeration method, then report the agglomerative
# coefficient of the resulting tree.
senclus <- hclust(dist(vecs), method = "ward.D")
print("agglomerative coefficient: ")
print(coef.hclust(senclus))

# Save the image of the original dendrogram
png("Dendogram_ward.png", height = 608, width = 975)
plot(senclus, hang = -1)
dev.off()

###
# Partition the tree into 2, 3, 4, 5 and 6 clusters.
#
# For each number of clusters k:
#   * draw the dendrogram with the k clusters outlined by rect.hclust() and
#     save it as "Dendogram_<k>clusters.png";
#   * write the cluster membership to "SentenceMembership_<k>clusters.txt".
#
# All images use the same size as the original dendrogram (608 x 975); the
# copy-pasted sections previously used a height of 600 for k = 5 and k = 6.
for (k in 2:6) {
  png(sprintf("Dendogram_%dclusters.png", k), height = 608, width = 975)
  plot(senclus, hang = -1)
  # rect.hclust() draws the rectangles on the open plot and returns the
  # members of each cluster as a list.
  members <- rect.hclust(senclus, k = k, border = 3:4)
  dev.off()

  # Write the membership file. print_cluster() appends to its target, so a
  # file left over from a previous run is removed first; otherwise re-running
  # the script would duplicate the content.
  out_file <- sprintf("SentenceMembership_%dclusters.txt", k)
  unlink(out_file)
  print_cluster(members, out_file)

  # Keep the per-k results available as cls2, cls3, ..., cls6.
  assign(paste0("cls", k), members)
}