Roberto Olayo Alarcon

Results and Scripts

This diff could not be displayed because it is too large.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
1 +1
2 +Cluster: 1
3 +
4 +3_s 3
5 +17_s 17
6 +33_s 33
7 +42_s 42
8 +43_s 43
9 +59_s 59
10 +66_s 66
11 +82_s 82
12 +84_s 84
13 +100_s 100
14 +123_s 123
15 +124_s 124
16 +132_s 131
17 +136_s 135
18 +137_s 136
19 +148_s 147
20 +188_s 186
21 +196_s 194
22 +216_s 214
23 +219_s 217
24 +226_s 224
25 +227_s 225
26 +229_s 227
27 +230_s 228
28 +234_s 232
29 +238_s 236
30 +244_s 242
31 +245_s 243
32 +269_s 266
33 +275_s 272
34 +281_s 278
35 +287_s 284
36 +290_s 287
37 +301_s 297
38 +302_s 298
39 +303_s 299
40 +315_s 311
41 +332_s 328
42 +350_s 346
43 +361_s 357
44 +366_s 362
45 +379_s 375
46 +415_s 411
47 +421_s 417
48 +444_s 440
49 +445_s 441
50 +446_s 442
51 +453_s 449
52 +1
53 +Cluster: 2
54 +
55 +1_s 1
56 +5_s 5
57 +6_s 6
58 +7_s 7
59 +8_s 8
60 +10_s 10
61 +12_s 12
62 +13_s 13
63 +14_s 14
64 +16_s 16
65 +18_s 18
66 +20_s 20
67 +22_s 22
68 +23_s 23
69 +24_s 24
70 +25_s 25
71 +28_s 28
72 +29_s 29
73 +31_s 31
74 +34_s 34
75 +37_s 37
76 +38_s 38
77 +44_s 44
78 +47_s 47
79 +48_s 48
80 +49_s 49
81 +54_s 54
82 +55_s 55
83 +58_s 58
84 +64_s 64
85 +65_s 65
86 +67_s 67
87 +69_s 69
88 +70_s 70
89 +71_s 71
90 +72_s 72
91 +74_s 74
92 +75_s 75
93 +79_s 79
94 +80_s 80
95 +81_s 81
96 +83_s 83
97 +86_s 86
98 +87_s 87
99 +88_s 88
100 +89_s 89
101 +90_s 90
102 +91_s 91
103 +92_s 92
104 +93_s 93
105 +94_s 94
106 +98_s 98
107 +101_s 101
108 +102_s 102
109 +104_s 104
110 +106_s 106
111 +109_s 109
112 +110_s 110
113 +112_s 112
114 +115_s 115
115 +117_s 117
116 +118_s 118
117 +120_s 120
118 +127_s 126
119 +128_s 127
120 +130_s 129
121 +138_s 137
122 +141_s 140
123 +142_s 141
124 +144_s 143
125 +146_s 145
126 +147_s 146
127 +149_s 148
128 +150_s 149
129 +156_s 155
130 +159_s 158
131 +161_s 160
132 +162_s 161
133 +163_s 162
134 +164_s 163
135 +165_s 164
136 +166_s 165
137 +169_s 168
138 +171_s 169
139 +174_s 172
140 +177_s 175
141 +179_s 177
142 +182_s 180
143 +184_s 182
144 +186_s 184
145 +189_s 187
146 +190_s 188
147 +192_s 190
148 +195_s 193
149 +197_s 195
150 +198_s 196
151 +199_s 197
152 +203_s 201
153 +204_s 202
154 +205_s 203
155 +208_s 206
156 +210_s 208
157 +214_s 212
158 +215_s 213
159 +217_s 215
160 +220_s 218
161 +221_s 219
162 +222_s 220
163 +223_s 221
164 +225_s 223
165 +233_s 231
166 +236_s 234
167 +237_s 235
168 +239_s 237
169 +240_s 238
170 +241_s 239
171 +242_s 240
172 +243_s 241
173 +246_s 244
174 +247_s 245
175 +248_s 246
176 +250_s 248
177 +251_s 249
178 +253_s 250
179 +254_s 251
180 +255_s 252
181 +256_s 253
182 +260_s 257
183 +262_s 259
184 +264_s 261
185 +265_s 262
186 +268_s 265
187 +272_s 269
188 +273_s 270
189 +278_s 275
190 +279_s 276
191 +280_s 277
192 +284_s 281
193 +286_s 283
194 +289_s 286
195 +291_s 288
196 +293_s 289
197 +295_s 291
198 +297_s 293
199 +298_s 294
200 +299_s 295
201 +300_s 296
202 +305_s 301
203 +307_s 303
204 +308_s 304
205 +310_s 306
206 +313_s 309
207 +314_s 310
208 +316_s 312
209 +317_s 313
210 +318_s 314
211 +320_s 316
212 +322_s 318
213 +325_s 321
214 +326_s 322
215 +328_s 324
216 +330_s 326
217 +333_s 329
218 +335_s 331
219 +336_s 332
220 +340_s 336
221 +343_s 339
222 +345_s 341
223 +347_s 343
224 +348_s 344
225 +349_s 345
226 +352_s 348
227 +353_s 349
228 +354_s 350
229 +355_s 351
230 +356_s 352
231 +357_s 353
232 +360_s 356
233 +362_s 358
234 +363_s 359
235 +364_s 360
236 +365_s 361
237 +367_s 363
238 +368_s 364
239 +371_s 367
240 +372_s 368
241 +374_s 370
242 +375_s 371
243 +376_s 372
244 +377_s 373
245 +378_s 374
246 +380_s 376
247 +383_s 379
248 +384_s 380
249 +387_s 383
250 +388_s 384
251 +390_s 386
252 +391_s 387
253 +392_s 388
254 +398_s 394
255 +399_s 395
256 +401_s 397
257 +402_s 398
258 +403_s 399
259 +407_s 403
260 +408_s 404
261 +409_s 405
262 +411_s 407
263 +413_s 409
264 +414_s 410
265 +416_s 412
266 +418_s 414
267 +419_s 415
268 +420_s 416
269 +424_s 420
270 +428_s 424
271 +429_s 425
272 +431_s 427
273 +433_s 429
274 +437_s 433
275 +438_s 434
276 +440_s 436
277 +442_s 438
278 +443_s 439
279 +447_s 443
280 +448_s 444
281 +450_s 446
282 +451_s 447
283 +452_s 448
284 +1
285 +Cluster: 3
286 +
287 +9_s 9
288 +19_s 19
289 +21_s 21
290 +26_s 26
291 +32_s 32
292 +39_s 39
293 +52_s 52
294 +73_s 73
295 +77_s 77
296 +95_s 95
297 +107_s 107
298 +113_s 113
299 +125_s 125
300 +129_s 128
301 +131_s 130
302 +145_s 144
303 +153_s 152
304 +158_s 157
305 +172_s 170
306 +175_s 173
307 +180_s 178
308 +200_s 198
309 +202_s 200
310 +207_s 205
311 +224_s 222
312 +228_s 226
313 +231_s 229
314 +259_s 256
315 +263_s 260
316 +274_s 271
317 +283_s 280
318 +288_s 285
319 +306_s 302
320 +321_s 317
321 +323_s 319
322 +334_s 330
323 +359_s 355
324 +382_s 378
325 +395_s 391
326 +417_s 413
327 +434_s 430
328 +454_s 450
329 +1
330 +Cluster: 4
331 +
332 +2_s 2
333 +4_s 4
334 +11_s 11
335 +15_s 15
336 +27_s 27
337 +30_s 30
338 +35_s 35
339 +36_s 36
340 +40_s 40
341 +41_s 41
342 +45_s 45
343 +46_s 46
344 +50_s 50
345 +51_s 51
346 +53_s 53
347 +56_s 56
348 +57_s 57
349 +60_s 60
350 +61_s 61
351 +62_s 62
352 +63_s 63
353 +68_s 68
354 +76_s 76
355 +78_s 78
356 +85_s 85
357 +96_s 96
358 +97_s 97
359 +99_s 99
360 +103_s 103
361 +105_s 105
362 +108_s 108
363 +111_s 111
364 +114_s 114
365 +116_s 116
366 +119_s 119
367 +121_s 121
368 +122_s 122
369 +133_s 132
370 +134_s 133
371 +135_s 134
372 +139_s 138
373 +140_s 139
374 +143_s 142
375 +151_s 150
376 +152_s 151
377 +154_s 153
378 +155_s 154
379 +157_s 156
380 +160_s 159
381 +167_s 166
382 +168_s 167
383 +173_s 171
384 +176_s 174
385 +178_s 176
386 +181_s 179
387 +183_s 181
388 +185_s 183
389 +187_s 185
390 +191_s 189
391 +193_s 191
392 +194_s 192
393 +201_s 199
394 +206_s 204
395 +209_s 207
396 +211_s 209
397 +212_s 210
398 +213_s 211
399 +218_s 216
400 +232_s 230
401 +235_s 233
402 +249_s 247
403 +257_s 254
404 +258_s 255
405 +261_s 258
406 +266_s 263
407 +267_s 264
408 +270_s 267
409 +271_s 268
410 +276_s 273
411 +277_s 274
412 +282_s 279
413 +285_s 282
414 +294_s 290
415 +296_s 292
416 +304_s 300
417 +309_s 305
418 +311_s 307
419 +312_s 308
420 +319_s 315
421 +324_s 320
422 +327_s 323
423 +329_s 325
424 +331_s 327
425 +337_s 333
426 +338_s 334
427 +339_s 335
428 +341_s 337
429 +342_s 338
430 +344_s 340
431 +346_s 342
432 +351_s 347
433 +358_s 354
434 +369_s 365
435 +370_s 366
436 +373_s 369
437 +381_s 377
438 +385_s 381
439 +386_s 382
440 +389_s 385
441 +393_s 389
442 +394_s 390
443 +396_s 392
444 +397_s 393
445 +400_s 396
446 +404_s 400
447 +405_s 401
448 +406_s 402
449 +410_s 406
450 +412_s 408
451 +422_s 418
452 +423_s 419
453 +425_s 421
454 +426_s 422
455 +427_s 423
456 +430_s 426
457 +432_s 428
458 +435_s 431
459 +436_s 432
460 +439_s 435
461 +441_s 437
462 +449_s 445
463 +455_s 451
1 +library(methods)
2 +library(cluster)
3 +
# Write every cluster in `obj` to `filename`, one section per cluster.
#
# Args:
#   obj:      list of clusters (the script passes the value returned by
#             rect.hclust()). Each element is written with write.table(),
#             row names included, values separated by a single space.
#   filename: path of the text file to write. Output is appended, so an
#             existing file is extended rather than overwritten.
#
# Each section starts with a "Cluster: <n>" header. The header is written
# with row.names = TRUE, which prefixes it with the row name "1"; this is
# kept as-is so the layout of the generated files does not change.
print_cluster <- function(obj, filename) {

  # seq_along() yields an empty sequence for an empty list, whereas
  # 1:length(obj) would iterate over c(1, 0) and fail on obj[[1]].
  for (cl in seq_along(obj)) {

    write.table(paste("\nCluster: ", cl, "\n"), file = filename, append = TRUE, quote = FALSE, row.names = TRUE, col.names = FALSE)
    write.table(obj[[cl]], file = filename, append = TRUE, quote = FALSE, row.names = TRUE, col.names = FALSE, sep = " ")

  }
}
14 +###################################################################################################################################################
# Receive arguments: the first trailing command-line argument is the input file
arg <- commandArgs(trailingOnly = TRUE)

if (length(arg) == 0) {
  stop("Must supply input file.\n", call. = FALSE)
}

################################################# Run analysis ##################################################
# The input is read as a space-separated table without header: the first
# column is taken as the row name and must be followed by 299 numeric columns.
# NOTE(review): the column count is hard-coded; confirm it matches the vectors
# produced upstream.
vecs <- read.table(arg[1],
                   header = FALSE, row.names = 1, sep = ' ',
                   colClasses = c("character", rep("numeric", 299)))

# Agglomerative clustering of the rows with the 'ward.D' method
senclus <- hclust(dist(vecs), method = 'ward.D')
print("agglomerative coefficient: ")
print(coef.hclust(senclus))

# Save the image of the original dendrogram
png("Dendogram_ward.png", height = 608, width = 975)
plot(senclus, hang = -1)
dev.off()

# Partition into two, three and four clusters. For each k, draw the dendrogram
# with the k clusters boxed and write the membership of every cluster to file.
for (k in 2:4) {
  png(sprintf("Dendogram_%dclusters.png", k), height = 608, width = 975)
  plot(senclus, hang = -1)
  cls <- rect.hclust(senclus, k = k, border = 3:4)
  dev.off()

  membership_file <- sprintf("SentenceMembership_%dclusters.txt", k)

  # print_cluster() only appends, so remove the output of a previous run;
  # otherwise re-running the script would duplicate the cluster listings.
  if (file.exists(membership_file)) {
    file.remove(membership_file)
  }

  # Write file
  print_cluster(cls, membership_file)
}
65 +
66 +
67 +
68 +
1 +from optparse import OptionParser
2 +
def format_vector_line(line):
    """Return *line* rewritten as ``<id>_s <values>`` ending in a newline.

    The input line holds an identifier and its values separated by a tab.
    The identifier gets the suffix ``_s`` (a letter that makes it easier to
    recognise later) and the tab is replaced by a single space. Only the
    first two tab-separated fields are used.

    Raises:
        ValueError: if the line has no tab-separated values field.
    """
    # Isolate the identifier from its values
    fields = line.rstrip().split('\t')
    if len(fields) < 2:
        raise ValueError("Expected '<id><TAB><values>' but got: %r" % line)

    # Build the new line
    return ' '.join([fields[0] + '_s', fields[1]]) + '\n'


def main():
    """Parse the command line and convert the input vector file."""
    # Receive input and output
    parser = OptionParser()
    parser.add_option("-i", dest="inF",help="Input vector file. Sentence is separated by tabs from values which are sparated by simple space", metavar="PATH")
    parser.add_option("-o", dest="otF",help="output file name", metavar="PATH")

    (options, args) = parser.parse_args()
    # parser.error() prints the usage message and exits, so no explicit
    # exit call is needed after it.
    if args:
        parser.error("Unexpected positional arguments: " + ' '.join(args))
    if not options.inF or not options.otF:
        parser.error("Please indicate an input file (-i) and an output file (-o)")

    # Replace tab by space; both files are closed even if a line is malformed
    with open(options.inF) as vectors, open(options.otF, 'w') as newfile:
        for line in vectors:
            # Blank lines (e.g. a trailing empty line) carry no vector
            if not line.strip():
                continue
            newfile.write(format_vector_line(line))


if __name__ == "__main__":
    main()
...\ No newline at end of file ...\ No newline at end of file