Showing
3 changed files
with
1080 additions
and
356 deletions
data-sets/genes.txt
0 → 100644
| 1 | +TGF-beta1 | ||
| 2 | +Insulin-like growth factor-1 | ||
| 3 | +CD86 | ||
| 4 | +HS6ST2 | ||
| 5 | +Snail1/2 | ||
| 6 | +interferon-gamma | ||
| 7 | +Focal adhesion kinase | ||
| 8 | +protease-activated receptor-1 | ||
| 9 | +mPAP | ||
| 10 | +nuclear factor erythroid 2-related factor 2 | ||
| 11 | +NF-kB | ||
| 12 | +Fos | ||
| 13 | +HDAC6 | ||
| 14 | +CD90 | ||
| 15 | +interleukin-12p40 | ||
| 16 | +Mitogen-activated protein kinase-activated protein kinase-2 | ||
| 17 | +collagen-1, ET-1 | ||
| 18 | +smooth muscle a-actin | ||
| 19 | +caspase-3 | ||
| 20 | +Angiotensin II | ||
| 21 | +IL-23 | ||
| 22 | +HDAC | ||
| 23 | +matriptase | ||
| 24 | +CD124 | ||
| 25 | +Keap1 | ||
| 26 | +transforming growth factor-beta-1 | ||
| 27 | +TGF-a | ||
| 28 | +Cysteine-rich protein 1 | ||
| 29 | +glycogen synthase kinase-3beta | ||
| 30 | +Cartilage oligomeric matrix protein | ||
| 31 | +TGFb2 | ||
| 32 | +miR-338* | ||
| 33 | +C3aR | ||
| 34 | +E -cadherin | ||
| 35 | +TGF beta 1 | ||
| 36 | +miR-200b | ||
| 37 | +pVHL | ||
| 38 | +Activin | ||
| 39 | +BMP-8B | ||
| 40 | +Foxp-3 | ||
| 41 | +HAI-1 | ||
| 42 | +IL-1b | ||
| 43 | +WNT5A | ||
| 44 | +AQP5 | ||
| 45 | +MT1 | ||
| 46 | +stromal-cell-derived factor-1 | ||
| 47 | +tgfb2 | ||
| 48 | +monocyte chemotactic protein-1 | ||
| 49 | +SpA | ||
| 50 | +IL-1 beta, -2, -4, -5, -6, -8, -10, -17 | ||
| 51 | +insulin-like growth factor-I | ||
| 52 | +ECSOD | ||
| 53 | +SPARC | ||
| 54 | +E-CAD | ||
| 55 | +TGF-b(1) | ||
| 56 | +il17a | ||
| 57 | +COL1A2 | ||
| 58 | +TGF-b-1 | ||
| 59 | +Atg4b | ||
| 60 | +ET-B | ||
| 61 | +KGF | ||
| 62 | +NOX-4 | ||
| 63 | +col1a2 | ||
| 64 | +pten | ||
| 65 | +miR-26a | ||
| 66 | +S1PL | ||
| 67 | +alpha-smooth muscle actin | ||
| 68 | +Glut-1 and glucokinase | ||
| 69 | +TGFbeta1 | ||
| 70 | +Transforming growth factor beta1 | ||
| 71 | +CCl4 | ||
| 72 | +transforming growth factor -b1 | ||
| 73 | +serum amyloid P | ||
| 74 | +miR-338 | ||
| 75 | +Bone morphogenetic protein 3 | ||
| 76 | +COL3A1 | ||
| 77 | +ALK5 | ||
| 78 | +PSPH | ||
| 79 | +HO-1 | ||
| 80 | +histone deacetylase 4 | ||
| 81 | +tsp1 | ||
| 82 | +FGF-10 | ||
| 83 | +interferon (IFN)-gamma | ||
| 84 | +E-Cad | ||
| 85 | +Protease nexin-1 | ||
| 86 | +ILK | ||
| 87 | +TGF-b receptor II | ||
| 88 | +TGFB | ||
| 89 | +insulin promoting factor-1 | ||
| 90 | +EGFR | ||
| 91 | +LXRa | ||
| 92 | +Interleukin 17A | ||
| 93 | +MiR-185 | ||
| 94 | +Semaphorin-7A | ||
| 95 | +SPHK1 | ||
| 96 | +transforming growth factor-b(1) | ||
| 97 | +K-ras | ||
| 98 | +p27 | ||
| 99 | +pai1 | ||
| 100 | +Mmp19 | ||
| 101 | +Col1A1 | ||
| 102 | +Follistatin-like 1 | ||
| 103 | +Serine Protease | ||
| 104 | +Smad3 and 4 | ||
| 105 | +Bone morphogenetic protein | ||
| 106 | +PDGF-B | ||
| 107 | +BMP-4 | ||
| 108 | +min(-1 | ||
| 109 | +ORP150 | ||
| 110 | +bmp1 | ||
| 111 | +RAGE | ||
| 112 | +SGPL1 | ||
| 113 | +SOCS-1 | ||
| 114 | +tagln2 | ||
| 115 | +MMP-12 | ||
| 116 | +DRB1 | ||
| 117 | +MyD88 | ||
| 118 | +STC1 | ||
| 119 | +150-kDa oxygen-regulated protein | ||
| 120 | +MMP-9 | ||
| 121 | +IP10 | ||
| 122 | +terminal deoxynucleotidyl transferase | ||
| 123 | +TGF-beta 1 | ||
| 124 | +NADPH oxidase-4 | ||
| 125 | +transforming growth factor-beta | ||
| 126 | +Prostaglandin A(1) | ||
| 127 | +p70 | ||
| 128 | +IgG1 | ||
| 129 | +N-acetyl-l-cysteine | ||
| 130 | +Tgfbr1/2 | ||
| 131 | +HGFA | ||
| 132 | +DNMT1 | ||
| 133 | +nuclear factor kappa B | ||
| 134 | +plasminogen activator inhibitor 1 | ||
| 135 | +S1P | ||
| 136 | +angiopoietin-2 | ||
| 137 | +KCa3.1 | ||
| 138 | +ENA-78 | ||
| 139 | +WNT3a | ||
| 140 | +miR-134 | ||
| 141 | +P53 | ||
| 142 | +matrix metalloproteinase (MMP) -2 | ||
| 143 | +vimentin | ||
| 144 | +MMP-2 | ||
| 145 | +endoglin | ||
| 146 | +tumor necrosis factor superfamily protein 14 | ||
| 147 | +Transforming Growth Factor Beta 1 | ||
| 148 | +Cebpb | ||
| 149 | +Mknk2 | ||
| 150 | +SDF-1-TR1 | ||
| 151 | +Endothelin-1 | ||
| 152 | +HFL-1 | ||
| 153 | +MIG | ||
| 154 | +TGF-beta receptors type I and II (T beta R-I and T beta R-II | ||
| 155 | +IL-8 | ||
| 156 | +YY-1 | ||
| 157 | +MMP1 | ||
| 158 | +interferon gamma (IFN-y | ||
| 159 | +MIP-1 alpha | ||
| 160 | +IL-4RA | ||
| 161 | +HS6ST1 | ||
| 162 | +BMPR2 | ||
| 163 | +sL1 | ||
| 164 | +transforming growth factor-alpha | ||
| 165 | +Transgelin | ||
| 166 | +TGF-beta 3 | ||
| 167 | +lox | ||
| 168 | +CTGF | ||
| 169 | +MCP-1 | ||
| 170 | +tumor necrosis factor | ||
| 171 | +MT3 | ||
| 172 | +cytosolic phospholipase A(2) | ||
| 173 | +Thymic stromal lymphopoietin | ||
| 174 | +phosphoglycerate dehydrogenase | ||
| 175 | +Bax | ||
| 176 | +Caveolin-1 (cav-1 | ||
| 177 | +UCHL5 | ||
| 178 | +TIMP-1 | ||
| 179 | +JunD | ||
| 180 | +Transforming growth factor (TGF)-b1 | ||
| 181 | +MicroRNA (miR)-221 | ||
| 182 | +miR-424 | ||
| 183 | +YAP | ||
| 184 | +Aortic carboxypeptidase-like protein | ||
| 185 | +Microsomal prostaglandin E synthase-1 | ||
| 186 | +b2 -adrenoceptors | ||
| 187 | +YKL-40 | ||
| 188 | +VE-cadherin | ||
| 189 | +transforming growth factor-beta(1) | ||
| 190 | +PML | ||
| 191 | +CXCL12 | ||
| 192 | +VEGF-C | ||
| 193 | +LMP1 | ||
| 194 | +miR-30a | ||
| 195 | +insulin-like growth factor)-1 | ||
| 196 | +histone deacetylase | ||
| 197 | +b-catenin | ||
| 198 | +RANTES | ||
| 199 | +latent membrane protein (LMP) 1 | ||
| 200 | +Itgb6 | ||
| 201 | +CXCL1 | ||
| 202 | +VEGFR-3 | ||
| 203 | +glucokinase | ||
| 204 | +TNFSF14 | ||
| 205 | +matrix metalloproteinase-3 | ||
| 206 | +TNFa | ||
| 207 | +renin | ||
| 208 | +VCAM1 | ||
| 209 | +GATA-6 | ||
| 210 | +Transforming growth factor-b(1 | ||
| 211 | +angiotensinogen | ||
| 212 | +Smad7 | ||
| 213 | +cC1q-R | ||
| 214 | +IL-1R | ||
| 215 | +tgfbr1/2 | ||
| 216 | +FoxP3 | ||
| 217 | +a-SMA | ||
| 218 | +CCL12 | ||
| 219 | +CCN2 | ||
| 220 | +PDE1A | ||
| 221 | +PAR-2 | ||
| 222 | +serum albumin (HSA)-thioredoxin 1 | ||
| 223 | +tissue inhibitor of metalloproteinase (TIMP)-1 | ||
| 224 | +HIF-1a | ||
| 225 | +FRNK | ||
| 226 | +TGF-alpha | ||
| 227 | +miR-200c | ||
| 228 | +poly-ADP ribose polymerase | ||
| 229 | +Spiruchostatin A | ||
| 230 | +Sgpl1 | ||
| 231 | +FXa | ||
| 232 | +Extracellular superoxide dismutase | ||
| 233 | +Tgf-b | ||
| 234 | +Tb4 | ||
| 235 | +CUX1 | ||
| 236 | +gC1q-R | ||
| 237 | +FGF10 | ||
| 238 | +Tpo | ||
| 239 | +AKT | ||
| 240 | +insulin-like growth factor-1 | ||
| 241 | +IL-4R alpha | ||
| 242 | +IFN-gamma | ||
| 243 | +JAK2 | ||
| 244 | +MMP-7 | ||
| 245 | +smad3 | ||
| 246 | +NOX4 | ||
| 247 | +Bach1 | ||
| 248 | +caspase-9 | ||
| 249 | +transforming growth factor b1 | ||
| 250 | +interleukin-6 | ||
| 251 | +Serpin B3 | ||
| 252 | +Pentraxin-2 | ||
| 253 | +T-cell lymphoma invasion and metastasis 1 | ||
| 254 | +GR | ||
| 255 | +prostaglandin F (PGF) receptor | ||
| 256 | +serine protease | ||
| 257 | +MMP-19 | ||
| 258 | +SMAD 3 | ||
| 259 | +MMP7 | ||
| 260 | +p62 | ||
| 261 | +connective tissue growth factor | ||
| 262 | +Renin | ||
| 263 | +discoidin domain receptor 2 | ||
| 264 | +mothers against decapentaplegic homolog 3 | ||
| 265 | +IL-1RA | ||
| 266 | +Trx | ||
| 267 | +HAI-2 | ||
| 268 | +WNT1-inducible signaling pathway protein 1 | ||
| 269 | +Ubiquitin carboxyl-terminal hydrolase-L5 | ||
| 270 | +CC16 | ||
| 271 | +interleukin-10 | ||
| 272 | +LTBP-1 | ||
| 273 | +EDA | ||
| 274 | +BMP-5 | ||
| 275 | +miR-154 | ||
| 276 | +CD80 | ||
| 277 | +p110 | ||
| 278 | +LTBP1 and 2 | ||
| 279 | +periostin | ||
| 280 | +EP2 | ||
| 281 | +TGF-beta | ||
| 282 | +C3a | ||
| 283 | +H2O2 and tumor necrosis factor alpha | ||
| 284 | +CD1 | ||
| 285 | +Mir-154 | ||
| 286 | +cyclooxygenase-2 | ||
| 287 | +LTBP] 1, 2, and 4 | ||
| 288 | +matrix metalloproteinase-14(+)/matrix metalloproteinase-2(+) myofibroblasts | ||
| 289 | +Chop | ||
| 290 | +TGF-beta(1) | ||
| 291 | +MK2 | ||
| 292 | +SMO | ||
| 293 | +PPARy | ||
| 294 | +insulin-like growth factor binding protein-3 | ||
| 295 | +TGF-beta(1), collagen type Ialpha1 | ||
| 296 | +hyaluronan synthase 2 | ||
| 297 | +endothelin type A receptors | ||
| 298 | +TNF-alpha | ||
| 299 | +TNC | ||
| 300 | +transforming growth factor (TGF)-beta 1 | ||
| 301 | +Enhancer of zeste homolog 2 | ||
| 302 | +Snail | ||
| 303 | +IL-1Ra | ||
| 304 | +MMP-1 | ||
| 305 | +interleukin-8 | ||
| 306 | +PGA(1) | ||
| 307 | +lymphotoxin beta receptor | ||
| 308 | +TGF-beta 2 | ||
| 309 | +Lefty A | ||
| 310 | +Sulf1 | ||
| 311 | +serine hydroxymethyltransferase 2 | ||
| 312 | +IFN-y | ||
| 313 | +LOX | ||
| 314 | +Transforming growth factor (TGF)-b | ||
| 315 | +TGF- b | ||
| 316 | +NOS | ||
| 317 | +Smad 7 | ||
| 318 | +hypoxia-inducible factor 1a | ||
| 319 | +Transglutaminase 2 | ||
| 320 | +mTORC2 | ||
| 321 | +C5a | ||
| 322 | +annexin V | ||
| 323 | +thyroid transcription factor (TTF)-1 | ||
| 324 | +CXCL9 | ||
| 325 | +transforming growth factor-beta 1 | ||
| 326 | +Vascular endothelial growth factor | ||
| 327 | +beclin-1 | ||
| 328 | +extracellular signal-regulated kinase (ERK)1/2 | ||
| 329 | +TGF-{beta}1 | ||
| 330 | +Caspase-3 | ||
| 331 | +CCN5 | ||
| 332 | +IL-17A | ||
| 333 | +ARPC2 | ||
| 334 | +matrix metalloproteinase 9 | ||
| 335 | +p16 | ||
| 336 | +glucagon like peptide-1 | ||
| 337 | +L1-CAM | ||
| 338 | +TGF -b | ||
| 339 | +fibroblast growth factor-1 | ||
| 340 | +TGFbeta | ||
| 341 | +IGFBP-1 | ||
| 342 | +ubiquitin carboxyl-terminal hydrolase-L5 | ||
| 343 | +SMAD3 | ||
| 344 | +glucocorticoid receptor | ||
| 345 | +Transforming growth factor b1 | ||
| 346 | +TNF)-alpha | ||
| 347 | +IL18 | ||
| 348 | +TOB2 | ||
| 349 | +TbRII | ||
| 350 | +WNT7B | ||
| 351 | +SMAD-3 | ||
| 352 | +HLA-A, -B, -DRB1, tumor necrosis factor alpha | ||
| 353 | +TF | ||
| 354 | +miR-3107 | ||
| 355 | +zonula occludens-1 | ||
| 356 | +Nuclear factor-erythroid-related factor 2 | ||
| 357 | +Sulf2 | ||
| 358 | +ADAMTS9 | ||
| 359 | +Surfactant Protein-C | ||
| 360 | +TNFalpha | ||
| 361 | +IL10 | ||
| 362 | +actin related protein 2/3 complex, subunit 2 | ||
| 363 | +secreted protein acidic and rich in cysteine | ||
| 364 | +Krebs Von Den Lungen-6 | ||
| 365 | +Il-1b | ||
| 366 | +HNP-1 | ||
| 367 | +Fstl1 | ||
| 368 | +miR-382 | ||
| 369 | +matrix metalloproteinase (MMP)-2 and -9 | ||
| 370 | +SphK1/2 | ||
| 371 | +IQGAP1 | ||
| 372 | +SNAI2 | ||
| 373 | +Rictor | ||
| 374 | +SHH | ||
| 375 | +ACTA2 | ||
| 376 | +SIRT1 | ||
| 377 | +Sema 7a-CD4 | ||
| 378 | +WNT10A | ||
| 379 | +Insulin-like growth factor binding protein-3 | ||
| 380 | +FSP-1 | ||
| 381 | +poly(ADP-ribose) polymerase | ||
| 382 | +LRP5 | ||
| 383 | +MMP-3 | ||
| 384 | +Interleukin (IL) 8 | ||
| 385 | +Wilms' tumor 1 | ||
| 386 | +Fibrillin-2 | ||
| 387 | +tnf-alpha | ||
| 388 | +aSMA | ||
| 389 | +IL-9 | ||
| 390 | +HLA-A | ||
| 391 | +cartilage oligomeric matrix protein | ||
| 392 | +thrombin | ||
| 393 | +tumor necrosis factor alpha | ||
| 394 | +beta-catenin | ||
| 395 | +FAK | ||
| 396 | +Th1 | ||
| 397 | +YY1 | ||
| 398 | +NFkB | ||
| 399 | +Lox | ||
| 400 | +Caveolin-1 | ||
| 401 | +Membrane-type (MT)-MMPs | ||
| 402 | +Galectin-3 | ||
| 403 | +smoothened | ||
| 404 | +Smad3 | ||
| 405 | +claudins-1 and -3 | ||
| 406 | +ERK1/2 | ||
| 407 | +Bone Morphogenic Protein Receptor 2 | ||
| 408 | +acyl-CoA oxidase 1 | ||
| 409 | +serpine1 | ||
| 410 | +VASH-2 | ||
| 411 | +miR-326 | ||
| 412 | +TGFB1 | ||
| 413 | +phosphoinositide 3-kinase | ||
| 414 | +bone morphogenetic protein | ||
| 415 | +interleukin (IL)-13 | ||
| 416 | +c-Myc | ||
| 417 | +TGF-b3 | ||
| 418 | +NFATc2 | ||
| 419 | +TIMP-2 | ||
| 420 | +SMAD2 | ||
| 421 | +CD25 | ||
| 422 | +Smad2/3 | ||
| 423 | +V-ATPase | ||
| 424 | +LMP-1 | ||
| 425 | +C1q receptor | ||
| 426 | +glutathione peroxidase 1 | ||
| 427 | +C5a receptor | ||
| 428 | +IL-1 alpha, -1R, -1RA, -2, -4, -4R alpha, -6, -10 | ||
| 429 | +platelet-derived growth factor isoforms (PDGF) A and B | ||
| 430 | +IL-1-beta | ||
| 431 | +Transforming growth factor-beta1 | ||
| 432 | +galectin-3 | ||
| 433 | +PAR1 | ||
| 434 | +SIRT7 | ||
| 435 | +p65 | ||
| 436 | +Transforming growth factor beta | ||
| 437 | +cPLA(2) | ||
| 438 | +desmin | ||
| 439 | +Histone deacetylase 6 | ||
| 440 | +EMT | ||
| 441 | +transforming growth factor (TGF)-beta1 | ||
| 442 | +IGFBP-1 and -2 | ||
| 443 | +TGFBR-2 | ||
| 444 | +transforming growth factor beta | ||
| 445 | +HSP90 | ||
| 446 | +miR-29b | ||
| 447 | +CD248 | ||
| 448 | +PPARbeta | ||
| 449 | +follistatin | ||
| 450 | +TGF-beta(1 | ||
| 451 | +Janus kinase type 2 | ||
| 452 | +A-myb | ||
| 453 | +nuclear factor E2-related factor 2 | ||
| 454 | +Heat shock protein (HSP) 47 | ||
| 455 | +VCAM-1 | ||
| 456 | +mmu-miR-326 | ||
| 457 | +PARP | ||
| 458 | +LXA4 receptor | ||
| 459 | +G-CSF | ||
| 460 | +transforming-growth factor beta 1 | ||
| 461 | +Matriptase | ||
| 462 | +MiR-5100 | ||
| 463 | +IL-6 | ||
| 464 | +VEGFR | ||
| 465 | +CXCL-9 | ||
| 466 | +Rpn6 | ||
| 467 | +IL-10 | ||
| 468 | +alpha1 type I collagen | ||
| 469 | +Smad4 | ||
| 470 | +matrix metalloproteinase-9 | ||
| 471 | +PHLPP | ||
| 472 | +Tumor necrosis factor-alpha | ||
| 473 | +thyroid transcription factor-1 | ||
| 474 | +insulin | ||
| 475 | +Ang-2 | ||
| 476 | +basic FGF | ||
| 477 | +tagln | ||
| 478 | +TGFbeta(1) | ||
| 479 | +b-FGF | ||
| 480 | +miR-210 | ||
| 481 | +Lrp5 and 6 | ||
| 482 | +PDGF-b | ||
| 483 | +FN1 | ||
| 484 | +HMGA2 | ||
| 485 | +LYCAT | ||
| 486 | +Tumor necrosis factor a | ||
| 487 | +IL-2 | ||
| 488 | +IL1-b | ||
| 489 | +PAI-1 | ||
| 490 | +VEGFR-2 | ||
| 491 | +igf1 | ||
| 492 | +Ho-1 | ||
| 493 | +aquaporin-5 | ||
| 494 | +VEGF receptor-2 | ||
| 495 | +COMP | ||
| 496 | +c-jun | ||
| 497 | +mir-155 | ||
| 498 | +megakaryoblastic leukemia 1 | ||
| 499 | +Kca3.1 | ||
| 500 | +tissue inhibitors of metalloproteinases-1 | ||
| 501 | +Secreted protein acidic and rich in cysteine | ||
| 502 | +CD-1 | ||
| 503 | +Cyclin D1 | ||
| 504 | +tenascin C | ||
| 505 | +phosphoserine aminotransferase 1 | ||
| 506 | +Lin28B | ||
| 507 | +Gremlin | ||
| 508 | +tropomodulin 3 | ||
| 509 | +PIAS4 | ||
| 510 | +interleukin 10 | ||
| 511 | +epidermal growth factor receptor | ||
| 512 | +c-IAP2 | ||
| 513 | +fibroblast growth factor receptor 2 | ||
| 514 | +CRP1 | ||
| 515 | +Collagen Triple Helix Repeat-Containing-1 | ||
| 516 | +transforming growth factor-b1 | ||
| 517 | +PTX-2 | ||
| 518 | +CD11b | ||
| 519 | +IL-4 R alpha | ||
| 520 | +TG2 | ||
| 521 | +IGFBP-2 | ||
| 522 | +cytochrome b | ||
| 523 | +BLTR | ||
| 524 | +lysyl oxidase | ||
| 525 | +alpha smooth muscle actin | ||
| 526 | +UCH37 | ||
| 527 | +Receptor for advanced glycation end products | ||
| 528 | +IL-1 beta | ||
| 529 | +miR-376c | ||
| 530 | +miR-153 | ||
| 531 | +Smad2/3/4 | ||
| 532 | +LEF/TCF | ||
| 533 | +thymosin b4 | ||
| 534 | +plasminogen activator inhibitor-1 | ||
| 535 | +beta-galactosidase | ||
| 536 | +Stanniocalcin-1 | ||
| 537 | +THP-1 | ||
| 538 | +Egr-1 | ||
| 539 | +beta-gal | ||
| 540 | +PDGFR | ||
| 541 | +Transforming growth factor b-1 | ||
| 542 | +transforming growth factor beta 1 | ||
| 543 | +miR-410 | ||
| 544 | +TGF-b(1 | ||
| 545 | +focal adhesion kinase | ||
| 546 | +STAT3 | ||
| 547 | +Prostaglandin F(2alpha) receptor | ||
| 548 | +Nox-4 | ||
| 549 | +Toll-like receptor 9 | ||
| 550 | +CCL2 | ||
| 551 | +GM-CSF | ||
| 552 | +folate receptor beta | ||
| 553 | +Elk1 | ||
| 554 | +interleukin (IL)-1beta | ||
| 555 | +mTOR | ||
| 556 | +vascular cell adhesion molecule 1 | ||
| 557 | +E-cadherin | ||
| 558 | +PPARgamma | ||
| 559 | +Serpine1 | ||
| 560 | +PAI1 | ||
| 561 | +TIMP | ||
| 562 | +SFTPC | ||
| 563 | +VEGF and IL-12 | ||
| 564 | +LTBP 4 | ||
| 565 | +Nuclear factor erythroid 2-related factor 2 | ||
| 566 | +Jun NH2-terminal kinase | ||
| 567 | +FAK(Y397 | ||
| 568 | +IL-18 | ||
| 569 | +Transforming growth factor-b | ||
| 570 | +il-1b | ||
| 571 | +SphK | ||
| 572 | +DDR2 | ||
| 573 | +FOXF1 | ||
| 574 | +TIMP1 | ||
| 575 | +SHMT2 | ||
| 576 | +SOD3 | ||
| 577 | +TGFb1 | ||
| 578 | +FN | ||
| 579 | +TIMP2 | ||
| 580 | +FRbeta | ||
| 581 | +Interleukin 4 | ||
| 582 | +E-cad | ||
| 583 | +p38 | ||
| 584 | +VEGF-D | ||
| 585 | +Periostin | ||
| 586 | +Sp1 | ||
| 587 | +CC1q-R | ||
| 588 | +KL-6 | ||
| 589 | +ADAM19 | ||
| 590 | +miR-185 | ||
| 591 | +USP11 | ||
| 592 | +IL8 | ||
| 593 | +Akt2 | ||
| 594 | +BMPER | ||
| 595 | +IFN-gammaR | ||
| 596 | +Akt | ||
| 597 | +IL-1 | ||
| 598 | +hepatocyte growth factor | ||
| 599 | +MAPKAPK2 | ||
| 600 | +uncoupling protein 2 | ||
| 601 | +thrombospondin-1 | ||
| 602 | +serum response factor | ||
| 603 | +CD55 | ||
| 604 | +Gpx1 | ||
| 605 | +Id3 | ||
| 606 | +PAR-1 | ||
| 607 | +keratinocyte growth factor | ||
| 608 | +TIGAR | ||
| 609 | +NADPH oxidase 4 | ||
| 610 | +integrin-linked kinase | ||
| 611 | +interleukin-1 receptor antagonist protein | ||
| 612 | +PHGDH | ||
| 613 | +mPGES-1 | ||
| 614 | +matrix metalloproteinase 14 | ||
| 615 | +STIP1 | ||
| 616 | +CCN1 | ||
| 617 | +angiopoietin-1 | ||
| 618 | +CD44 | ||
| 619 | +TGF-b1 | ||
| 620 | +PN-1 | ||
| 621 | +BMP endothelial cell precursor-derived regulator | ||
| 622 | +MFG-E8 | ||
| 623 | +PPAR | ||
| 624 | +protein kinase B | ||
| 625 | +IGFBP-3 | ||
| 626 | +EMMPRIN | ||
| 627 | +cyclosporine A | ||
| 628 | +semaphorin-7A | ||
| 629 | +SNAI1 | ||
| 630 | +Pink1 | ||
| 631 | +PINK1 | ||
| 632 | +Bone morphogenetic protein-4 | ||
| 633 | +CBP | ||
| 634 | +IL-17 | ||
| 635 | +AT1 | ||
| 636 | +TGFBR2 | ||
| 637 | +N-acetyl-L-cysteine | ||
| 638 | +endothelin-1 | ||
| 639 | +smad-2 | ||
| 640 | +Interleukin (IL)-6 | ||
| 641 | +ET-1 | ||
| 642 | +AP-1 | ||
| 643 | +HDAC4 | ||
| 644 | +c-Fos | ||
| 645 | +HSP27 | ||
| 646 | +WISP1 | ||
| 647 | +Transforming growth factor beta 1 | ||
| 648 | +jag1 | ||
| 649 | +Nrf2 | ||
| 650 | +cyclooxygenase 2 | ||
| 651 | +smad6/7 | ||
| 652 | +WNT5a | ||
| 653 | +mir-154 | ||
| 654 | +SP-D | ||
| 655 | +Matrix metalloproteinase (MMP)-19 | ||
| 656 | +Vasohibin-2 | ||
| 657 | +caspase 3 | ||
| 658 | +Smad1/5 | ||
| 659 | +miR-200a | ||
| 660 | +TNF-a | ||
| 661 | +IGFBP-3 and -5 | ||
| 662 | +p53 | ||
| 663 | +Serpin B4 | ||
| 664 | +Transcription factor GATA-6 | ||
| 665 | +ACLP | ||
| 666 | +transgelin | ||
| 667 | +NADPH Oxidase 4 | ||
| 668 | +ZO-1 | ||
| 669 | +Cthrc1 | ||
| 670 | +VEGF-A | ||
| 671 | +Plasminogen activator inhibitor 1 | ||
| 672 | +p300 | ||
| 673 | +extent, type B receptors | ||
| 674 | +il12p40 | ||
| 675 | +miR-29c | ||
| 676 | +IL-1beta | ||
| 677 | +interleukin (IL)-17 | ||
| 678 | +transforming growth factor b(1) | ||
| 679 | +LTB(4) receptor | ||
| 680 | +BMP | ||
| 681 | +extracellular signal--regulated kinase | ||
| 682 | +interleukin-1 beta | ||
| 683 | +TLR4 | ||
| 684 | +AGT | ||
| 685 | +PP1 | ||
| 686 | +IGF-1 | ||
| 687 | +Thymosin b4 | ||
| 688 | +SOCS1 | ||
| 689 | +SMAD)2 | ||
| 690 | +E prostanoid receptor 2 | ||
| 691 | +b2 -AR | ||
| 692 | +microRNA (miR)-155 | ||
| 693 | +peroxisome proliferator-activated receptor-y | ||
| 694 | +Discoidin Domain Receptor 2 | ||
| 695 | +smad2/3 | ||
| 696 | +gp130 | ||
| 697 | +miR-31 | ||
| 698 | +MKL1 | ||
| 699 | +PPARalpha | ||
| 700 | +TTF-1 | ||
| 701 | +Erk1/2 | ||
| 702 | +ERK | ||
| 703 | +RXFP1 | ||
| 704 | +interleukin-18 | ||
| 705 | +protease nexin-1 | ||
| 706 | +Syndecan-2 | ||
| 707 | +RhoA | ||
| 708 | +CD34 | ||
| 709 | +N -cadherin | ||
| 710 | +Rta | ||
| 711 | +PI3K | ||
| 712 | +fibroblast specific protein-1 | ||
| 713 | +IGFBP-5 | ||
| 714 | +PDGB | ||
| 715 | +gremlin | ||
| 716 | +HMG-CoA) reductase | ||
| 717 | +Yin Yang 1 | ||
| 718 | +interleukin-1 | ||
| 719 | +p38 mitogen-activated protein kinase | ||
| 720 | +Vi | ||
| 721 | +CD11c | ||
| 722 | +IL-4 | ||
| 723 | +NEU1 | ||
| 724 | +VEGF | ||
| 725 | +CD46 | ||
| 726 | +protease-activated receptor (PAR)-2 | ||
| 727 | +C/EBP homologous protein | ||
| 728 | +ATG4B | ||
| 729 | +IKKa | ||
| 730 | +AKT2 | ||
| 731 | +calnexin | ||
| 732 | +CXCR3 | ||
| 733 | +peroxisome proliferator-activated receptor y | ||
| 734 | +fibroblast growth factor-2 | ||
| 735 | +TGF-beta receptor II | ||
| 736 | +CsA | ||
| 737 | +miR -221 | ||
| 738 | +BAX inhibitor-1 | ||
| 739 | +miR-5100 | ||
| 740 | +Ang-1 | ||
| 741 | +PEX13p | ||
| 742 | +SDC2 | ||
| 743 | +PARK2 | ||
| 744 | +5-HTR(1A/B) and 5-HTR(2B | ||
| 745 | +fibronectin | ||
| 746 | +interleukin (IL)-8 | ||
| 747 | +BMP-7 | ||
| 748 | +EP1 | ||
| 749 | +CDCP1 | ||
| 750 | +protease-activated receptor-2 | ||
| 751 | +CD8 | ||
| 752 | +CD206 | ||
| 753 | +TGF-beta receptors (T beta R-I and T beta R-II | ||
| 754 | +HGF | ||
| 755 | +c-Jun NH-terminal kinase | ||
| 756 | +Col3a1 | ||
| 757 | +IRAP | ||
| 758 | +Bcl-2 | ||
| 759 | +GLP-1 | ||
| 760 | +N-cadherin | ||
| 761 | +Sema 7a | ||
| 762 | +SDF-1 | ||
| 763 | +Wnt | ||
| 764 | +GLP-1 receptor | ||
| 765 | +sphingosine kinase 1 | ||
| 766 | +Smad2 | ||
| 767 | +transforming growth factor b-1 | ||
| 768 | +p63 | ||
| 769 | +TLR9 | ||
| 770 | +IL-13 | ||
| 771 | +X-linked inhibitor of apoptosis | ||
| 772 | +CD19 | ||
| 773 | +syndecan-2 | ||
| 774 | +EGR1 | ||
| 775 | +STUB1 | ||
| 776 | +Lysocardiolipin acyltransferase | ||
| 777 | +IL8, -6, and -1B | ||
| 778 | +Wnt1-inducible signaling protein 1 | ||
| 779 | +TGF-b | ||
| 780 | +tenascin | ||
| 781 | +hypoxia-inducible factor-1a | ||
| 782 | +IP-10 | ||
| 783 | +XIAP | ||
| 784 | +transforming growth factor beta1 | ||
| 785 | +caveolin-1 | ||
| 786 | +endothelial nitric oxide synthase | ||
| 787 | +IGF-2 | ||
| 788 | +CCR2 | ||
| 789 | +inducible nitric oxide synthase | ||
| 790 | +bone morphogenetic protein 7 | ||
| 791 | +platelet-derived growth factor, insulin-like growth factor type I, and transforming growth factor beta | ||
| 792 | +Itgb1/6 | ||
| 793 | +HIF-1 | ||
| 794 | +SRF | ||
| 795 | +miR-29a | ||
| 796 | +MPP-9 | ||
| 797 | +miR-155 | ||
| 798 | +PDGF-A and -B | ||
| 799 | +FoxO3a | ||
| 800 | +Cub domain containing protein 1 | ||
| 801 | +Muc5ac | ||
| 802 | +Yin yang 1 | ||
| 803 | +Transforming growth factor b | ||
| 804 | +Ltbp1 | ||
| 805 | +NOX-2 | ||
| 806 | +tissue inhibitor of metalloproteinase (TIMP)-1, -2, -3, and -4 | ||
| 807 | +Nox1 | ||
| 808 | +X-box-binding protein 1 | ||
| 809 | +miR-21 | ||
| 810 | +Wnt7B | ||
| 811 | +HSP90b | ||
| 812 | +PPAR-a | ||
| 813 | +leucine-rich alpha-2 glycoprotein | ||
| 814 | +TNF alpha | ||
| 815 | +estrogen receptor 1 | ||
| 816 | +TSLP | ||
| 817 | +signal transducer and activator of transcription 3 | ||
| 818 | +IL-8 and b-FGF | ||
| 819 | +matrix metalloproteinase-7 | ||
| 820 | +mitogen-activated protein kinase-activated protein kinase-2 | ||
| 821 | +Smad-3 | ||
| 822 | +matrix metalloproteinase (MMP)-9 | ||
| 823 | +beta 1 integrin | ||
| 824 | +interleukin (IL)-6 | ||
| 825 | +basic-fibroblast growth factor | ||
| 826 | +gastrin | ||
| 827 | +Pdgfb | ||
| 828 | +Itga2/3 | ||
| 829 | +HLF | ||
| 830 | +snail | ||
| 831 | +TGFb(1) | ||
| 832 | +Smurf2 | ||
| 833 | +STAT1 | ||
| 834 | +tissue factor | ||
| 835 | +Glucagon like peptide-1 | ||
| 836 | +NAC | ||
| 837 | +Lrp5 | ||
| 838 | +transforming growth factor b expression | ||
| 839 | +insulin-like growth factor (IGF)-I | ||
| 840 | +superoxide dismutase 3 | ||
| 841 | +vascular endothelial growth factor receptor | ||
| 842 | +Wt1 | ||
| 843 | +as c | ||
| 844 | +Transforming growth factor (TGF)-beta1 | ||
| 845 | +IGF-I | ||
| 846 | +UCP2 | ||
| 847 | +Protease activated receptor-1 | ||
| 848 | +G1 and G2 | ||
| 849 | +transforming growth factor beta-1 | ||
| 850 | +FHIT | ||
| 851 | +Wnt5A | ||
| 852 | +TGF beta1 | ||
| 853 | +MRTF-A | ||
| 854 | +platelet-derived growth factor receptor | ||
| 855 | +SphK1 | ||
| 856 | +extracellular superoxide dismutase | ||
| 857 | +Acta2 | ||
| 858 | +Toll-like receptor 4 | ||
| 859 | +ICAT | ||
| 860 | +CXCL10 | ||
| 861 | +alpha-SMA | ||
| 862 | +Bax inhibitor-1 | ||
| 863 | +keratin 6 and 14 | ||
| 864 | +AT2 | ||
| 865 | +MT1- and MT2-MMPs | ||
| 866 | +NOX1 | ||
| 867 | +beta 2 | ||
| 868 | +PAI-1-siRNA | ||
| 869 | +fibrillin-2 | ||
| 870 | +col3a1 | ||
| 871 | +IPF-1, insulin, and glucokinase | ||
| 872 | +cyclin D1 | ||
| 873 | +COX-2 | ||
| 874 | +CD4 | ||
| 875 | +MT2 | ||
| 876 | +Transforming growth factor-beta | ||
| 877 | +matrix metalloprotease-1 | ||
| 878 | +Thy-1 | ||
| 879 | +ATG7 | ||
| 880 | +neuraminidase 1 | ||
| 881 | +Mkl1 | ||
| 882 | +LPA1 | ||
| 883 | +Ost-4 | ||
| 884 | +MMP -9 | ||
| 885 | +HIF1a | ||
| 886 | +Semaphorin 7a | ||
| 887 | +EP3 | ||
| 888 | +Transforming growth factor-b1 | ||
| 889 | +PSAT1 | ||
| 890 | +High mobility group AT-hook 2 | ||
| 891 | +jagged 1 | ||
| 892 | +n-cadherin | ||
| 893 | +Janus kinase 2 | ||
| 894 | +let-7d | ||
| 895 | +Fas ligand | ||
| 896 | +integrin alpha v | ||
| 897 | +MK2(-/-) MEF | ||
| 898 | +interleukin-1beta | ||
| 899 | +p21 | ||
| 900 | +Col1a2 | ||
| 901 | +MT3-MMP | ||
| 902 | +PDGF-A | ||
| 903 | +JNK | ||
| 904 | +Transforming Growth Factor- b | ||
| 905 | +PP2A | ||
| 906 | +miR | ||
| 907 | +claudins-1, -3, and -5 | ||
| 908 | +BARD1 | ||
| 909 | +relaxin/insulin-like family peptide receptor 1 | ||
| 910 | +MMP2 | ||
| 911 | +ATG5 | ||
| 912 | +MEK | ||
| 913 | +CAV1 | ||
| 914 | +SIRT3 | ||
| 915 | +ANGII | ||
| 916 | +activin | ||
| 917 | +p38 MAPK | ||
| 918 | +interleukin-1 (IL-1)Ra | ||
| 919 | +hsa-miR-326 | ||
| 920 | +MAP3K19 | ||
| 921 | +surfactant protein C | ||
| 922 | +Nox4 | ||
| 923 | +collagen (Col)1a1 | ||
| 924 | +SAP | ||
| 925 | +miR-9-5p | ||
| 926 | +interleukin (IL)-1b | ||
| 927 | +p21(waf1 | ||
| 928 | +MicroRNA-29c | ||
| 929 | +H19 | ||
| 930 | +Protease-activated receptor-1 | ||
| 931 | +ALXR | ||
| 932 | +miR-487b | ||
| 933 | +TGF beta | ||
| 934 | +Connective tissue growth factor | ||
| 935 | +matrix metalloproteinase-14 | ||
| 936 | +SERPINE1 | ||
| 937 | +mir-21 | ||
| 938 | +CC chemokine receptor 2 | ||
| 939 | +PTEN | ||
| 940 | +IL-1 alpha | ||
| 941 | +IPF-1 | ||
| 942 | +c-IAP)1 | ||
| 943 | +Il-17a | ||
| 944 | +Pigment epithelium-derived factor | ||
| 945 | +fibroblast growth factor 10 | ||
| 946 | +connective-tissue growth factor | ||
| 947 | +BMP3 | ||
| 948 | +transforming growth factor-beta1 | ||
| 949 | +Annexin V | ||
| 950 | +HS6ST1/2 | ||
| 951 | +fibroblast growth factor-10 | ||
| 952 | +BI-1 | ||
| 953 | +lymphotactin | ||
| 954 | +tenascin-C | ||
| 955 | +miR-455 | ||
| 956 | +MT1-MMP | ||
| 957 | +transforming growth factor-b | ||
| 958 | +3-hydroxy-3-methylglutaryl CoenzymeA (HMG CoA) reductase | ||
| 959 | +SMAD2/3 | ||
| 960 | +MiR-338 | ||
| 961 | +MMP-2 and -9 | ||
| 962 | +LTBP)-1 | ||
| 963 | +suppressor of cytokine signaling 1 | ||
| 964 | +neutrophil peptide (HNP)-1 | ||
| 965 | +SphK2 | ||
| 966 | +S1P lyase | ||
| 967 | +ltbp1/2 | ||
| 968 | +iNOS | ||
| 969 | +TGFb(1 | ||
| 970 | +ACE | ||
| 971 | +BCL-2 | ||
| 972 | +Oct-4 | ||
| 973 | +SMA | ||
| 974 | +MMP-9 and tissue inhibitor of metalloproteinase-1 | ||
| 975 | +Hepatocyte growth factor | ||
| 976 | +Ets1 | ||
| 977 | +beta-actin | ||
| 978 | +VASH-1 | ||
| 979 | +CD117 | ||
| 980 | +THBS1 | ||
| 981 | +HO1 | ||
| 982 | +Hsp90 | ||
| 983 | +Extracellular Matrix Metalloproteinase Inducer | ||
| 984 | +COL1A1 | ||
| 985 | +AKT1 | ||
| 986 | +FGF-1 | ||
| 987 | +interleukin 6 | ||
| 988 | +caspase-3/7 | ||
| 989 | +IL6 | ||
| 990 | +receptor for advanced glycation end products | ||
| 991 | +EP4 | ||
| 992 | +TGFb | ||
| 993 | +HSP47 | ||
| 994 | +miR-140 | ||
| 995 | +heat shock protein (HSP)90 | ||
| 996 | +insulin-like growth factor binding proteins (IGFBP)-3 and -5 | ||
| 997 | +EZH2 | ||
| 998 | +Toll-Like Receptor 9 | ||
| 999 | +Col3A1 | ||
| 1000 | +Transforming Growth Factor- Beta1 | ||
| 1001 | +Osteopontin | ||
| 1002 | +hFL1 | ||
| 1003 | +CXCR4 | ||
| 1004 | +MMP19 | ||
| 1005 | +IL-33 | ||
| 1006 | +miR-17 92 | ||
| 1007 | +fibrillin-1 | ||
| 1008 | +ET-A | ||
| 1009 | +HDAC10 | ||
| 1010 | +ALK-5 | ||
| 1011 | +IL-31 | ||
| 1012 | +beclin 1 | ||
| 1013 | +c-Jun | ||
| 1014 | +Sema7a | ||
| 1015 | +MT5-MMP | ||
| 1016 | +PI3 | ||
| 1017 | +ITGB6 | ||
| 1018 | +TIAM1 | ||
| 1019 | +angiotensin II | ||
| 1020 | +LRG | ||
| 1021 | +IL-1alpha | ||
| 1022 | +TbetaRII | ||
| 1023 | +transforming growth factor b | ||
| 1024 | +FIEL1 | ||
| 1025 | +C5aR | ||
| 1026 | +PEDF | ||
| 1027 | +C1q | ||
| 1028 | +IL-1ra | ||
| 1029 | +tissue inhibitor of metalloproteinase | ||
| 1030 | +ErbB2 | ||
| 1031 | +TGF -b1 | ||
| 1032 | +PTEN-induced putative kinase 1 | ||
| 1033 | +AAG | ||
| 1034 | +CD103 | ||
| 1035 | +a-smooth muscle actin | ||
| 1036 | +cytokeratin 19 | ||
| 1037 | +CREB)-binding protein | ||
| 1038 | +p110a | ||
| 1039 | +Ets Domain-containing Protein Elk1 | ||
| 1040 | +insulin-like growth factor I | ||
| 1041 | +Cytokeratin-8 | ||
| 1042 | +TIMP3 | ||
| 1043 | +BMP-15 | ||
| 1044 | +LC3 | ||
| 1045 | +Vimentin | ||
| 1046 | +mTORC1 | ||
| 1047 | +SIRT6 | ||
| 1048 | +heme oxygenase-1 | ||
| 1049 | +Transforming growth factor-beta 1 | ||
| 1050 | +miR-127 | ||
| 1051 | +MIP-1 beta | ||
| 1052 | +TRPV4 | ||
| 1053 | +Neutrophil elastase | ||
| 1054 | +ANG converting enzyme | ||
| 1055 | +ERK-1 | ||
| 1056 | +bFGF | ||
| 1057 | +tumor necrosis factor-alpha | ||
| 1058 | +Serpin B3/B4 | ||
| 1059 | +focal adhesion kinase-related nonkinase | ||
| 1060 | +Stat3 | ||
| 1061 | +miR-1343 | ||
| 1062 | +SMAD7 | ||
| 1063 | +Endosialin | ||
| 1064 | +FGF-2 | ||
| 1065 | +miR-101 | ||
| 1066 | +L1CAM | ||
| 1067 | +thymic stromal lymphopoietin | ||
| 1068 | +vascular endothelial growth factor | ||
| 1069 | +PEX13 | ||
| 1070 | +heat shock protein (HSP) 47 | ||
| 1071 | +transient receptor potential vanilloid 4 | ||
| 1072 | +monocyte chemoattractant protein 1 | ||
| 1073 | +SPP1 | ||
| 1074 | +CD68 | ||
| 1075 | +TGF- b1 | ||
| 1076 | +T beta RII | ||
| 1077 | +TGFb-1 | ||
| 1078 | +Forkhead Box F1 |
tagging_Sklearn_crfsuite.py
deleted
100644 → 0
| 1 | -# -*- coding: UTF-8 -*- | ||
| 2 | - | ||
| 3 | -import os | ||
| 4 | -from itertools import chain | ||
| 5 | -from optparse import OptionParser | ||
| 6 | -from time import time | ||
| 7 | -from collections import Counter | ||
| 8 | - | ||
| 9 | -import nltk | ||
| 10 | -import sklearn | ||
| 11 | -import scipy.stats | ||
| 12 | -import sys | ||
| 13 | - | ||
| 14 | -from sklearn.externals import joblib | ||
| 15 | -from sklearn.metrics import make_scorer | ||
| 16 | -from sklearn.cross_validation import cross_val_score | ||
| 17 | -from sklearn.grid_search import RandomizedSearchCV | ||
| 18 | - | ||
| 19 | -import sklearn_crfsuite | ||
| 20 | -from sklearn_crfsuite import scorers | ||
| 21 | -from sklearn_crfsuite import metrics | ||
| 22 | - | ||
| 23 | -from nltk.corpus import stopwords | ||
| 24 | -from trainingTesting_Sklearn_crfsuite import word2features | ||
| 25 | -from trainingTesting_Sklearn_crfsuite import sent2features | ||
| 26 | -# from trainingTesting_Sklearn_crfsuite import hasNonAlphaNum | ||
| 27 | -# from trainingTesting_Sklearn_crfsuite import hasDigit | ||
| 28 | - | ||
| 29 | -# Objective | ||
| 30 | -# Tagging transformed file with CRF model with sklearn-crfsuite. | ||
| 31 | -# | ||
| 32 | -# Input parameters | ||
| 33 | -# --inputPath=PATH Path of transformed files x|y|z | ||
| 34 | -# --modelPath Path to CRF model | ||
| 35 | -# --modelName Model name | ||
| 36 | -# --outputPath=PATH Output path to place output files | ||
| 37 | -# --filteringStopWords Filtering stop words | ||
| 38 | -# --filterSymbols Filtering punctuation marks | ||
| 39 | - | ||
| 40 | -# Output | ||
| 41 | -# 1) Tagged files in transformed format | ||
| 42 | - | ||
| 43 | -# Examples | ||
| 44 | -# Sentences | ||
| 45 | -# C:\Anaconda2\python tagging_Sklearn_crfsuite.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\classifying_TFSentences\corpus\ECK120011394_FhlA\transformed --modelName aspectsTraining.fStopWords_False.fSymbols_True --modelPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\trainingTest_CRF_TERM_TAGS --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\classifying_TFSentences\corpus\ECK120011394_FhlA\transformed_CRFtagged --filterSymbols > output.taggingCRF.20161107.txt | ||
| 46 | -# C:\Anaconda2\python tagging_Sklearn_crfsuite.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\classifying_TFSentences\corpus\ECK120011394_FhlA\transformed --modelName sentencesTraining.fStopWords_False.fSymbols_False --modelPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\trainingTest_CRF_TERM_TAGS --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\classifying_TFSentences\corpus\ECK120011394_FhlA\transformed_CRFtagged > output.taggingCRF.20161107.txt | ||
| 47 | - | ||
| 48 | -################################# | ||
| 49 | -# FUNCTIONS # | ||
| 50 | -################################# | ||
| 51 | -# def hasDigit(text): | ||
| 52 | -# has = False | ||
| 53 | -# if len(text) < 3: | ||
| 54 | -# return False | ||
| 55 | -# myRegex = nltk.re.compile('[0-9]') | ||
| 56 | -# if myRegex.search(text) != None: | ||
| 57 | -# has = True | ||
| 58 | -# return has | ||
| 59 | -# | ||
| 60 | -# | ||
| 61 | -# def hasNonAlphaNum(text): | ||
| 62 | -# has = False | ||
| 63 | -# if len(text) < 3: | ||
| 64 | -# return False | ||
| 65 | -# myRegex = nltk.re.compile('\W') | ||
| 66 | -# if myRegex.search(text) != None: | ||
| 67 | -# has = True | ||
| 68 | -# return has | ||
| 69 | - | ||
| 70 | -# IMPORTED FROM TRAINING SCRIPT | ||
| 71 | -# def word2features(sent, i): | ||
| 72 | -# # print "i: " + str(i) | ||
| 73 | -# # print "sent[i]" + sent[i] | ||
| 74 | -# listElem = sent[i].split('|') | ||
| 75 | -# word = listElem[0] | ||
| 76 | -# lemma = listElem[1] | ||
| 77 | -# postag = listElem[2] | ||
| 78 | -# | ||
| 79 | -# features = { | ||
| 80 | -# # Names of TF and genes change by lower and upper characters: 'word.lower()': word.lower(), | ||
| 81 | -# # Suffixes | ||
| 82 | -# 'word[-3:]': word[-3:], | ||
| 83 | -# 'word[-2:]': word[-2:], | ||
| 84 | -# 'word[-1:]': word[-1:], | ||
| 85 | -# 'word.isupper()': word.isupper(), | ||
| 86 | -# 'word.istitle()': word.istitle(), | ||
| 87 | -# 'word.hasDigit()': hasDigit(word), | ||
| 88 | -# 'word.hasNonAlphaNum': hasNonAlphaNum(word), | ||
| 89 | -# # 'word.isdigit()': word.isdigit(), | ||
| 90 | -# 'word': word, | ||
| 91 | -# 'lemma': lemma, | ||
| 92 | -# 'lemma[-3:]': lemma[-3:], | ||
| 93 | -# 'lemma[-2:]': lemma[-2:], | ||
| 94 | -# 'lemma[-1:]': lemma[-1:], | ||
| 95 | -# 'postag': postag, | ||
| 96 | -# # Prefixes | ||
| 97 | -# 'postag[:2]': postag[:2], | ||
| 98 | -# 'postag[:1]': postag[:1], | ||
| 99 | -# } | ||
| 100 | -# if i > 0: | ||
| 101 | -# listElem = sent[i - 1].split('|') | ||
| 102 | -# word1 = listElem[0] | ||
| 103 | -# lemma1 = listElem[1] | ||
| 104 | -# postag1 = listElem[2] | ||
| 105 | -# features.update({ | ||
| 106 | -# '-1:word.lower()': word1.lower(), | ||
| 107 | -# '-1:word.istitle()': word1.istitle(), | ||
| 108 | -# '-1:word.isupper()': word1.isupper(), | ||
| 109 | -# '-1:word.hasDigit()': hasDigit(word1), | ||
| 110 | -# '-1:word.hasNonAlphaNum': hasNonAlphaNum(word1), | ||
| 111 | -# '-1:word': word1, | ||
| 112 | -# '-1:lemma': lemma1, | ||
| 113 | -# '-1:postag': postag1, | ||
| 114 | -# '-1:postag[:2]': postag1[:2], | ||
| 115 | -# '-1:postag[:1]': postag1[:1], | ||
| 116 | -# }) | ||
| 117 | -# # else: | ||
| 118 | -# # features['BOS'] = True | ||
| 119 | -# | ||
| 120 | -# if i < len(sent) - 1: | ||
| 121 | -# listElem = sent[i + 1].split('|') | ||
| 122 | -# word1 = listElem[0] | ||
| 123 | -# lemma1 = listElem[1] | ||
| 124 | -# postag1 = listElem[2] | ||
| 125 | -# features.update({ | ||
| 126 | -# '+1:word.lower()': word1.lower(), | ||
| 127 | -# '+1:word.istitle()': word1.istitle(), | ||
| 128 | -# '+1:word.isupper()': word1.isupper(), | ||
| 129 | -# '+1:word.hasDigit()': hasDigit(word1), | ||
| 130 | -# '+1:word.hasNonAlphaNum': hasNonAlphaNum(word1), | ||
| 131 | -# '+1:word': word1, | ||
| 132 | -# '+1:lemma': lemma1, | ||
| 133 | -# '+1:postag': postag1, | ||
| 134 | -# '+1:postag[:2]': postag1[:2], | ||
| 135 | -# '+1:postag[:1]': postag1[:1], | ||
| 136 | -# }) | ||
| 137 | -# # else: | ||
| 138 | -# # features['EOS'] = True | ||
| 139 | -# if i > 1: | ||
| 140 | -# listElem = sent[i - 2].split('|') | ||
| 141 | -# word2 = listElem[0] | ||
| 142 | -# lemma2 = listElem[1] | ||
| 143 | -# postag2 = listElem[2] | ||
| 144 | -# features.update({ | ||
| 145 | -# '-2:word.lower()': word2.lower(), | ||
| 146 | -# '-2:word.istitle()': word2.istitle(), | ||
| 147 | -# '-2:word.isupper()': word2.isupper(), | ||
| 148 | -# '-2:word.hasDigit()': hasDigit(word2), | ||
| 149 | -# '-2:word.hasNonAlphaNum': hasNonAlphaNum(word2), | ||
| 150 | -# '-2:word': word2, | ||
| 151 | -# '-2:lemma': lemma2, | ||
| 152 | -# '-2:postag': postag2, | ||
| 153 | -# '-2:postag[:2]': postag2[:2], | ||
| 154 | -# '-2:postag[:1]': postag2[:1], | ||
| 155 | -# }) | ||
| 156 | -# | ||
| 157 | -# if i < len(sent) - 2: | ||
| 158 | -# listElem = sent[i + 2].split('|') | ||
| 159 | -# word2 = listElem[0] | ||
| 160 | -# lemma2 = listElem[1] | ||
| 161 | -# postag2 = listElem[2] | ||
| 162 | -# features.update({ | ||
| 163 | -# '+2:word.lower()': word2.lower(), | ||
| 164 | -# '+2:word.istitle()': word2.istitle(), | ||
| 165 | -# '+2:word.isupper()': word2.isupper(), | ||
| 166 | -# '+2:word.hasDigit()': hasDigit(word2), | ||
| 167 | -# '+2:word.hasNonAlphaNum': hasNonAlphaNum(word2), | ||
| 168 | -# '+2:word': word2, | ||
| 169 | -# '+2:lemma': lemma2, | ||
| 170 | -# '+2:postag': postag2, | ||
| 171 | -# '+2:postag[:2]': postag2[:2], | ||
| 172 | -# '+2:postag[:1]': postag2[:1], | ||
| 173 | -# }) | ||
| 174 | -# | ||
| 175 | -# trigrams = False | ||
| 176 | -# if trigrams: | ||
| 177 | -# if i > 2: | ||
| 178 | -# listElem = sent[i - 3].split('|') | ||
| 179 | -# word3 = listElem[0] | ||
| 180 | -# lemma3 = listElem[1] | ||
| 181 | -# postag3 = listElem[2] | ||
| 182 | -# features.update({ | ||
| 183 | -# '-3:word.lower()': word3.lower(), | ||
| 184 | -# '-3:word.istitle()': word3.istitle(), | ||
| 185 | -# '-3:word.isupper()': word3.isupper(), | ||
| 186 | -# '-3:word.hasDigit()': hasDigit(word3), | ||
| 187 | -# '-3:word.hasNonAlphaNum': hasNonAlphaNum(word3), | ||
| 188 | -# '-3:word': word3, | ||
| 189 | -# '-3:lemma': lemma3, | ||
| 190 | -# '-3:postag': postag3, | ||
| 191 | -# '-3:postag[:2]': postag3[:2], | ||
| 192 | -# '-3:postag[:1]': postag3[:1], | ||
| 193 | -# }) | ||
| 194 | -# | ||
| 195 | -# if i < len(sent) - 3: | ||
| 196 | -# listElem = sent[i + 3].split('|') | ||
| 197 | -# word3 = listElem[0] | ||
| 198 | -# lemma3 = listElem[1] | ||
| 199 | -# postag3 = listElem[2] | ||
| 200 | -# features.update({ | ||
| 201 | -# '+3:word.lower()': word3.lower(), | ||
| 202 | -# '+3:word.istitle()': word3.istitle(), | ||
| 203 | -# '+3:word.isupper()': word3.isupper(), | ||
| 204 | -# '+3:word.hasDigit()': hasDigit(word3), | ||
| 205 | -# '+3:word.hasNonAlphaNum': hasNonAlphaNum(word3), | ||
| 206 | -# '+3:word': word3, | ||
| 207 | -# '+3:lemma': lemma3, | ||
| 208 | -# '+3:postag': postag3, | ||
| 209 | -# '+3:postag[:2]': postag3[:2], | ||
| 210 | -# '+3:postag[:1]': postag3[:1], | ||
| 211 | -# }) | ||
| 212 | -# | ||
| 213 | -# return features | ||
| 214 | - | ||
| 215 | - | ||
| 216 | -# def sent2features(sent): | ||
| 217 | -# return [word2features(sent, i) for i in range(len(sent))] | ||
| 218 | - | ||
| 219 | - | ||
| 220 | -__author__ = 'CMendezC' | ||
| 221 | - | ||
| 222 | -########################################## | ||
| 223 | -# MAIN PROGRAM # | ||
| 224 | -########################################## | ||
| 225 | - | ||
| 226 | -if __name__ == "__main__": | ||
| 227 | - # Defining parameters | ||
| 228 | - parser = OptionParser() | ||
| 229 | - parser.add_option("--inputPath", dest="inputPath", | ||
| 230 | - help="Path of training data set", metavar="PATH") | ||
| 231 | - parser.add_option("--outputPath", dest="outputPath", | ||
| 232 | - help="Output path to place output files", | ||
| 233 | - metavar="PATH") | ||
| 234 | - parser.add_option("--modelPath", dest="modelPath", | ||
| 235 | - help="Path to read CRF model", | ||
| 236 | - metavar="PATH") | ||
| 237 | - parser.add_option("--modelName", dest="modelName", | ||
| 238 | - help="Model name", metavar="TEXT") | ||
| 239 | - parser.add_option("--filterStopWords", default=False, | ||
| 240 | - action="store_true", dest="filterStopWords", | ||
| 241 | - help="Filtering stop words") | ||
| 242 | - parser.add_option("--filterSymbols", default=False, | ||
| 243 | - action="store_true", dest="filterSymbols", | ||
| 244 | - help="Filtering punctuation marks") | ||
| 245 | - | ||
| 246 | - (options, args) = parser.parse_args() | ||
| 247 | - if len(args) > 0: | ||
| 248 | - parser.error("Any parameter given.") | ||
| 249 | - sys.exit(1) | ||
| 250 | - | ||
| 251 | - print('-------------------------------- PARAMETERS --------------------------------') | ||
| 252 | - print("Path to read input files: " + options.inputPath) | ||
| 253 | - print("Mode name: " + str(options.modelName)) | ||
| 254 | - print("Model path: " + options.modelPath) | ||
| 255 | - print("Path to place output files: " + options.outputPath) | ||
| 256 | - print("Filtering stop words: " + str(options.filterStopWords)) | ||
| 257 | - symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', | ||
| 258 | - '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...'] | ||
| 259 | - # symbols = [sym.decode('utf-8') for sym in ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', | ||
| 260 | - # '}', '[', ']', '*', '%', '$', '#', '&', '°']] | ||
| 261 | - # symbols = [u'.', u',', u':', u';', u'?', u'!', u'\'', u'"', u'<', u'>', u'(', u')', u'-', u'_', u'/', u'\\', u'¿', u'¡', u'+', u'{', | ||
| 262 | - # u'}', u'[', u']', u'*', u'%', u'$', u'#', u'&', u'°', u'`'] | ||
| 263 | - print("Filtering symbols " + str(symbols) + ': ' + str(options.filterSymbols)) | ||
| 264 | - | ||
| 265 | - print('-------------------------------- PROCESSING --------------------------------') | ||
| 266 | - | ||
| 267 | - stopwords = [word.decode('utf-8') for word in stopwords.words('english')] | ||
| 268 | - | ||
| 269 | - # Read CRF model | ||
| 270 | - t0 = time() | ||
| 271 | - print('Reading CRF model...') | ||
| 272 | - crf = joblib.load(os.path.join(options.modelPath, 'models', options.modelName + '.mod')) | ||
| 273 | - print("Reading CRF model done in: %fs" % (time() - t0)) | ||
| 274 | - | ||
| 275 | - print('Processing corpus...') | ||
| 276 | - t0 = time() | ||
| 277 | - labels = list(['MF', 'TF', 'DFAM', 'DMOT', 'DPOS', 'PRO']) | ||
| 278 | - # Walk directory to read files | ||
| 279 | - for path, dirs, files in os.walk(options.inputPath): | ||
| 280 | - # For each file in dir | ||
| 281 | - for file in files: | ||
| 282 | - print(" Preprocessing file..." + str(file)) | ||
| 283 | - sentencesInputData = [] | ||
| 284 | - sentencesOutputData = [] | ||
| 285 | - with open(os.path.join(options.inputPath, file), "r") as iFile: | ||
| 286 | - lines = iFile.readlines() | ||
| 287 | - for line in lines: | ||
| 288 | - listLine = [] | ||
| 289 | - # line = line.decode("utf-8") | ||
| 290 | - for token in line.strip('\n').split(): | ||
| 291 | - if options.filterStopWords: | ||
| 292 | - listToken = token.split('|') | ||
| 293 | - lemma = listToken[1] | ||
| 294 | -                        # Originally: if lemma in stopwords.words('english'): | ||
| 295 | - if lemma in stopwords: | ||
| 296 | - continue | ||
| 297 | - if options.filterSymbols: | ||
| 298 | - listToken = token.split('|') | ||
| 299 | - lemma = listToken[1] | ||
| 300 | - if lemma in symbols: | ||
| 301 | - if lemma == ',': | ||
| 302 | - print "Coma , identificada" | ||
| 303 | - continue | ||
| 304 | - listLine.append(token) | ||
| 305 | - sentencesInputData.append(listLine) | ||
| 306 | - print " Sentences input data: " + str(len(sentencesInputData)) | ||
| 307 | - # print sentencesInputData[0] | ||
| 308 | - # print(sent2features(sentencesInputData[0])[0]) | ||
| 309 | - # print(sent2labels(sentencesInputData[0])) | ||
| 310 | - X_input = [sent2features(s) for s in sentencesInputData] | ||
| 311 | - print(sent2features(sentencesInputData[0])[0]) | ||
| 312 | - # y_test = [sent2labels(s) for s in sentencesInputData] | ||
| 313 | - # Predicting tags | ||
| 314 | - t1 = time() | ||
| 315 | - print " Predicting tags with model" | ||
| 316 | - y_pred = crf.predict(X_input) | ||
| 317 | - print y_pred[0] | ||
| 318 | - print(" Prediction done in: %fs" % (time() - t1)) | ||
| 319 | - # Tagging with CRF model | ||
| 320 | - print " Tagging file" | ||
| 321 | - for line, tagLine in zip(lines, y_pred): | ||
| 322 | - outputLine = '' | ||
| 323 | - idx_tagLine = 0 | ||
| 324 | - line = line.strip('\n') | ||
| 325 | - print "\nLine: " + str(line) | ||
| 326 | - print "CRF tagged line: " + str(tagLine) | ||
| 327 | - for token in line.split(): | ||
| 328 | - listToken = token.split('|') | ||
| 329 | - word = listToken[0] | ||
| 330 | - lemma = listToken[1] | ||
| 331 | - tag = listToken[2] | ||
| 332 | - if options.filterStopWords: | ||
| 333 | - if lemma in stopwords: | ||
| 334 | - outputLine += token + ' ' | ||
| 335 | - continue | ||
| 336 | - if options.filterSymbols: | ||
| 337 | - if lemma in symbols: | ||
| 338 | - if lemma == ',': | ||
| 339 | - print "Coma , identificada" | ||
| 340 | - outputLine += token + ' ' | ||
| 341 | - continue | ||
| 342 | - CRFtag = tagLine[idx_tagLine] | ||
| 343 | - if (tag not in labels) and (CRFtag != 'O'): | ||
| 344 | - print "*** CRF change token {} to {}".format(token, CRFtag) | ||
| 345 | - outputLine += word + '|' + lemma + '|' + CRFtag + ' ' | ||
| 346 | - else: | ||
| 347 | - outputLine += word + '|' + lemma + '|' + tag + ' ' | ||
| 348 | - idx_tagLine += 1 | ||
| 349 | - sentencesOutputData.append(outputLine.rstrip()) | ||
| 350 | - with open(os.path.join(options.outputPath, file), "w") as oFile: | ||
| 351 | - for line in sentencesOutputData: | ||
| 352 | - oFile.write(line + '\n') | ||
| 353 | - | ||
| 354 | - print("Processing corpus done in: %fs" % (time() - t0)) |
| ... | @@ -198,8 +198,8 @@ if __name__ == "__main__": | ... | @@ -198,8 +198,8 @@ if __name__ == "__main__": |
| 198 | 198 | ||
| 199 | print("Reading corpus done in: %fs" % (time() - t0)) | 199 | print("Reading corpus done in: %fs" % (time() - t0)) |
| 200 | 200 | ||
| 201 | - print(sent2features(sentencesTrainingData[0])[0]) | 201 | + #print(sent2features(sentencesTrainingData[0])[0]) |
| 202 | - print(sent2features(sentencesTestData[0])[0]) | 202 | + #print(sent2features(sentencesTestData[0])[0]) |
| 203 | t0 = time() | 203 | t0 = time() |
| 204 | 204 | ||
| 205 | X_train = [sent2features(s) for s in sentencesTrainingData] | 205 | X_train = [sent2features(s) for s in sentencesTrainingData] | ... | ... |
-
Please register or login to post a comment