Showing
8 changed files
with
1786 additions
and
0 deletions
corpora/abstracts-titles.txt
0 → 100644
This diff is collapsed. Click to expand it.
embeddings/abstracts-titles.vec
0 → 100644
This diff could not be displayed because it is too large.
| 1 | +# missing word embeddings: | ||
| 2 | +29c | ||
| 3 | +14 | ||
| 4 | +31 | ||
| 5 | +26a | ||
| 6 | +beta1 | ||
| 7 | +486 | ||
| 8 | +profibrotic | ||
| 9 | +92a | ||
| 10 | +29a | ||
| 11 | +matricellular | ||
| 12 | +155 | ||
| 13 | +13 | ||
| 14 | +b1 | ||
| 15 | +etiopathology | ||
| 16 | +nexin | ||
| 17 | +avb6 | ||
| 18 | +21 | ||
| 19 | +19 | ||
| 20 | +acetylglucosaminidase | ||
| 21 | +326 | ||
| 22 | +dermatan | ||
| 23 | +1b | ||
| 24 | +p38 | ||
| 25 | +47 | ||
| 26 | +68 | ||
| 27 | +3p | ||
| 28 | +dedifferentiating | ||
| 29 | +alpha3beta1 | ||
| 30 | +chymase | ||
| 31 | +2alpha | ||
| 32 | +90 | ||
| 33 | +7d | ||
| 34 | +5100 | ||
| 35 | +18a | ||
| 36 | +1a | ||
| 37 | +alpha3 | ||
| 38 | +150 | ||
| 39 | +1343 | ||
| 40 | +140 | ||
| 41 | +541 | ||
| 42 | +fibrogenesis | ||
| 43 | +221 | ||
| 44 | +mitophagy | ||
| 45 | +b4 | ||
| 46 | +p63 | ||
| 47 | +10 | ||
| 48 | +5p | ||
| 49 | +haptotactic | ||
| 50 | +nonkinase | ||
| 51 | +farnesoid | ||
| 52 | +lymphopoietin | ||
| 53 | +7a | ||
| 54 | +18 | ||
| 55 | +338 | ||
| 56 | +1beta | ||
| 57 | +424 | ||
| 58 | +199a | ||
| 59 | +154 | ||
| 60 | +smad2 | ||
| 61 | +17 | ||
| 62 | +185 | ||
| 63 | +bronchoalveolar | ||
| 64 | +101 | ||
| 65 | +profibrogenic | ||
| 66 | +153 | ||
| 67 | +gambogic | ||
| 68 | +ubiquitinating | ||
| 69 | +p110y | ||
| 70 | +200 | ||
| 71 | +323a | ||
| 72 | +196a | ||
| 73 | +dysregulates | ||
| 74 | +fibrogenic | ||
| 75 | +salvianolic | ||
| 76 | +# missing MI weights: | ||
| 77 | +Tumor | ||
| 78 | +sTNFR | ||
| 79 | +Compromised | ||
| 80 | +Modulating | ||
| 81 | +Renin | ||
| 82 | +Invasive | ||
| 83 | +Cytokine | ||
| 84 | +D1 | ||
| 85 | +C3aR | ||
| 86 | +Prostaglandin | ||
| 87 | +Diagnostic | ||
| 88 | +HLA | ||
| 89 | +Interstitial | ||
| 90 | +Transforming | ||
| 91 | +Phenotypes | ||
| 92 | +Up | ||
| 93 | +Clinical | ||
| 94 | +TGFb | ||
| 95 | +Interplay | ||
| 96 | +Animal | ||
| 97 | +Decoction | ||
| 98 | +Foxp3high | ||
| 99 | +Leucine | ||
| 100 | +CCL2 | ||
| 101 | +SDKP | ||
| 102 | +IL | ||
| 103 | +Correct | ||
| 104 | +Elk1 | ||
| 105 | +Participation | ||
| 106 | +Azithromycin | ||
| 107 | +Amplification | ||
| 108 | +1A | ||
| 109 | +Endothelin | ||
| 110 | +Kinase | ||
| 111 | +Possible | ||
| 112 | +Associated | ||
| 113 | +MicroRNA | ||
| 114 | +sL1 | ||
| 115 | +Action | ||
| 116 | +For | ||
| 117 | +Development | ||
| 118 | +Establishment | ||
| 119 | +Extracellular | ||
| 120 | +Epstein | ||
| 121 | +MUM | ||
| 122 | +Serine | ||
| 123 | +Interactions | ||
| 124 | +Macrophage | ||
| 125 | +Pseudomonas | ||
| 126 | +Cytokines | ||
| 127 | +IGFBP | ||
| 128 | +Calu | ||
| 129 | +PI3K | ||
| 130 | +B4 | ||
| 131 | +Chinese | ||
| 132 | +Cigarette | ||
| 133 | +BALF | ||
| 134 | +NK | ||
| 135 | +alphaEbeta7 | ||
| 136 | +microRNAs | ||
| 137 | +Gene | ||
| 138 | +True | ||
| 139 | +Pneumonia | ||
| 140 | +CUX1 | ||
| 141 | +miRNA | ||
| 142 | +Ubiquitin | ||
| 143 | +Wnt | ||
| 144 | +CDCP1 | ||
| 145 | +Barr | ||
| 146 | +ORP150 | ||
| 147 | +Curcumin | ||
| 148 | +aB | ||
| 149 | +Myofibroblast | ||
| 150 | +JNK | ||
| 151 | +Aortic | ||
| 152 | +L5 | ||
| 153 | +Hsp90 | ||
| 154 | +Smoke | ||
| 155 | +IFN | ||
| 156 | +Reactive | ||
| 157 | +Significance | ||
| 158 | +aVb6 | ||
| 159 | +Th2 | ||
| 160 | +Constitutive | ||
| 161 | +Melatonin | ||
| 162 | +Mediates | ||
| 163 | +AP | ||
| 164 | +Lindau | ||
| 165 | +ALK5 | ||
| 166 | +AKT2 | ||
| 167 | +EMT | ||
| 168 | +Reversion | ||
| 169 | +Patients | ||
| 170 | +Solution | ||
| 171 | +Focal | ||
| 172 | +Immunoglobulin | ||
| 173 | +RhoA | ||
| 174 | +Neutrophil | ||
| 175 | +BMPR2 | ||
| 176 | +CXCL9 | ||
| 177 | +Lower | ||
| 178 | +Olodaterol | ||
| 179 | +Lavage | ||
| 180 | +Induces | ||
| 181 | +Induced | ||
| 182 | +Beyond | ||
| 183 | +MMP | ||
| 184 | +Fstl1 | ||
| 185 | +cAMP | ||
| 186 | +Aging | ||
| 187 | +SPARC | ||
| 188 | +Activity | ||
| 189 | +ECM | ||
| 190 | +Human | ||
| 191 | +Fibrotic | ||
| 192 | +Repression | ||
| 193 | +Prostatic | ||
| 194 | +NOX4 | ||
| 195 | +Against | ||
| 196 | +Discovery | ||
| 197 | +Resveratrol | ||
| 198 | +Interleukin | ||
| 199 | +Deleted | ||
| 200 | +Differentiation | ||
| 201 | +Spiruchostatin | ||
| 202 | +Insulin | ||
| 203 | +PXS64 | ||
| 204 | +MCTC | ||
| 205 | +HP | ||
| 206 | +Triptolide | ||
| 207 | +Values | ||
| 208 | +Differential | ||
| 209 | +Lysyl | ||
| 210 | +FGF | ||
| 211 | +Promote | ||
| 212 | +Yin | ||
| 213 | +Tissue | ||
| 214 | +BMP | ||
| 215 | +CAM | ||
| 216 | +Discoidin | ||
| 217 | +Overproduction | ||
| 218 | +Wilms | ||
| 219 | +Regulates | ||
| 220 | +Reduced | ||
| 221 | +Mesenchymal | ||
| 222 | +Inflammation | ||
| 223 | +PINK1 | ||
| 224 | +Release | ||
| 225 | +E2 | ||
| 226 | +CD44V6 | ||
| 227 | +TGFbeta1 | ||
| 228 | +Rg1 | ||
| 229 | +H441 | ||
| 230 | +kDa | ||
| 231 | +M2 | ||
| 232 | +Expression | ||
| 233 | +Data | ||
| 234 | +Contribution | ||
| 235 | +Negative | ||
| 236 | +Nintedanib | ||
| 237 | +Pulmonary | ||
| 238 | +Emphysema | ||
| 239 | +Intrinsic | ||
| 240 | +17A | ||
| 241 | +Marks | ||
| 242 | +26S | ||
| 243 | +RXFP1 | ||
| 244 | +Neovessel | ||
| 245 | +Genetic | ||
| 246 | +Protective | ||
| 247 | +Tensin | ||
| 248 | +Inducer | ||
| 249 | +Like | ||
| 250 | +Determining | ||
| 251 | +Increased | ||
| 252 | +Microsatellite | ||
| 253 | +Sphingosine | ||
| 254 | +TRPV4 | ||
| 255 | +Club | ||
| 256 | +Methylation | ||
| 257 | +Phenoconversion | ||
| 258 | +Serpin | ||
| 259 | +Activated | ||
| 260 | +Muc5ac | ||
| 261 | +MyD88 | ||
| 262 | +Glucagon | ||
| 263 | +IPF | ||
| 264 | +SIRT6 | ||
| 265 | +Rapamycin | ||
| 266 | +Essential | ||
| 267 | +TNFalpha | ||
| 268 | +Corilagin | ||
| 269 | +Sorafenib | ||
| 270 | +Epithelial | ||
| 271 | +T869C | ||
| 272 | +Induction | ||
| 273 | +Long | ||
| 274 | +Wt1 | ||
| 275 | +Molecules | ||
| 276 | +TGFBR2 | ||
| 277 | +P110 | ||
| 278 | +Nuclear | ||
| 279 | +Old | ||
| 280 | +Targeting | ||
| 281 | +WNT7B | ||
| 282 | +Thy | ||
| 283 | +Potential | ||
| 284 | +TGF | ||
| 285 | +Tubastatin | ||
| 286 | +Semaphorin | ||
| 287 | +Attenuating | ||
| 288 | +Smad | ||
| 289 | +Pigment | ||
| 290 | +Homolog | ||
| 291 | +Binding | ||
| 292 | +microRNA | ||
| 293 | +C57BL | ||
| 294 | +Regulating | ||
| 295 | +Implications | ||
| 296 | +Hydrogen | ||
| 297 | +BARD1 | ||
| 298 | +A549 | ||
| 299 | +Homeostasis | ||
| 300 | +Selectivity | ||
| 301 | +Medical | ||
| 302 | +Model | ||
| 303 | +Cytoskeletal | ||
| 304 | +Differing | ||
| 305 | +BMP3 | ||
| 306 | +Enhances | ||
| 307 | +NADPH | ||
| 308 | +Fibrogenesis | ||
| 309 | +Defect | ||
| 310 | +Two | ||
| 311 | +FAK | ||
| 312 | +RNA | ||
| 313 | +Quantifying | ||
| 314 | +Epigenetic | ||
| 315 | +Profibrotic | ||
| 316 | +Ambroxol | ||
| 317 | +Trigger | ||
| 318 | +Titration | ||
| 319 | +Transcription | ||
| 320 | +Regulation | ||
| 321 | +Mitochondrial | ||
| 322 | +H1N1 | ||
| 323 | +Recent | ||
| 324 | +BMPER | ||
| 325 | +PPARs | ||
| 326 | +VCAM | ||
| 327 | +Microsomal | ||
| 328 | +Hippel | ||
| 329 | +Renshen | ||
| 330 | +Absence | ||
| 331 | +Anchorage | ||
| 332 | +Applying | ||
| 333 | +Free | ||
| 334 | +OSF | ||
| 335 | +PGE | ||
| 336 | +Tannic | ||
| 337 | +Plasminogen | ||
| 338 | +TGFBR | ||
| 339 | +Channel | ||
| 340 | +Age | ||
| 341 | +Cell | ||
| 342 | +Connective | ||
| 343 | +Proteasomal | ||
| 344 | +RAGE | ||
| 345 | +Bach1 | ||
| 346 | +Pirfenidone | ||
| 347 | +Outcomes | ||
| 348 | +GATA | ||
| 349 | +Small | ||
| 350 | +Autoimmunity | ||
| 351 | +III | ||
| 352 | +VEGF | ||
| 353 | +Control | ||
| 354 | +HSP27 | ||
| 355 | +Cartilage | ||
| 356 | +Periostin | ||
| 357 | +Idiopathic | ||
| 358 | +COL1A1 | ||
| 359 | +CBP | ||
| 360 | +Bronchoalveolar | ||
| 361 | +Crosstalk | ||
| 362 | +Amplified | ||
| 363 | +Evidence | ||
| 364 | +Simvastatin | ||
| 365 | +Sphingolipids | ||
| 366 | +Mechanisms | ||
| 367 | +JAK2 | ||
| 368 | +Rats | ||
| 369 | +Mice | ||
| 370 | +Protease | ||
| 371 | +From | ||
| 372 | +LPA1 | ||
| 373 | +Collagen | ||
| 374 | +Carbon | ||
| 375 | +Molecular | ||
| 376 | +Stat3 | ||
| 377 | +Genomewide | ||
| 378 | +Stem | ||
| 379 | +S1P | ||
| 380 | +Novel | ||
| 381 | +EBV | ||
| 382 | +Serum | ||
| 383 | +Abrogation | ||
| 384 | +That | ||
| 385 | +Pingfei | ||
| 386 | +Stromal | ||
| 387 | +Current | ||
| 388 | +Molecule | ||
| 389 | +MAP3K19 | ||
| 390 | +Decisive | ||
| 391 | +Protein | ||
| 392 | +Fluid | ||
| 393 | +HDAC4 | ||
| 394 | +Angiotensin | ||
| 395 | +SOCS1 | ||
| 396 | +Different | ||
| 397 | +Membrane | ||
| 398 | +Domain | ||
| 399 | +Secretory | ||
| 400 | +Signalling | ||
| 401 | +NCI | ||
| 402 | +Bax | ||
| 403 | +ADAM | ||
| 404 | +Are | ||
| 405 | +Beta1 | ||
| 406 | +Activation | ||
| 407 | +Problem | ||
| 408 | +Prognostic | ||
| 409 | +II | ||
| 410 | +Sputum | ||
| 411 | +Phosphatase | ||
| 412 | +Inhibition | ||
| 413 | +Profile | ||
| 414 | +Dogs | ||
| 415 | +HRCT | ||
| 416 | +lncRNA | ||
| 417 | +Storage | ||
| 418 | +Nitrated | ||
| 419 | +Box | ||
| 420 | +Forkhead | ||
| 421 | +CREB | ||
| 422 | +Sirtuin | ||
| 423 | +Cryptogenic | ||
| 424 | +Decreased | ||
| 425 | +Inhibits | ||
| 426 | +Formation | ||
| 427 | +MK2 | ||
| 428 | +Comparison | ||
| 429 | +Mediated | ||
| 430 | +Latent | ||
| 431 | +Recombinant | ||
| 432 | +Microencapsulation | ||
| 433 | +PHGDH | ||
| 434 | +Organizing | ||
| 435 | +Dysfunction | ||
| 436 | +Way | ||
| 437 | +Using | ||
| 438 | +Peripheral | ||
| 439 | +Markers | ||
| 440 | +MiR | ||
| 441 | +Anti | ||
| 442 | +Studies | ||
| 443 | +May | ||
| 444 | +Significant | ||
| 445 | +Morphogenic | ||
| 446 | +Low | ||
| 447 | +Lactic | ||
| 448 | +Overexpression | ||
| 449 | +Protects | ||
| 450 | +Arsenic | ||
| 451 | +Caveolin | ||
| 452 | +pH | ||
| 453 | +Inhibit | ||
| 454 | +Proteasome | ||
| 455 | +MicroRNAs | ||
| 456 | +Toll | ||
| 457 | +Herpes | ||
| 458 | +CTGF | ||
| 459 | +Normal | ||
| 460 | +Defective | ||
| 461 | +CD44 | ||
| 462 | +Large | ||
| 463 | +Ligands | ||
| 464 | +Axis | ||
| 465 | +NH2 | ||
| 466 | +Progression | ||
| 467 | +Smad3 | ||
| 468 | +Phenotype | ||
| 469 | +Ets | ||
| 470 | +Identification | ||
| 471 | +kB | ||
| 472 | +Role | ||
| 473 | +Relation | ||
| 474 | +Mode | ||
| 475 | +Developmental | ||
| 476 | +Fibrosis | ||
| 477 | +Stanniocalcin | ||
| 478 | +WNT10A | ||
| 479 | +Integrated | ||
| 480 | +Syndecan | ||
| 481 | +Metalloproteinase | ||
| 482 | +TOB2 | ||
| 483 | +USP11 | ||
| 484 | +WISP1 | ||
| 485 | +Dysregulated | ||
| 486 | +Th17 | ||
| 487 | +Progressive | ||
| 488 | +Key | ||
| 489 | +Subpleural | ||
| 490 | +Mast | ||
| 491 | +Rho | ||
| 492 | +Growth | ||
| 493 | +Upregulation | ||
| 494 | +Alleviates | ||
| 495 | +Re | ||
| 496 | +Preventive | ||
| 497 | +ITGB6 | ||
| 498 | +Fibroblasts | ||
| 499 | +ATG4B | ||
| 500 | +Comparative | ||
| 501 | +Cthrc1 | ||
| 502 | +mRNA | ||
| 503 | +Peptide | ||
| 504 | +SNAI | ||
| 505 | +BM | ||
| 506 | +ATPase | ||
| 507 | +AKT | ||
| 508 | +Fibroblastic | ||
| 509 | +Matriptase | ||
| 510 | +Sub | ||
| 511 | +Sustained | ||
| 512 | +Pleiotropic | ||
| 513 | +New | ||
| 514 | +Regulator | ||
| 515 | +Receptor | ||
| 516 | +Therapeutic | ||
| 517 | +Vimentin | ||
| 518 | +IGF | ||
| 519 | +Cells | ||
| 520 | +LIGHT | ||
| 521 | +Production | ||
| 522 | +D2 | ||
| 523 | +Dehydroepiandrosterone | ||
| 524 | +Lin28B | ||
| 525 | +Antifibrotic | ||
| 526 | +Raised | ||
| 527 | +Proliferation | ||
| 528 | +Dependent | ||
| 529 | +COL1 | ||
| 530 | +Lysocardiolipin | ||
| 531 | +Epithelium | ||
| 532 | +STAT3 | ||
| 533 | +Prevents | ||
| 534 | +Th1 | ||
| 535 | +NF | ||
| 536 | +CCN5 | ||
| 537 | +Snail | ||
| 538 | +Myogenic | ||
| 539 | +CD4 | ||
| 540 | +Akt | ||
| 541 | +TGFb1 | ||
| 542 | +Accelerated | ||
| 543 | +PDGF | ||
| 544 | +Intratracheal | ||
| 545 | +TGFB1 | ||
| 546 | +Cysteine | ||
| 547 | +Oxidant | ||
| 548 | +Effect | ||
| 549 | +Reprogramming | ||
| 550 | +IIP | ||
| 551 | +MS80 | ||
| 552 | +FOXF1 | ||
| 553 | +Promotes | ||
| 554 | +Assessment | ||
| 555 | +BLM | ||
| 556 | +CC16 | ||
| 557 | +BAL | ||
| 558 | +CD248 | ||
| 559 | +Ginsenoside | ||
| 560 | +Secreted | ||
| 561 | +Association | ||
| 562 | +IQ | ||
| 563 | +mTORC2 | ||
| 564 | +Established | ||
| 565 | +The | ||
| 566 | +Combined | ||
| 567 | +Jun | ||
| 568 | +UIP | ||
| 569 | +Sulf2 | ||
| 570 | +Thalidomide | ||
| 571 | +Bioenergetics | ||
| 572 | +TNF | ||
| 573 | +CCN2 | ||
| 574 | +NEU1 | ||
| 575 | +Attenuates | ||
| 576 | +HMGA2 | ||
| 577 | +Group | ||
| 578 | +Conversion | ||
| 579 | +Predisposition | ||
| 580 | +Transglutaminase | ||
| 581 | +Pathway | ||
| 582 | +Reviews | ||
| 583 | +Treg | ||
| 584 | +DDR2 | ||
| 585 | +Autophagy | ||
| 586 | +Hyper | ||
| 587 | +Bile | ||
| 588 | +Sunitinib | ||
| 589 | +Stiffening | ||
| 590 | +Signal | ||
| 591 | +Resolution | ||
| 592 | +De | ||
| 593 | +Type | ||
| 594 | +Factor | ||
| 595 | +Smad2 | ||
| 596 | +Single | ||
| 597 | +PPAR | ||
| 598 | +WNT5A | ||
| 599 | +Novo | ||
| 600 | +An | ||
| 601 | +EGFR | ||
| 602 | +Cub | ||
| 603 | +GLI | ||
| 604 | +HSP47 | ||
| 605 | +Early | ||
| 606 | +ERK1 | ||
| 607 | +TGFbeta | ||
| 608 | +Deficiency | ||
| 609 | +hydroxytryptamine2A | ||
| 610 | +BAX | ||
| 611 | +Inhibitory | ||
| 612 | +Integrin | ||
| 613 | +Suppression | ||
| 614 | +Shikonin | ||
| 615 | +SMAD3 | ||
| 616 | +Effects | ||
| 617 | +Metformin | ||
| 618 | +F1 | ||
| 619 | +MAPK | ||
| 620 | +Modulation | ||
| 621 | +Bleomycin | ||
| 622 | +Injury | ||
| 623 | +Elevated | ||
| 624 | +Cellular | ||
| 625 | +Radioligand | ||
| 626 | +Citrus | ||
| 627 | +TIAM1 | ||
| 628 | +Subjects | ||
| 629 | +Lung | ||
| 630 | +ARPC2 | ||
| 631 | +H19 | ||
| 632 | +EZH2 | ||
| 633 | +Pathways | ||
| 634 | +Is | ||
| 635 | +Microarray | ||
| 636 | +Fas | ||
| 637 | +CCN1 | ||
| 638 | +Ac | ||
| 639 | +miRNAs | ||
| 640 | +Myofibroblasts | ||
| 641 | +FFPE | ||
| 642 | +Inhibitor | ||
| 643 | +During | ||
| 644 | +Matrix | ||
| 645 | +Nrf2 | ||
| 646 | +Immunomodulation | ||
| 647 | +C5aR | ||
| 648 | +Gremlin | ||
| 649 | +High | ||
| 650 | +Concentration | ||
| 651 | +Evaluation | ||
| 652 | +Roles | ||
| 653 | +Number | ||
| 654 | +Bone | ||
| 655 | +ACLP | ||
| 656 | +Hypertension | ||
| 657 | +Lipogenic | ||
| 658 | +Uncoupling | ||
| 659 | +Signaling | ||
| 660 | +Lrp5 | ||
| 661 | +Berberine | ||
| 662 | +A4 | ||
| 663 | +CD11c | ||
| 664 | +miR | ||
| 665 | +Chop | ||
| 666 | +Galectin | ||
| 667 | +Alveolar | ||
| 668 | +Transition | ||
| 669 | +Plasma | ||
| 670 | +Impacts | ||
| 671 | +Smad4 | ||
| 672 | +Its | ||
| 673 | +Pathogenesis | ||
| 674 | +Inappropriate | ||
| 675 | +Investigation | ||
| 676 | +Beta | ||
| 677 | +Ca | ||
| 678 | +ERK | ||
| 679 | +Deregulation | ||
| 680 | +MSCs | ||
| 681 | +PTEN | ||
| 682 | +Lipoxin | ||
| 683 | +Nitric | ||
| 684 | +C1q | ||
| 685 | +KCa3 | ||
| 686 | +kappaB | ||
| 687 | +Involvement | ||
| 688 | +MCP | ||
| 689 | +Pleural | ||
| 690 | +EMMPRIN | ||
| 691 | +Smooth | ||
| 692 | +Synthesis | ||
| 693 | +Blockade | ||
| 694 | +Compared | ||
| 695 | +Transgelin |
scripts/__pycache__/wisse.cpython-36.pyc
0 → 100644
No preview for this file type
| 1 | +# missing word embeddings: | ||
| 2 | +profibrogenic | ||
| 3 | +199a | ||
| 4 | +p38 | ||
| 5 | +beta1 | ||
| 6 | +68 | ||
| 7 | +etiopathology | ||
| 8 | +1343 | ||
| 9 | +lymphopoietin | ||
| 10 | +29c | ||
| 11 | +185 | ||
| 12 | +5p | ||
| 13 | +17 | ||
| 14 | +dermatan | ||
| 15 | +1a | ||
| 16 | +13 | ||
| 17 | +424 | ||
| 18 | +101 | ||
| 19 | +p63 | ||
| 20 | +140 | ||
| 21 | +b1 | ||
| 22 | +fibrogenic | ||
| 23 | +gambogic | ||
| 24 | +nonkinase | ||
| 25 | +21 | ||
| 26 | +alpha3 | ||
| 27 | +154 | ||
| 28 | +2alpha | ||
| 29 | +chymase | ||
| 30 | +18a | ||
| 31 | +196a | ||
| 32 | +5100 | ||
| 33 | +smad2 | ||
| 34 | +7d | ||
| 35 | +541 | ||
| 36 | +1b | ||
| 37 | +acetylglucosaminidase | ||
| 38 | +326 | ||
| 39 | +47 | ||
| 40 | +dysregulates | ||
| 41 | +92a | ||
| 42 | +200 | ||
| 43 | +29a | ||
| 44 | +90 | ||
| 45 | +31 | ||
| 46 | +mitophagy | ||
| 47 | +b4 | ||
| 48 | +3p | ||
| 49 | +nexin | ||
| 50 | +dedifferentiating | ||
| 51 | +155 | ||
| 52 | +150 | ||
| 53 | +ubiquitinating | ||
| 54 | +10 | ||
| 55 | +486 | ||
| 56 | +19 | ||
| 57 | +avb6 | ||
| 58 | +fibrogenesis | ||
| 59 | +farnesoid | ||
| 60 | +haptotactic | ||
| 61 | +alpha3beta1 | ||
| 62 | +14 | ||
| 63 | +323a | ||
| 64 | +matricellular | ||
| 65 | +7a | ||
| 66 | +profibrotic | ||
| 67 | +bronchoalveolar | ||
| 68 | +26a | ||
| 69 | +18 | ||
| 70 | +salvianolic | ||
| 71 | +338 | ||
| 72 | +1beta | ||
| 73 | +p110y | ||
| 74 | +221 | ||
| 75 | +153 | ||
| 76 | +# missing MI weights: | ||
| 77 | +Compared | ||
| 78 | +Are | ||
| 79 | +True | ||
| 80 | +Ambroxol | ||
| 81 | +Diagnostic | ||
| 82 | +Alveolar | ||
| 83 | +Smad2 | ||
| 84 | +Neovessel | ||
| 85 | +RXFP1 | ||
| 86 | +Normal | ||
| 87 | +Shikonin | ||
| 88 | +Spiruchostatin | ||
| 89 | +ORP150 | ||
| 90 | +Tubastatin | ||
| 91 | +That | ||
| 92 | +Bone | ||
| 93 | +WISP1 | ||
| 94 | +Wt1 | ||
| 95 | +Smad4 | ||
| 96 | +ECM | ||
| 97 | +Syndecan | ||
| 98 | +Radioligand | ||
| 99 | +BAX | ||
| 100 | +De | ||
| 101 | +FAK | ||
| 102 | +Prevents | ||
| 103 | +Endothelin | ||
| 104 | +kB | ||
| 105 | +Promote | ||
| 106 | +Reversion | ||
| 107 | +Determining | ||
| 108 | +Cytokines | ||
| 109 | +Glucagon | ||
| 110 | +Pathways | ||
| 111 | +Myogenic | ||
| 112 | +SOCS1 | ||
| 113 | +Investigation | ||
| 114 | +Regulating | ||
| 115 | +Targeting | ||
| 116 | +Decoction | ||
| 117 | +Stromal | ||
| 118 | +PPARs | ||
| 119 | +HP | ||
| 120 | +Focal | ||
| 121 | +Transgelin | ||
| 122 | +Association | ||
| 123 | +Effects | ||
| 124 | +EGFR | ||
| 125 | +Gene | ||
| 126 | +Human | ||
| 127 | +Metalloproteinase | ||
| 128 | +Lower | ||
| 129 | +Rg1 | ||
| 130 | +Binding | ||
| 131 | +Therapeutic | ||
| 132 | +Mesenchymal | ||
| 133 | +CD248 | ||
| 134 | +Formation | ||
| 135 | +Cysteine | ||
| 136 | +Caveolin | ||
| 137 | +Type | ||
| 138 | +Signaling | ||
| 139 | +Molecular | ||
| 140 | +Alleviates | ||
| 141 | +Early | ||
| 142 | +Transforming | ||
| 143 | +Potential | ||
| 144 | +COL1A1 | ||
| 145 | +Plasminogen | ||
| 146 | +Factor | ||
| 147 | +Semaphorin | ||
| 148 | +CC16 | ||
| 149 | +Integrated | ||
| 150 | +Like | ||
| 151 | +Stat3 | ||
| 152 | +Tissue | ||
| 153 | +Signalling | ||
| 154 | +Phenotype | ||
| 155 | +TGFBR2 | ||
| 156 | +Homolog | ||
| 157 | +III | ||
| 158 | +MiR | ||
| 159 | +Lactic | ||
| 160 | +Pulmonary | ||
| 161 | +Fibroblastic | ||
| 162 | +Defect | ||
| 163 | +Molecules | ||
| 164 | +Yin | ||
| 165 | +MCP | ||
| 166 | +MicroRNA | ||
| 167 | +LIGHT | ||
| 168 | +Beyond | ||
| 169 | +Recombinant | ||
| 170 | +Compromised | ||
| 171 | +Ginsenoside | ||
| 172 | +P110 | ||
| 173 | +Production | ||
| 174 | +Lipogenic | ||
| 175 | +HRCT | ||
| 176 | +Its | ||
| 177 | +Implications | ||
| 178 | +Problem | ||
| 179 | +NH2 | ||
| 180 | +Fibrogenesis | ||
| 181 | +TOB2 | ||
| 182 | +SMAD3 | ||
| 183 | +Lin28B | ||
| 184 | +Significance | ||
| 185 | +Differential | ||
| 186 | +Cytokine | ||
| 187 | +Progressive | ||
| 188 | +Solution | ||
| 189 | +Identification | ||
| 190 | +Peptide | ||
| 191 | +Synthesis | ||
| 192 | +Protein | ||
| 193 | +Macrophage | ||
| 194 | +PDGF | ||
| 195 | +Repression | ||
| 196 | +CREB | ||
| 197 | +Cellular | ||
| 198 | +Plasma | ||
| 199 | +A4 | ||
| 200 | +Latent | ||
| 201 | +Wnt | ||
| 202 | +Proteasome | ||
| 203 | +Kinase | ||
| 204 | +Proteasomal | ||
| 205 | +Pathway | ||
| 206 | +Sirtuin | ||
| 207 | +MSCs | ||
| 208 | +D2 | ||
| 209 | +Absence | ||
| 210 | +Cells | ||
| 211 | +Thalidomide | ||
| 212 | +Regulation | ||
| 213 | +Hippel | ||
| 214 | +pH | ||
| 215 | +Chinese | ||
| 216 | +Th17 | ||
| 217 | +Uncoupling | ||
| 218 | +Periostin | ||
| 219 | +Promotes | ||
| 220 | +Amplification | ||
| 221 | +Smad | ||
| 222 | +Profibrotic | ||
| 223 | +Patients | ||
| 224 | +Subpleural | ||
| 225 | +Cytoskeletal | ||
| 226 | +Progression | ||
| 227 | +Lavage | ||
| 228 | +Angiotensin | ||
| 229 | +Domain | ||
| 230 | +Peripheral | ||
| 231 | +Inhibitor | ||
| 232 | +Associated | ||
| 233 | +Involvement | ||
| 234 | +Serum | ||
| 235 | +Toll | ||
| 236 | +Activation | ||
| 237 | +SPARC | ||
| 238 | +Attenuating | ||
| 239 | +Resveratrol | ||
| 240 | +PI3K | ||
| 241 | +Induced | ||
| 242 | +Matrix | ||
| 243 | +Leucine | ||
| 244 | +BALF | ||
| 245 | +Defective | ||
| 246 | +Negative | ||
| 247 | +JNK | ||
| 248 | +Receptor | ||
| 249 | +Reprogramming | ||
| 250 | +CD11c | ||
| 251 | +Nuclear | ||
| 252 | +hydroxytryptamine2A | ||
| 253 | +Is | ||
| 254 | +Sustained | ||
| 255 | +Essential | ||
| 256 | +Beta | ||
| 257 | +CBP | ||
| 258 | +miRNA | ||
| 259 | +Pathogenesis | ||
| 260 | +Aging | ||
| 261 | +Neutrophil | ||
| 262 | +Nitrated | ||
| 263 | +Resolution | ||
| 264 | +Signal | ||
| 265 | +Low | ||
| 266 | +New | ||
| 267 | +Contribution | ||
| 268 | +Homeostasis | ||
| 269 | +HLA | ||
| 270 | +Sub | ||
| 271 | +26S | ||
| 272 | +HMGA2 | ||
| 273 | +Treg | ||
| 274 | +Significant | ||
| 275 | +Blockade | ||
| 276 | +HSP27 | ||
| 277 | +Clinical | ||
| 278 | +Th1 | ||
| 279 | +Triptolide | ||
| 280 | +Dependent | ||
| 281 | +Inflammation | ||
| 282 | +VEGF | ||
| 283 | +PINK1 | ||
| 284 | +H1N1 | ||
| 285 | +E2 | ||
| 286 | +Discovery | ||
| 287 | +Interplay | ||
| 288 | +Secretory | ||
| 289 | +CCL2 | ||
| 290 | +Dehydroepiandrosterone | ||
| 291 | +Modulation | ||
| 292 | +Mechanisms | ||
| 293 | +MMP | ||
| 294 | +Mediated | ||
| 295 | +H19 | ||
| 296 | +Morphogenic | ||
| 297 | +Ac | ||
| 298 | +Corilagin | ||
| 299 | +Tannic | ||
| 300 | +Hypertension | ||
| 301 | +WNT10A | ||
| 302 | +MCTC | ||
| 303 | +Possible | ||
| 304 | +Studies | ||
| 305 | +Pigment | ||
| 306 | +Wilms | ||
| 307 | +Hydrogen | ||
| 308 | +Azithromycin | ||
| 309 | +Number | ||
| 310 | +Transglutaminase | ||
| 311 | +Outcomes | ||
| 312 | +NCI | ||
| 313 | +RNA | ||
| 314 | +WNT5A | ||
| 315 | +Mice | ||
| 316 | +Methylation | ||
| 317 | +Novel | ||
| 318 | +Nitric | ||
| 319 | +Cell | ||
| 320 | +HSP47 | ||
| 321 | +TRPV4 | ||
| 322 | +Protease | ||
| 323 | +Release | ||
| 324 | +For | ||
| 325 | +Ligands | ||
| 326 | +TGF | ||
| 327 | +Epithelium | ||
| 328 | +Aortic | ||
| 329 | +NADPH | ||
| 330 | +Herpes | ||
| 331 | +Bach1 | ||
| 332 | +Ca | ||
| 333 | +Dysregulated | ||
| 334 | +Membrane | ||
| 335 | +TGFBR | ||
| 336 | +Connective | ||
| 337 | +Decreased | ||
| 338 | +TIAM1 | ||
| 339 | +Serpin | ||
| 340 | +Fstl1 | ||
| 341 | +CCN1 | ||
| 342 | +BLM | ||
| 343 | +Sphingosine | ||
| 344 | +C57BL | ||
| 345 | +Data | ||
| 346 | +Dogs | ||
| 347 | +Organizing | ||
| 348 | +IGF | ||
| 349 | +COL1 | ||
| 350 | +CD44V6 | ||
| 351 | +BMPER | ||
| 352 | +ARPC2 | ||
| 353 | +Galectin | ||
| 354 | +Lindau | ||
| 355 | +Inappropriate | ||
| 356 | +Microencapsulation | ||
| 357 | +Oxidant | ||
| 358 | +M2 | ||
| 359 | +Renshen | ||
| 360 | +Sputum | ||
| 361 | +Snail | ||
| 362 | +Inducer | ||
| 363 | +Prognostic | ||
| 364 | +Storage | ||
| 365 | +MAPK | ||
| 366 | +Citrus | ||
| 367 | +PPAR | ||
| 368 | +Collagen | ||
| 369 | +Matriptase | ||
| 370 | +Arsenic | ||
| 371 | +Long | ||
| 372 | +F1 | ||
| 373 | +Deleted | ||
| 374 | +Genomewide | ||
| 375 | +PXS64 | ||
| 376 | +Lysyl | ||
| 377 | +ERK | ||
| 378 | +Calu | ||
| 379 | +MyD88 | ||
| 380 | +aB | ||
| 381 | +Activity | ||
| 382 | +Applying | ||
| 383 | +Secreted | ||
| 384 | +Control | ||
| 385 | +BM | ||
| 386 | +Mitochondrial | ||
| 387 | +Age | ||
| 388 | +EZH2 | ||
| 389 | +Overproduction | ||
| 390 | +Way | ||
| 391 | +The | ||
| 392 | +DDR2 | ||
| 393 | +1A | ||
| 394 | +Rho | ||
| 395 | +Bronchoalveolar | ||
| 396 | +TGFbeta1 | ||
| 397 | +Akt | ||
| 398 | +ERK1 | ||
| 399 | +Novo | ||
| 400 | +Curcumin | ||
| 401 | +FGF | ||
| 402 | +C5aR | ||
| 403 | +17A | ||
| 404 | +Lysocardiolipin | ||
| 405 | +Protects | ||
| 406 | +Predisposition | ||
| 407 | +Thy | ||
| 408 | +C1q | ||
| 409 | +Nintedanib | ||
| 410 | +High | ||
| 411 | +KCa3 | ||
| 412 | +Olodaterol | ||
| 413 | +Reviews | ||
| 414 | +Proliferation | ||
| 415 | +Immunomodulation | ||
| 416 | +Attenuates | ||
| 417 | +Gremlin | ||
| 418 | +Cthrc1 | ||
| 419 | +Vimentin | ||
| 420 | +Elk1 | ||
| 421 | +Lipoxin | ||
| 422 | +IQ | ||
| 423 | +Roles | ||
| 424 | +BAL | ||
| 425 | +Relation | ||
| 426 | +Autophagy | ||
| 427 | +IGFBP | ||
| 428 | +Inhibition | ||
| 429 | +BMP | ||
| 430 | +Anchorage | ||
| 431 | +ITGB6 | ||
| 432 | +Mode | ||
| 433 | +Modulating | ||
| 434 | +miRNAs | ||
| 435 | +Inhibit | ||
| 436 | +PHGDH | ||
| 437 | +Up | ||
| 438 | +Phosphatase | ||
| 439 | +TGFbeta | ||
| 440 | +C3aR | ||
| 441 | +Pseudomonas | ||
| 442 | +Comparative | ||
| 443 | +Reduced | ||
| 444 | +Crosstalk | ||
| 445 | +Conversion | ||
| 446 | +Injury | ||
| 447 | +Phenotypes | ||
| 448 | +CD4 | ||
| 449 | +MicroRNAs | ||
| 450 | +Regulates | ||
| 451 | +TNFalpha | ||
| 452 | +Pirfenidone | ||
| 453 | +Raised | ||
| 454 | +Old | ||
| 455 | +Cartilage | ||
| 456 | +Prostaglandin | ||
| 457 | +BMP3 | ||
| 458 | +BARD1 | ||
| 459 | +Deficiency | ||
| 460 | +RhoA | ||
| 461 | +AKT2 | ||
| 462 | +NF | ||
| 463 | +Cigarette | ||
| 464 | +GATA | ||
| 465 | +MAP3K19 | ||
| 466 | +sTNFR | ||
| 467 | +NK | ||
| 468 | +Different | ||
| 469 | +Subjects | ||
| 470 | +Autoimmunity | ||
| 471 | +Mast | ||
| 472 | +Single | ||
| 473 | +Microsomal | ||
| 474 | +WNT7B | ||
| 475 | +MK2 | ||
| 476 | +TGFb1 | ||
| 477 | +CCN2 | ||
| 478 | +Growth | ||
| 479 | +Prostatic | ||
| 480 | +PGE | ||
| 481 | +Abrogation | ||
| 482 | +Stem | ||
| 483 | +EBV | ||
| 484 | +Microsatellite | ||
| 485 | +Nrf2 | ||
| 486 | +Epstein | ||
| 487 | +Club | ||
| 488 | +TGFB1 | ||
| 489 | +ATG4B | ||
| 490 | +Differentiation | ||
| 491 | +EMMPRIN | ||
| 492 | +Smad3 | ||
| 493 | +Genetic | ||
| 494 | +Sorafenib | ||
| 495 | +IFN | ||
| 496 | +Impacts | ||
| 497 | +Key | ||
| 498 | +Activated | ||
| 499 | +AKT | ||
| 500 | +Th2 | ||
| 501 | +PTEN | ||
| 502 | +USP11 | ||
| 503 | +IL | ||
| 504 | +Effect | ||
| 505 | +HDAC4 | ||
| 506 | +Free | ||
| 507 | +Sunitinib | ||
| 508 | +Established | ||
| 509 | +Fluid | ||
| 510 | +Decisive | ||
| 511 | +Inhibits | ||
| 512 | +Marks | ||
| 513 | +mTORC2 | ||
| 514 | +Trigger | ||
| 515 | +Concentration | ||
| 516 | +Intratracheal | ||
| 517 | +Participation | ||
| 518 | +Against | ||
| 519 | +Expression | ||
| 520 | +kappaB | ||
| 521 | +Role | ||
| 522 | +Rats | ||
| 523 | +Intrinsic | ||
| 524 | +Epigenetic | ||
| 525 | +Smooth | ||
| 526 | +NOX4 | ||
| 527 | +Tumor | ||
| 528 | +Rapamycin | ||
| 529 | +microRNA | ||
| 530 | +Overexpression | ||
| 531 | +Current | ||
| 532 | +Muc5ac | ||
| 533 | +Combined | ||
| 534 | +II | ||
| 535 | +D1 | ||
| 536 | +Accelerated | ||
| 537 | +Regulator | ||
| 538 | +Pleural | ||
| 539 | +Invasive | ||
| 540 | +alphaEbeta7 | ||
| 541 | +From | ||
| 542 | +MUM | ||
| 543 | +Immunoglobulin | ||
| 544 | +Beta1 | ||
| 545 | +Small | ||
| 546 | +Sphingolipids | ||
| 547 | +Stiffening | ||
| 548 | +FFPE | ||
| 549 | +miR | ||
| 550 | +LPA1 | ||
| 551 | +B4 | ||
| 552 | +Two | ||
| 553 | +Extracellular | ||
| 554 | +Enhances | ||
| 555 | +Evaluation | ||
| 556 | +Recent | ||
| 557 | +Elevated | ||
| 558 | +Re | ||
| 559 | +IIP | ||
| 560 | +CD44 | ||
| 561 | +Interactions | ||
| 562 | +CXCL9 | ||
| 563 | +Protective | ||
| 564 | +Ets | ||
| 565 | +Preventive | ||
| 566 | +Establishment | ||
| 567 | +ALK5 | ||
| 568 | +Increased | ||
| 569 | +Values | ||
| 570 | +kDa | ||
| 571 | +Large | ||
| 572 | +May | ||
| 573 | +Transcription | ||
| 574 | +ACLP | ||
| 575 | +Cryptogenic | ||
| 576 | +Ubiquitin | ||
| 577 | +GLI | ||
| 578 | +L5 | ||
| 579 | +Discoidin | ||
| 580 | +Bleomycin | ||
| 581 | +Carbon | ||
| 582 | +Renin | ||
| 583 | +CUX1 | ||
| 584 | +Correct | ||
| 585 | +Constitutive | ||
| 586 | +SNAI | ||
| 587 | +Bile | ||
| 588 | +Assessment | ||
| 589 | +Fibrotic | ||
| 590 | +Differing | ||
| 591 | +Development | ||
| 592 | +Channel | ||
| 593 | +Simvastatin | ||
| 594 | +CAM | ||
| 595 | +Fibroblasts | ||
| 596 | +Melatonin | ||
| 597 | +SIRT6 | ||
| 598 | +STAT3 | ||
| 599 | +Tensin | ||
| 600 | +Pingfei | ||
| 601 | +Stanniocalcin | ||
| 602 | +Bax | ||
| 603 | +Group | ||
| 604 | +mRNA | ||
| 605 | +Selectivity | ||
| 606 | +Emphysema | ||
| 607 | +Barr | ||
| 608 | +Berberine | ||
| 609 | +Metformin | ||
| 610 | +Hsp90 | ||
| 611 | +CDCP1 | ||
| 612 | +T869C | ||
| 613 | +EMT | ||
| 614 | +ADAM | ||
| 615 | +Cub | ||
| 616 | +Pneumonia | ||
| 617 | +Induces | ||
| 618 | +FOXF1 | ||
| 619 | +Upregulation | ||
| 620 | +H441 | ||
| 621 | +RAGE | ||
| 622 | +Myofibroblasts | ||
| 623 | +JAK2 | ||
| 624 | +Interstitial | ||
| 625 | +Amplified | ||
| 626 | +Fibrosis | ||
| 627 | +Microarray | ||
| 628 | +Developmental | ||
| 629 | +CTGF | ||
| 630 | +Serine | ||
| 631 | +Integrin | ||
| 632 | +AP | ||
| 633 | +Fas | ||
| 634 | +During | ||
| 635 | +CCN5 | ||
| 636 | +Insulin | ||
| 637 | +Pleiotropic | ||
| 638 | +TGFb | ||
| 639 | +Evidence | ||
| 640 | +Phenoconversion | ||
| 641 | +Comparison | ||
| 642 | +Smoke | ||
| 643 | +Box | ||
| 644 | +microRNAs | ||
| 645 | +Anti | ||
| 646 | +Suppression | ||
| 647 | +A549 | ||
| 648 | +Chop | ||
| 649 | +Jun | ||
| 650 | +Myofibroblast | ||
| 651 | +Dysfunction | ||
| 652 | +Axis | ||
| 653 | +IPF | ||
| 654 | +MS80 | ||
| 655 | +S1P | ||
| 656 | +Inhibitory | ||
| 657 | +Interleukin | ||
| 658 | +Action | ||
| 659 | +Bioenergetics | ||
| 660 | +Transition | ||
| 661 | +Hyper | ||
| 662 | +Lrp5 | ||
| 663 | +Model | ||
| 664 | +cAMP | ||
| 665 | +Medical | ||
| 666 | +SDKP | ||
| 667 | +UIP | ||
| 668 | +Animal | ||
| 669 | +Forkhead | ||
| 670 | +lncRNA | ||
| 671 | +Lung | ||
| 672 | +Antifibrotic | ||
| 673 | +Induction | ||
| 674 | +Titration | ||
| 675 | +Epithelial | ||
| 676 | +OSF | ||
| 677 | +ATPase | ||
| 678 | +Reactive | ||
| 679 | +TNF | ||
| 680 | +aVb6 | ||
| 681 | +Molecule | ||
| 682 | +NEU1 | ||
| 683 | +Deregulation | ||
| 684 | +Idiopathic | ||
| 685 | +An | ||
| 686 | +Using | ||
| 687 | +Quantifying | ||
| 688 | +BMPR2 | ||
| 689 | +Foxp3high | ||
| 690 | +sL1 | ||
| 691 | +Profile | ||
| 692 | +Sulf2 | ||
| 693 | +Mediates | ||
| 694 | +Markers | ||
| 695 | +VCAM |
scripts/wisse.py
0 → 100644
| 1 | +#!/usr/bin/python | ||
| 2 | +# -*- coding: latin-1 -*- | ||
| 3 | +# Python2.7 | ||
| 4 | + | ||
| 5 | +import numpy as np | ||
| 6 | +import logging | ||
| 7 | +import os | ||
| 8 | +from functools import partial | ||
| 9 | +from pdb import set_trace as st | ||
| 10 | +logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', | ||
| 11 | + level=logging.INFO) | ||
| 12 | + | ||
| 13 | + | ||
| 14 | +class wisse(object): | ||
| 15 | + """ Both the TFIDFVectorizer and the word embedding model must be pretrained, either from the local | ||
| 16 | + sentence corpus or from model persistence. | ||
| 17 | + """ | ||
def __init__(self, embeddings, vectorizer, tf_tfidf, combiner = "sum"):
    """Keep the pretrained components and pick the combination function.

    embeddings -- word-vector lookup, stored as ``self.embedding``.
    vectorizer -- must provide ``build_tokenizer()``; its result becomes
                  ``self.tokenize`` and the object itself ``self.tfidf``.
    tf_tfidf   -- stored unchanged as ``self.pred_tfidf``.
    combiner   -- a value starting with "avg" selects ``np.mean``; any
                  other value selects ``np.sum``. Both reduce along axis 0.
    """
    self.tokenize = vectorizer.build_tokenizer()
    self.tfidf = vectorizer
    self.embedding = embeddings
    self.pred_tfidf = tf_tfidf
    # Only the reducer differs between the two modes; the axis is shared.
    reducer = np.mean if combiner.startswith("avg") else np.sum
    self.comb = partial(reducer, axis = 0)
| 27 | + | ||
| 28 | + | ||
def fit(self, X, y = None):  # Scikit-learn template
    """Remember X as ``self.sentences`` when it is a list; return self.

    Any non-list X is ignored. ``y`` is accepted but not used.
    """
    if not isinstance(X, list):
        return self

    self.sentences = X
    return self
| 34 | + | ||
| 35 | + | ||
def transform(self, X):
    """Dispatch on the type of X.

    A ``str`` is embedded through ``infer_sentence``; a ``list`` is handed
    to ``fit`` and that call's result is returned. Any other type yields
    None.
    """
    if isinstance(X, str):
        return self.infer_sentence(X)

    if isinstance(X, list):
        return self.fit(X)

    return None
| 42 | + | ||
| 43 | + | ||
| 44 | + def fit_transform(self, X, y=None): | ||
| 45 | + return self.transform(X) | ||
| 46 | + | ||
| 47 | + | ||
| 48 | + def infer_sentence(self, sent): | ||
| 49 | + ss = self.tokenize(sent) | ||
| 50 | + missing_bow = [] | ||
| 51 | + missing_cbow = [] | ||
| 52 | + series = {} | ||
| 53 | + | ||
| 54 | + if not ss == []: | ||
| 55 | + self.weights, m = self.infer_tfidf_weights(ss) | ||
| 56 | + else: | ||
| 57 | + return None | ||
| 58 | + | ||
| 59 | + missing_bow += m | ||
| 60 | + | ||
| 61 | + for w in self.weights: | ||
| 62 | + try: | ||
| 63 | + series[w] = (self.weights[w], self.embedding[w]) | ||
| 64 | + except KeyError: | ||
| 65 | + series[w] = None | ||
| 66 | + missing_cbow.append(w) | ||
| 67 | + continue | ||
| 68 | + except IndexError: | ||
| 69 | + continue | ||
| 70 | + | ||
| 71 | + if self.weights == {}: return None | ||
| 72 | + # Embedding the sentence... : | ||
| 73 | + sentence = np.array([series[w][1] for w in series if not series[w] is None]) | ||
| 74 | + series = {} | ||
| 75 | + | ||
| 76 | + return missing_cbow, missing_bow, self.comb(sentence) | ||
| 77 | + | ||
| 78 | + | ||
| 79 | + def infer_tfidf_weights(self, sentence): | ||
| 80 | + existent = {} | ||
| 81 | + missing = [] | ||
| 82 | + | ||
| 83 | + if not self.tfidf: | ||
| 84 | + for word in sentence: | ||
| 85 | + existent[word] = 1.0 | ||
| 86 | + | ||
| 87 | + return existent, missing | ||
| 88 | + | ||
| 89 | + if self.pred_tfidf: | ||
| 90 | + unseen = self.tfidf.transform([" ".join(sentence)]).toarray() | ||
| 91 | + for word in sentence: | ||
| 92 | + try: | ||
| 93 | + existent[word] = unseen[0][self.tfidf.vocabulary_[word]] | ||
| 94 | + except KeyError: | ||
| 95 | + missing.append(word) | ||
| 96 | + continue | ||
| 97 | + else: | ||
| 98 | + for word in sentence: | ||
| 99 | + try: | ||
| 100 | + weight = vectorizer.idf_[vectorizer.vocabulary_[word]] | ||
| 101 | + existent[word] = weight if weight > 2 else 0.01 | ||
| 102 | + except KeyError: | ||
| 103 | + missing.append(word) | ||
| 104 | + continue | ||
| 105 | + | ||
| 106 | + return existent, missing | ||
| 107 | + | ||
| 108 | + | ||
| 109 | + def __iter__(self): | ||
| 110 | + for s in self.sentences: | ||
| 111 | + yield self.transform(s) | ||
| 112 | + | ||
| 113 | + | ||
def save_dense(directory, filename, array):
    """Save `array` with ``np.save`` as ``<directory>/<filename>``.

    Nothing is written unless `filename` is purely alphabetic. The function
    always returns None.
    """
    prefix = os.path.normpath(directory) + '/'
    if not filename.isalpha():
        return None
    np.save(prefix + filename, array)
| 123 | + | ||
def load_dense(filename):
    """Load and return what ``np.load`` reads from `filename`."""
    dense = np.load(filename)
    return dense
| 126 | + | ||
| 127 | + | ||
def load_sparse_bsr(filename):
    """Rebuild a SciPy BSR matrix from an ``.npz`` archive.

    The archive must hold the ``data``, ``indices``, ``indptr`` and
    ``shape`` arrays, which is the layout written by `save_sparse_bsr`.
    `filename` is handed to ``np.load`` unchanged.
    """
    # Imported locally because the module never imports bsr_matrix.
    from scipy.sparse import bsr_matrix

    loader = np.load(filename)
    try:
        return bsr_matrix((loader['data'], loader['indices'], loader['indptr']),
                          shape=tuple(loader['shape']))
    finally:
        # Release the archive handle once the arrays have been read.
        loader.close()
| 132 | + | ||
| 133 | + | ||
def save_sparse_bsr(directory, filename, array):
    """Save a sparse matrix in BSR layout as ``<directory>/<filename>.npz``.

    `array` is converted with ``tobsr()`` and its ``data``, ``indices``,
    ``indptr`` and ``shape`` are stored with ``np.savez``, which adds the
    ``.npz`` extension automatically. Nothing is written unless `filename`
    is purely alphabetic. The function always returns None.
    """
    directory = os.path.normpath(directory) + '/'
    # The name under test is the target file name (there is no `word` here).
    if not filename.isalpha():
        return None

    array = array.tobsr()
    np.savez(directory + filename, data=array.data, indices=array.indices,
             indptr=array.indptr, shape=array.shape)
| 143 | + | ||
| 144 | + | ||
class vector_space(object):
    """Word-indexed access to vectors stored one file per word.

    `directory` is either a plain directory or a ``.tar.gz`` archive. The
    word of each entry is its base name with the expected extension removed
    (``.npz`` when `sparse` is true, ``.npy`` otherwise).
    """
    def __init__(self, directory, sparse = False):
        self.sparse = sparse
        suffix = ".npz" if sparse else ".npy"
        self._tar = directory.endswith(".tar.gz")

        if self._tar:
            import tarfile
            self.tar = tarfile.open(directory)
            # Map every word to the name of its member inside the archive.
            self.words = {}
            for member_name in self.tar.getnames():
                key = os.path.basename(member_name).replace(suffix, '')
                self.words[key] = member_name
        else:
            root = os.path.normpath(directory) + '/'
            # Map every word to the path of its file on disk.
            self.words = dict((entry.replace(suffix, ''), root + entry)
                              for entry in os.listdir(root))


    def __getitem__(self, item):
        """Return the vector stored for `item`; unknown words raise KeyError."""
        location = self.words[item]
        if self._tar:
            # Archived vectors are read through a file object of the member.
            source = self.tar.extractfile(self.tar.getmember(location))
        else:
            source = location

        loader = load_sparse_bsr if self.sparse else load_dense
        return loader(source)
| 182 | + | ||
| 183 | + | ||
def keyed2indexed(keyed_model, output_dir = "word_embeddings/", parallel = True, n_jobs = -1):
    """Write every vector of `keyed_model` to its own file in `output_dir`.

    The words are taken from ``keyed_model.vocab`` and each vector is stored
    with `save_dense`. The output directory is created when it does not
    exist. With `parallel` set, the writes are dispatched through joblib
    using `n_jobs` workers.
    """
    output_dir = os.path.normpath(output_dir) + '/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if not parallel:
        for token in keyed_model.vocab:
            save_dense(output_dir, token, keyed_model[token])
        return

    from joblib import Parallel, delayed

    jobs = (delayed(save_dense)(output_dir, token, keyed_model[token])
            for token in keyed_model.vocab)
    Parallel(n_jobs = n_jobs, verbose = 10)(jobs)
| 197 | + | ||
| 198 | + | ||
class streamer(object):
    """Re-iterable stream over the lines of a text file.

    Every iteration reopens `file_name` and yields its lines one by one with
    the surrounding whitespace stripped.
    """
    def __init__(self, file_name):
        self.file_name = file_name

    def __iter__(self):
        # The context manager closes the file when the iteration finishes or
        # the generator is discarded.
        with open(self.file_name) as f:
            for s in f:
                yield s.strip()
scripts/wisse.pyc
0 → 100644
No preview for this file type
scripts/wisse_example.py
0 → 100644
| 1 | +#!/usr/bin/python | ||
| 2 | +# -*- coding: latin-1 -*- | ||
| 3 | +# Python2.7 | ||
| 4 | +from gensim.models.keyedvectors import KeyedVectors as vDB | ||
| 5 | +from sklearn.feature_extraction.text import TfidfVectorizer | ||
| 6 | +import numpy as np | ||
| 7 | +#import numexpr as ne | ||
| 8 | +import argparse | ||
| 9 | +#import _pickle as pickle | ||
| 10 | +#import cPickle as pickle | ||
| 11 | +import logging | ||
| 12 | +import os | ||
| 13 | +from functools import partial | ||
| 14 | +import wisse | ||
| 15 | + | ||
| 16 | + | ||
| 17 | +load_vectors = vDB.load_word2vec_format | ||
| 18 | + | ||
| 19 | +logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', | ||
| 20 | + level=logging.INFO) | ||
| 21 | + | ||
| 22 | + | ||
if __name__ == "__main__":
    # Command line example: embeds every line of the input file and writes
    # the sentence vectors plus a report of the out-of-vocabulary words.

    # Imported here because the module-level pickle imports are commented
    # out although a pickled IDF model is loaded below; sys is needed to
    # leave with a failure status.
    import pickle
    import sys

    parser = argparse.ArgumentParser(description="""This use example shows sentence
        embedding by using WISSE. The input is a text file which has a sentece in
        each of its rows. The output file has two tab-separated columns: the index
        line of the sentece in the input file and the sentence vector representation
        .""")
    parser.add_argument("--idfmodel", help = """Input file containing IDF
                                    pre-trained weights. If not provided,
                                    all word vector weights will be set to
                                    1.0. If 'local' tf-idf weights will be
                                    computed locally from the input file
                                    (pickled sklearn object).""",
                                    default = None)
    parser.add_argument("--embedmodel", help = """Input file containing word
                                    embeddings model (binary and text
                                    are allowed).""", required = True)
    parser.add_argument("--output", help = """Output file containing the sentence
                                    embeddings.""", default = "")
    parser.add_argument("--input", help = """Input file containing a sentence
                                    by row.""", required = True)
    parser.add_argument("--comb", help = """Desired word vector combination for
                                    sentence representation {sum, avg}.
                                    (default = 'sum')""", default = "sum")
    parser.add_argument("--suffix", nargs = '?', help = """A suffix to be added
                                    to the output file (default = '')""",
                                    default = "", required = False)
    parser.add_argument("--tfidf", help="""To predict TFIDF complete weights
                                    ('tfidf') or use only partial IDFs
                                    ('idf'). (default = 'tfidf')""",
                                    default = "tfidf")
    parser.add_argument("--localw", help = """TFIDF word vector weights
                                    computed locally from the input file of
                                    sentences {freq, binary, sublinear}
                                    (default='none').""", default = "none")
    parser.add_argument("--stop", help = """Toggles stripping stop words in
                                    locally computed word vector weights.""",
                                    action = "store_true")
    parser.add_argument("--format", help = """The format of the embedding model
                                    file: {binary, text, wisse}.
                                    default = 'binary'""", default = "binary")
    args = parser.parse_args()

    # --idfmodel is optional (None means uniform weights), so it must be
    # checked before calling string or path operations on it.
    has_idf = args.idfmodel is not None
    use_local_idf = has_idf and args.idfmodel.startswith("local")

    # ---- Input validation -------------------------------------------------
    if not args.format.startswith("wisse"):
        if not os.path.isfile(args.embedmodel):
            logging.info("""Embedding model file does not exist (EXIT):
                \n%s\n ...""" % args.embedmodel)
            sys.exit(1)
    elif not os.path.exists(args.embedmodel):
        logging.info("""Embedding model directory does not exist (EXIT):
                \n%s\n ...""" % args.embedmodel)
        sys.exit(1)

    if has_idf and not use_local_idf and not os.path.isfile(args.idfmodel):
        logging.info("""IDF model file does not exist (EXIT):
                \n%s\n ...""" % args.idfmodel)
        sys.exit(1)
    if not os.path.isfile(args.input):
        logging.info("""Input file does not exist (EXIT):
                \n%s\n ...""" % args.input)
        sys.exit(1)

    # These names are part of the default output file name, so they have to
    # be known before that name is composed.
    embedding_name = os.path.basename(args.embedmodel).split(".")[0]
    tfidf_name = (os.path.basename(args.idfmodel).split(".")[0]
                  if has_idf else "none")

    if args.output != "":
        output_dir = os.path.dirname(args.output)
        if output_dir != "" and not os.path.exists(output_dir):
            logging.info("""Output directory does not exist (EXIT):
                \n%s\n ...""" % args.output)
            sys.exit(1)
        output_name = args.output
    else:
        suffix = "_".join([embedding_name,
                           args.comb,
                           args.tfidf,
                           "local" if use_local_idf else tfidf_name,
                           # '--suffix' given without a value parses as None.
                           args.suffix or ""]).strip("_")
        output_name = args.input + ".output_" + suffix

    # Full TF-IDF weights only for '--tfidf tfidf...'; any other value makes
    # the embedder fall back to plain IDF weights.
    pred_tfidf = args.tfidf.startswith("tfidf")

    vectorizer = TfidfVectorizer(min_df = 1,
                    encoding = "latin-1",
                    decode_error = "replace",
                    lowercase = True,
                    binary = True if args.localw.startswith("bin") else False,
                    sublinear_tf = True if args.localw.startswith("subl") else False,
                    stop_words = "english" if args.stop else None)

    sentences = wisse.streamer(args.input)

    # ---- Word weighting model ---------------------------------------------
    if use_local_idf:
        logging.info("Fitting local TFIDF weights from: %s ..." % args.input)
        tfidf = vectorizer.fit(sentences)

    elif has_idf:
        logging.info("Loading global TFIDF weights from: %s ..." % args.idfmodel)
        with open(args.idfmodel, 'rb') as f:
            # NOTE: unpickling can execute arbitrary code; only load IDF
            # models that come from a trusted source.
            tfidf = pickle.load(f)#, encoding = 'latin-1')

    else:
        tfidf = False

    # ---- Word embedding model ---------------------------------------------
    try:
        if args.format.startswith("bin"):
            embedding = load_vectors(args.embedmodel, binary = True,
                                                    encoding = "latin-1")
        elif args.format.startswith("tex"):
            embedding = load_vectors(args.embedmodel, binary = False,
                                                    encoding = "latin-1")
        else:
            embedding = wisse.vector_space(args.embedmodel, sparse = False)

    except Exception:
        # logging.exception keeps the traceback, so the actual cause of the
        # failure is not lost.
        logging.exception(
            """Error while loading word embedding model. Verify if the file
            is broken (EXIT)...\n%s\n""" % args.embedmodel)
        sys.exit(1)

    missing_bow = []    # Stores missing words in the TFIDF model
    missing_cbow = []   # Stores missing words in the W2V model
    logging.info("\n\nEmbedding sentences and saving then to a the output file..\n%s\n" % output_name)

    # The embedder does not depend on the sentence, so it is built once and
    # it honours the --tfidf and --comb options.
    series = wisse.wisse(embeddings = embedding, vectorizer = tfidf,
                         tf_tfidf = pred_tfidf, combiner = args.comb)

    with open(output_name, "w") as fo:
        # sidx is the 1-based index of the sentence in the input file.
        for sidx, sent in enumerate(sentences, 1):
            try:
                mc, mb, vector = series.transform(sent)
            except TypeError:
                # transform() returns None when a sentence cannot be
                # embedded; such sentences are skipped.
                continue

            # At this point you can use the embedding 'vector' for any application as it
            # is a numpy array. Also you can simply save the vectors in text format as
            # follows:
            missing_cbow += mc
            missing_bow += mb
            fo.write("%d\t%s\n" % (sidx, np.array2string(vector,
                                formatter = {'float_kind':lambda x: "%.6f" % x},
                                max_line_width = 20000).strip(']').strip('[') ))

    missing_name = (os.path.basename(args.input).split(".")[0] + "_" +
                                                    embedding_name + "_" +
                                                    tfidf_name + ".missing")
    logging.info("\n\nSaving missing vocabulary to %s ..\n\n" % missing_name)

    with open(missing_name, "w") as f:
        f.write("# missing word embeddings:\n")
        for w in set(missing_cbow):
            f.write("%s\n" % w)

        f.write("# missing MI weights:\n")
        for w in set(missing_bow):
            f.write("%s\n" % w)

    logging.info("FINISHED! \n")
-
Please register or login to post a comment