upload

Estefani Gaytan Nunez
Commit e9ea7a3205512b9bbb22ddda56c97f65854e1bb0 e9ea7a32 1 parent 67d2b78c
Showing 9 changed files with 8 additions and 34 deletions
CoreNLP/bin/annotation/get-raw-sentences_v4.sh
CoreNLP/input/annotation/bg_sentences_v4.txt
predict-annot/bin/tagging/tagging.py
predict-annot/bin/tagging/tlibs.py
predict-annot/mapping/bg_sentences_midx.txt → predict-annot/mapping/bg_sentences_midx_v3.txt
predict-annot/mapping/bg_sentences_midx_v4.txt
predict-annot/output/annot-input_bg_outputII_v4.txt
predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.txt
predict-annot/reports/output_tagging_report_v4.txt
--- a/CoreNLP/bin/annotation/get-raw-sentences_v4.sh
View file @e9ea7a3
+++ b/CoreNLP/bin/annotation/get-raw-sentences_v4.sh
View file @e9ea7a3
@@ -34,7 +34,7 @@ echo
 echo
 echo "Add sentence-end-tag PGCGROWTHCONDITIONS"
 #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"}  length($2) > 3 { print  $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g'  | sed 's/-/\t/' | sed 's/-/\t/' )
- cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sort | uniq)
+ cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sed 's/\\null\\/null/g' | sort | uniq)
 echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output
 wc $output
 echo "$cext" | cut -f1-3,5 > $mapping
--- a/CoreNLP/input/annotation/bg_sentences_v4.txt
View file @e9ea7a3
+++ b/CoreNLP/input/annotation/bg_sentences_v4.txt
View file @e9ea7a3
--- a/predict-annot/bin/tagging/tagging.py
View file @e9ea7a3
+++ b/predict-annot/bin/tagging/tagging.py
View file @e9ea7a3
@@ -61,6 +61,7 @@ import training_validation_v14 as training
 # --variant		    13 
 
 #python3 tagging.py  --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/  --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/  --outputFileI  annot-input_bg_outputI.txt  --outputFileII  annot-input_bg_outputII.txt  --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models  --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10  --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping  --infoFile bg_sentences_midx.txt  --variant 13   --S4   --S1 > ../../reports/output_tagging_report.txt
+ #python3 predict-annot/bin/tagging/tagging.py  --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/  --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/  --outputFileI  annot-input_bg_outputI_v4.txt  --outputFileII  annot-input_bg_outputII_v4  --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models  --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10  --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping  --infoFile bg_sentences_midx_v4.txt  --variant 13   --S4   --S1 > predict-annot/reports/output_tagging_report_v4.txt
 
 __author__ = 'egaytan'
 
@@ -241,7 +242,7 @@ if __name__ == "__main__":
             #print( DF(sentencesOutputDataI) )
             #print( '\n'.join(sentencesOutputDataII) )        
             # Save tags
-             with open(os.path.join(options.outputPath, options.outFileII), "w") as oFile:
+             with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.txt'), "w") as oFile:
                 for line in sentencesOutputDataII:
                     #print(line)
                     oFile.write(line + '\n')
--- a/predict-annot/bin/tagging/tlibs.py deleted 100644 → 0
View file @67d2b78
+++ b/predict-annot/bin/tagging/tlibs.py deleted 100644 → 0
View file @67d2b78
- # -*- coding: UTF-8 -*-
- 
- import os
- from optparse import OptionParser
- from time import time
- from collections import Counter
- 
- import nltk
- import sklearn
- import scipy.stats
- import sys
- 
- #from sklearn.externals import joblib
- import joblib
- from sklearn.metrics import make_scorer
- #from sklearn.cross_validation import cross_val_score
- from sklearn.model_selection import cross_val_score
- #from sklearn.grid_search import RandomizedSearchCV
- from sklearn.model_selection import RandomizedSearchCV
- 
- import sklearn_crfsuite
- from sklearn_crfsuite import scorers
- from sklearn_crfsuite import metrics
- 
- from nltk.corpus import stopwords
- 
- #################################
--- a/predict-annot/mapping/bg_sentences_midx.txt → predict-annot/mapping/bg_sentences_midx_v3.txt
View file @e9ea7a3
+++ b/predict-annot/mapping/bg_sentences_midx.txt → predict-annot/mapping/bg_sentences_midx_v3.txt
View file @e9ea7a3
--- a/predict-annot/mapping/bg_sentences_midx_v4.txt
View file @e9ea7a3
+++ b/predict-annot/mapping/bg_sentences_midx_v4.txt
View file @e9ea7a3
--- a/predict-annot/output/annot-input_bg_outputII_v4.txt deleted 100644 → 0
View file @67d2b78
+++ b/predict-annot/output/annot-input_bg_outputII_v4.txt deleted 100644 → 0
View file @67d2b78
--- a/predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.txt 0 → 100644
View file @e9ea7a3
+++ b/predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.txt 0 → 100644
View file @e9ea7a3
--- a/predict-annot/reports/output_tagging_report_v4.txt
View file @e9ea7a3
+++ b/predict-annot/reports/output_tagging_report_v4.txt
View file @e9ea7a3
@@ -2,7 +2,7 @@
 --inputPath          Path of training data set        : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
 --outputPath         Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
 --outputFileI        Output tagged file I             : annot-input_bg_outputI_v4.txt
- --outputFileII       Output tagged file II            : annot-input_bg_outputII_v4.txt
+ --outputFileII       Output tagged file II            : annot-input_bg_outputII_v4
 --modelPath          Path to read CRF model           : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
 --modelName          Model name                       : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
 --infoPath           Path of GSE-GSM index file       : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
@@ -17,16 +17,16 @@
 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
 -------------------------------- PROCESSING --------------------------------
 Reading CRF model...
- Reading CRF model done in: 0.009697s
+ Reading CRF model done in: 0.009390s
 Processing corpus...
 Preprocessing file...annot-input_bg_v3.txt
 Sentences input data: 14716
 Predicting tags with model
- Prediction done in: 1.732606s
+ Prediction done in: 1.692121s
 Tagging file
 Preprocessing file...annot-input_bg_v4.txt
 Sentences input data: 90904
 Predicting tags with model
- Prediction done in: 26.221746s
+ Prediction done in: 25.701133s
 Tagging file
- Processing corpus done in: 58.477312s
+ Processing corpus done in: 57.242562s