Showing
9 changed files
with
8 additions
and
34 deletions
... | @@ -34,7 +34,7 @@ echo | ... | @@ -34,7 +34,7 @@ echo |
34 | echo | 34 | echo |
35 | echo "Add sentence-end-tag PGCGROWTHCONDITIONS" | 35 | echo "Add sentence-end-tag PGCGROWTHCONDITIONS" |
36 | #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' ) | 36 | #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' ) |
37 | -cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sort | uniq) | 37 | +cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sed 's/\\null\\/null/g' | sort | uniq) |
38 | echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output | 38 | echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output |
39 | wc $output | 39 | wc $output |
40 | echo "$cext" | cut -f1-3,5 > $mapping | 40 | echo "$cext" | cut -f1-3,5 > $mapping | ... | ... |
This diff could not be displayed because it is too large.
... | @@ -61,6 +61,7 @@ import training_validation_v14 as training | ... | @@ -61,6 +61,7 @@ import training_validation_v14 as training |
61 | # --variant 13 | 61 | # --variant 13 |
62 | 62 | ||
63 | #python3 tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI.txt --outputFileII annot-input_bg_outputII.txt --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx.txt --variant 13 --S4 --S1 > ../../reports/output_tagging_report.txt | 63 | #python3 tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI.txt --outputFileII annot-input_bg_outputII.txt --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx.txt --variant 13 --S4 --S1 > ../../reports/output_tagging_report.txt |
64 | +#python3 predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v4.txt --outputFileII annot-input_bg_outputII_v4 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 13 --S4 --S1 > predict-annot/reports/output_tagging_report_v4.txt | ||
64 | 65 | ||
65 | __author__ = 'egaytan' | 66 | __author__ = 'egaytan' |
66 | 67 | ||
... | @@ -241,7 +242,7 @@ if __name__ == "__main__": | ... | @@ -241,7 +242,7 @@ if __name__ == "__main__": |
241 | #print( DF(sentencesOutputDataI) ) | 242 | #print( DF(sentencesOutputDataI) ) |
242 | #print( '\n'.join(sentencesOutputDataII) ) | 243 | #print( '\n'.join(sentencesOutputDataII) ) |
243 | # Save tags | 244 | # Save tags |
244 | - with open(os.path.join(options.outputPath, options.outFileII), "w") as oFile: | 245 | + with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.txt'), "w") as oFile: |
245 | for line in sentencesOutputDataII: | 246 | for line in sentencesOutputDataII: |
246 | #print(line) | 247 | #print(line) |
247 | oFile.write(line + '\n') | 248 | oFile.write(line + '\n') | ... | ... |
predict-annot/bin/tagging/tlibs.py
deleted
100644 → 0
1 | -# -*- coding: UTF-8 -*- | ||
2 | - | ||
3 | -import os | ||
4 | -from optparse import OptionParser | ||
5 | -from time import time | ||
6 | -from collections import Counter | ||
7 | - | ||
8 | -import nltk | ||
9 | -import sklearn | ||
10 | -import scipy.stats | ||
11 | -import sys | ||
12 | - | ||
13 | -#from sklearn.externals import joblib | ||
14 | -import joblib | ||
15 | -from sklearn.metrics import make_scorer | ||
16 | -#from sklearn.cross_validation import cross_val_score | ||
17 | -from sklearn.model_selection import cross_val_score | ||
18 | -#from sklearn.grid_search import RandomizedSearchCV | ||
19 | -from sklearn.model_selection import RandomizedSearchCV | ||
20 | - | ||
21 | -import sklearn_crfsuite | ||
22 | -from sklearn_crfsuite import scorers | ||
23 | -from sklearn_crfsuite import metrics | ||
24 | - | ||
25 | -from nltk.corpus import stopwords | ||
26 | - | ||
27 | -################################# |
File moved
This diff is collapsed. Click to expand it.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
... | @@ -2,7 +2,7 @@ | ... | @@ -2,7 +2,7 @@ |
2 | --inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ | 2 | --inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ |
3 | --outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ | 3 | --outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ |
4 | --outputFileI Output tagged file I : annot-input_bg_outputI_v4.txt | 4 | --outputFileI Output tagged file I : annot-input_bg_outputI_v4.txt |
5 | ---outputFileII Output tagged file II : annot-input_bg_outputII_v4.txt | 5 | +--outputFileII Output tagged file II : annot-input_bg_outputII_v4 |
6 | --modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models | 6 | --modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models |
7 | --modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 | 7 | --modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 |
8 | --infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping | 8 | --infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping |
... | @@ -17,16 +17,16 @@ | ... | @@ -17,16 +17,16 @@ |
17 | Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False | 17 | Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False |
18 | -------------------------------- PROCESSING -------------------------------- | 18 | -------------------------------- PROCESSING -------------------------------- |
19 | Reading CRF model... | 19 | Reading CRF model... |
20 | -Reading CRF model done in: 0.009697s | 20 | +Reading CRF model done in: 0.009390s |
21 | Processing corpus... | 21 | Processing corpus... |
22 | Preprocessing file...annot-input_bg_v3.txt | 22 | Preprocessing file...annot-input_bg_v3.txt |
23 | Sentences input data: 14716 | 23 | Sentences input data: 14716 |
24 | Predicting tags with model | 24 | Predicting tags with model |
25 | -Prediction done in: 1.732606s | 25 | +Prediction done in: 1.692121s |
26 | Tagging file | 26 | Tagging file |
27 | Preprocessing file...annot-input_bg_v4.txt | 27 | Preprocessing file...annot-input_bg_v4.txt |
28 | Sentences input data: 90904 | 28 | Sentences input data: 90904 |
29 | Predicting tags with model | 29 | Predicting tags with model |
30 | -Prediction done in: 26.221746s | 30 | +Prediction done in: 25.701133s |
31 | Tagging file | 31 | Tagging file |
32 | -Processing corpus done in: 58.477312s | 32 | +Processing corpus done in: 57.242562s | ... | ... |
-
Please register or login to post a comment