upload

Estefani Gaytan Nunez
Commit ee43c20633af0c13a6a8b3c527cbdf717166d35d ee43c206 1 parent 87ac8726
Showing 17 changed files with 40 additions and 47 deletions
CoreNLP/bin/annotation/get-raw-sentences_v4.sh
CoreNLP/input/annotation/bg_sentences_v4.txt
CoreNLP/output/annotation/bg_sentences_v4.txt.ner
predict-annot/bin/tagging/tagging.py
predict-annot/input/annot-input_bg_v3.txt
predict-annot/input/annot-input_bg_v4.txt
predict-annot/mapping/bg_sentences_midx_v3.txt
predict-annot/mapping/bg_sentences_midx_v4.txt
predict-annot/output/annot-input_bg_outputII.txt
predict-annot/output/annot-input_bg_outputIII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
predict-annot/output/annot-input_bg_outputI_v4.txt_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
predict-annot/output/annot-input_bg_outputI_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
predict-annot/reports/annot-input_bg_report_v3.txt
predict-annot/reports/annot-input_bg_report_v4.txt
predict-annot/reports/output_tagging_report.txt
predict-annot/reports/output_tagging_report_v4.txt
--- a/CoreNLP/bin/annotation/get-raw-sentences_v4.sh
View file @ee43c20
+++ b/CoreNLP/bin/annotation/get-raw-sentences_v4.sh
View file @ee43c20
@@ -34,7 +34,7 @@ echo
 echo
 echo "Add sentence-end-tag PGCGROWTHCONDITIONS"
 #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"}  length($2) > 3 { print  $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g'  | sed 's/-/\t/' | sed 's/-/\t/' )
-cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sed 's/\\null\\/null/g' | sort | uniq)
+cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7' | sed 's/1.\tNeubauer//' | cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g'|sed 's/\\null\\//g'  | sort | uniq)
 echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output
 wc $output
 echo "$cext" | cut -f1-3,5 > $mapping
--- a/CoreNLP/input/annotation/bg_sentences_v4.txt
View file @ee43c20
+++ b/CoreNLP/input/annotation/bg_sentences_v4.txt
View file @ee43c20
--- a/CoreNLP/output/annotation/bg_sentences_v4.txt.ner
View file @ee43c20
+++ b/CoreNLP/output/annotation/bg_sentences_v4.txt.ner
View file @ee43c20
--- a/predict-annot/bin/tagging/tagging.py
View file @ee43c20
+++ b/predict-annot/bin/tagging/tagging.py
View file @ee43c20
--- a/predict-annot/input/annot-input_bg_v3.txt deleted 100644 → 0
View file @87ac872
+++ b/predict-annot/input/annot-input_bg_v3.txt deleted 100644 → 0
View file @87ac872
--- a/predict-annot/input/annot-input_bg_v4.txt
View file @ee43c20
+++ b/predict-annot/input/annot-input_bg_v4.txt
View file @ee43c20
--- a/predict-annot/mapping/bg_sentences_midx_v3.txt deleted 100644 → 0
View file @87ac872
+++ b/predict-annot/mapping/bg_sentences_midx_v3.txt deleted 100644 → 0
View file @87ac872
--- a/predict-annot/mapping/bg_sentences_midx_v4.txt
View file @ee43c20
+++ b/predict-annot/mapping/bg_sentences_midx_v4.txt
View file @ee43c20
@@ -9328,7 +9328,7 @@ GSE12006	GSM303526	GPL3154-PMID:18940002	characteristics_ch1.1
 GSE12006	GSM303526	GPL3154-PMID:18940002	growth_protocol_ch1.1
 GSE12006	GSM303526	GPL3154-PMID:18940002	growth_protocol_ch1.2
 GSE12006	GSM303527	GPL3154-PMID:18940002	growth_protocol_ch1.4
-GSE12006	GSM303527	GPL3154-PMID:18940002
+GSE12006	GSM303527	GPL3154-PMID:18940002	extract_protocol_ch1.3
 GSE12006	GSM303527	GPL3154-PMID:18940002	title.1
 GSE12006	GSM303527	GPL3154-PMID:18940002	source_name_ch1.1
 GSE12006	GSM303527	GPL3154-PMID:18940002	organism_ch1.1
@@ -9340,7 +9340,7 @@ GSE12006	GSM303527	GPL3154-PMID:18940002	characteristics_ch1.1
 GSE12006	GSM303527	GPL3154-PMID:18940002	growth_protocol_ch1.1
 GSE12006	GSM303527	GPL3154-PMID:18940002	growth_protocol_ch1.2
 GSE12006	GSM303528	GPL3154-PMID:18940002	growth_protocol_ch1.4
-GSE12006	GSM303528	GPL3154-PMID:18940002
+GSE12006	GSM303528	GPL3154-PMID:18940002	extract_protocol_ch1.3
 GSE12006	GSM303528	GPL3154-PMID:18940002	title.1
 GSE12006	GSM303528	GPL3154-PMID:18940002	source_name_ch1.1
 GSE12006	GSM303528	GPL3154-PMID:18940002	organism_ch1.1
@@ -9352,7 +9352,7 @@ GSE12006	GSM303528	GPL3154-PMID:18940002	characteristics_ch1.1
 GSE12006	GSM303528	GPL3154-PMID:18940002	growth_protocol_ch1.1
 GSE12006	GSM303528	GPL3154-PMID:18940002	growth_protocol_ch1.2
 GSE12006	GSM303529	GPL3154-PMID:18940002	growth_protocol_ch1.4
-GSE12006	GSM303529	GPL3154-PMID:18940002
+GSE12006	GSM303529	GPL3154-PMID:18940002	extract_protocol_ch1.3
 GSE12006	GSM303529	GPL3154-PMID:18940002	title.1
 GSE12006	GSM303529	GPL3154-PMID:18940002	source_name_ch1.1
 GSE12006	GSM303529	GPL3154-PMID:18940002	organism_ch1.1
--- a/predict-annot/output/annot-input_bg_outputII.txt deleted 100644 → 0
View file @87ac872
+++ b/predict-annot/output/annot-input_bg_outputII.txt deleted 100644 → 0
View file @87ac872
--- a/predict-annot/output/annot-input_bg_outputIII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv 0 → 100644
View file @ee43c20
+++ b/predict-annot/output/annot-input_bg_outputIII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv 0 → 100644
View file @ee43c20
--- a/predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
View file @ee43c20
+++ b/predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
View file @ee43c20
--- a/predict-annot/output/annot-input_bg_outputI_v4.txt_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv deleted 100644 → 0
View file @87ac872
+++ b/predict-annot/output/annot-input_bg_outputI_v4.txt_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv deleted 100644 → 0
View file @87ac872
--- a/predict-annot/output/annot-input_bg_outputI_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv 0 → 100644
View file @ee43c20
+++ b/predict-annot/output/annot-input_bg_outputI_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv 0 → 100644
View file @ee43c20
--- a/predict-annot/reports/annot-input_bg_report_v3.txt deleted 100644 → 0
View file @87ac872
+++ b/predict-annot/reports/annot-input_bg_report_v3.txt deleted 100644 → 0
View file @87ac872
--------------------------------- PARAMETERS --------------------------------
-Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
-File with CoreNLP-tagging bg-sentences: bg_sentences_v3.txt.ner
-Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
-File to save recontrsucted bg-sentences: annot-input_bg_v3.txt
--------------------------------- PROCESSING --------------------------------
-Number of sentences: 14716
-==================================END===================================
--- a/predict-annot/reports/annot-input_bg_report_v4.txt
View file @ee43c20
+++ b/predict-annot/reports/annot-input_bg_report_v4.txt
View file @ee43c20
 -------------------------------- PARAMETERS --------------------------------
-Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
+--inputPath          Path of training data set        : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
-File with CoreNLP-tagging bg-sentences: bg_sentences_v4.txt.ner
+--outputPath         Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
-Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
+--outputFileI        Output tagged file I             : annot-input_bg_outputI_v4
-File to save recontrsucted bg-sentences: annot-input_bg_v4.txt
+--outputFileII       Output tagged file II            : annot-input_bg_outputII_v4
+--outputFileII       Output tagged file III           : annot-input_bg_outputIII_v4
+--modelPath          Path to read CRF model           : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
+--modelName          Model name                       : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
+--infoPath           Path of GSE-GSM index file       : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
+--infoFile	        GSE-GSM index file               : bg_sentences_midx_v4.txt
+--variant	        Run variant                      : 13
+--S1                 General features                 : True
+--S2                 Inner/Complete word features     : False
+--S3                 Extended context features        : False
+--S4                 Semantic features                : True
+--filteringStopWords Filtering stop words             : False
+--filterSymbols      Filtering punctuation marks      : False
 -------------------------------- PROCESSING --------------------------------
-Number of sentences: 90904
+Reading CRF model...
-==================================END===================================
+Reading CRF model done in: 0.009463s
+Processing corpus...
+Preprocessing file...annot-input_bg_v4.txt
+Sentences input data: 90688
+Predicting tags with model...
+Prediction done in: 26.367272s
+Tagging file...
+Saving Ouput I...
+Saving Ouput II...
+Saving Ouput III...
+Processing corpus done in: 56.584394s
--- a/predict-annot/reports/output_tagging_report.txt deleted 100644 → 0
View file @87ac872
+++ b/predict-annot/reports/output_tagging_report.txt deleted 100644 → 0
View file @87ac872
--------------------------------- PARAMETERS --------------------------------
---inputPath          Path of training data set        : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
---outputPath         Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
---outputFileI        Output tagged file I             : annot-input_bg_outputI.txt
---outputFileII       Output tagged file II            : annot-input_bg_outputII.txt
---modelPath          Path to read CRF model           : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
---modelName          Model name                       : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
---infoPath           Path of GSE-GSM index file       : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
---infoFile	        GSE-GSM index file               : bg_sentences_midx.txt
---variant	        Run variant                      : 13
---S1                 General features                 : True
---S2                 Inner/Complete word features     : False
---S3                 Extended context features        : False
---S4                 Semantic features                : True
---filteringStopWords Filtering stop words             : False
---filterSymbols      Filtering punctuation marks      : False
-Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
--------------------------------- PROCESSING --------------------------------
-Reading CRF model...
-Reading CRF model done in: 0.008336s
-Processing corpus...
-Preprocessing file...annot-input_bg_v3.txt
-Sentences input data: 14716
-Predicting tags with model
-Prediction done in: 1.688127s
-Tagging file
-Processing corpus done in: 3.948320s
--- a/predict-annot/reports/output_tagging_report_v4.txt
View file @ee43c20
+++ b/predict-annot/reports/output_tagging_report_v4.txt
View file @ee43c20
@@ -17,10 +17,16 @@
 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
 -------------------------------- PROCESSING --------------------------------
 Reading CRF model...
-Reading CRF model done in: 0.009804s
+Reading CRF model done in: 0.009363s
 Processing corpus...
 Preprocessing file...annot-input_bg_v3.txt
 Sentences input data: 14716
 Predicting tags with model
-Prediction done in: 1.811103s
+Prediction done in: 1.737334s
 Tagging file
+Preprocessing file...annot-input_bg_v4.txt
+Sentences input data: 90688
+Predicting tags with model
+Prediction done in: 26.434549s
+Tagging file
+Processing corpus done in: 58.304885s