Estefani Gaytan Nunez

upload

...@@ -34,7 +34,7 @@ echo ...@@ -34,7 +34,7 @@ echo
34 echo 34 echo
35 echo "Add sentence-end-tag PGCGROWTHCONDITIONS" 35 echo "Add sentence-end-tag PGCGROWTHCONDITIONS"
36 #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' ) 36 #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' )
37 -cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sed 's/\\null\\/null/g' | sort | uniq) 37 +cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7' | sed 's/1.\tNeubauer//' | cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g'|sed 's/\\null\\//g' | sort | uniq)
38 echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output 38 echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output
39 wc $output 39 wc $output
40 echo "$cext" | cut -f1-3,5 > $mapping 40 echo "$cext" | cut -f1-3,5 > $mapping
......
This diff could not be displayed because it is too large.
This diff is collapsed. Click to expand it.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
...@@ -9328,7 +9328,7 @@ GSE12006 GSM303526 GPL3154-PMID:18940002 characteristics_ch1.1 ...@@ -9328,7 +9328,7 @@ GSE12006 GSM303526 GPL3154-PMID:18940002 characteristics_ch1.1
9328 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.1 9328 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.1
9329 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.2 9329 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.2
9330 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.4 9330 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.4
9331 -GSE12006 GSM303527 GPL3154-PMID:18940002 9331 +GSE12006 GSM303527 GPL3154-PMID:18940002 extract_protocol_ch1.3
9332 GSE12006 GSM303527 GPL3154-PMID:18940002 title.1 9332 GSE12006 GSM303527 GPL3154-PMID:18940002 title.1
9333 GSE12006 GSM303527 GPL3154-PMID:18940002 source_name_ch1.1 9333 GSE12006 GSM303527 GPL3154-PMID:18940002 source_name_ch1.1
9334 GSE12006 GSM303527 GPL3154-PMID:18940002 organism_ch1.1 9334 GSE12006 GSM303527 GPL3154-PMID:18940002 organism_ch1.1
...@@ -9340,7 +9340,7 @@ GSE12006 GSM303527 GPL3154-PMID:18940002 characteristics_ch1.1 ...@@ -9340,7 +9340,7 @@ GSE12006 GSM303527 GPL3154-PMID:18940002 characteristics_ch1.1
9340 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.1 9340 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.1
9341 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.2 9341 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.2
9342 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.4 9342 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.4
9343 -GSE12006 GSM303528 GPL3154-PMID:18940002 9343 +GSE12006 GSM303528 GPL3154-PMID:18940002 extract_protocol_ch1.3
9344 GSE12006 GSM303528 GPL3154-PMID:18940002 title.1 9344 GSE12006 GSM303528 GPL3154-PMID:18940002 title.1
9345 GSE12006 GSM303528 GPL3154-PMID:18940002 source_name_ch1.1 9345 GSE12006 GSM303528 GPL3154-PMID:18940002 source_name_ch1.1
9346 GSE12006 GSM303528 GPL3154-PMID:18940002 organism_ch1.1 9346 GSE12006 GSM303528 GPL3154-PMID:18940002 organism_ch1.1
...@@ -9352,7 +9352,7 @@ GSE12006 GSM303528 GPL3154-PMID:18940002 characteristics_ch1.1 ...@@ -9352,7 +9352,7 @@ GSE12006 GSM303528 GPL3154-PMID:18940002 characteristics_ch1.1
9352 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.1 9352 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.1
9353 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.2 9353 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.2
9354 GSE12006 GSM303529 GPL3154-PMID:18940002 growth_protocol_ch1.4 9354 GSE12006 GSM303529 GPL3154-PMID:18940002 growth_protocol_ch1.4
9355 -GSE12006 GSM303529 GPL3154-PMID:18940002 9355 +GSE12006 GSM303529 GPL3154-PMID:18940002 extract_protocol_ch1.3
9356 GSE12006 GSM303529 GPL3154-PMID:18940002 title.1 9356 GSE12006 GSM303529 GPL3154-PMID:18940002 title.1
9357 GSE12006 GSM303529 GPL3154-PMID:18940002 source_name_ch1.1 9357 GSE12006 GSM303529 GPL3154-PMID:18940002 source_name_ch1.1
9358 GSE12006 GSM303529 GPL3154-PMID:18940002 organism_ch1.1 9358 GSE12006 GSM303529 GPL3154-PMID:18940002 organism_ch1.1
......
This diff could not be displayed because it is too large.
1 --------------------------------- PARAMETERS --------------------------------
2 -Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
3 -File with CoreNLP-tagging bg-sentences: bg_sentences_v3.txt.ner
4 -Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
5 -File to save recontrsucted bg-sentences: annot-input_bg_v3.txt
6 --------------------------------- PROCESSING --------------------------------
7 -Number of sentences: 14716
8 -==================================END===================================
1 -------------------------------- PARAMETERS -------------------------------- 1 -------------------------------- PARAMETERS --------------------------------
2 -Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation 2 +--inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
3 -File with CoreNLP-tagging bg-sentences: bg_sentences_v4.txt.ner 3 +--outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
4 -Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input 4 +--outputFileI Output tagged file I : annot-input_bg_outputI_v4
5 -File to save recontrsucted bg-sentences: annot-input_bg_v4.txt 5 +--outputFileII Output tagged file II : annot-input_bg_outputII_v4
6 +--outputFileII Output tagged file III : annot-input_bg_outputIII_v4
7 +--modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
8 +--modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
9 +--infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
10 +--infoFile GSE-GSM index file : bg_sentences_midx_v4.txt
11 +--variant Run variant : 13
12 +--S1 General features : True
13 +--S2 Inner/Complete word features : False
14 +--S3 Extended context features : False
15 +--S4 Semantic features : True
16 +--filteringStopWords Filtering stop words : False
17 +--filterSymbols Filtering punctuation marks : False
6 -------------------------------- PROCESSING -------------------------------- 18 -------------------------------- PROCESSING --------------------------------
7 -Number of sentences: 90904 19 +Reading CRF model...
8 -==================================END=================================== 20 +Reading CRF model done in: 0.009463s
21 +Processing corpus...
22 +Preprocessing file...annot-input_bg_v4.txt
23 +Sentences input data: 90688
24 +Predicting tags with model...
25 +Prediction done in: 26.367272s
26 +Tagging file...
27 +Saving Ouput I...
28 +Saving Ouput II...
29 +Saving Ouput III...
30 +Processing corpus done in: 56.584394s
......
1 --------------------------------- PARAMETERS --------------------------------
2 ---inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
3 ---outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
4 ---outputFileI Output tagged file I : annot-input_bg_outputI.txt
5 ---outputFileII Output tagged file II : annot-input_bg_outputII.txt
6 ---modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
7 ---modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
8 ---infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
9 ---infoFile GSE-GSM index file : bg_sentences_midx.txt
10 ---variant Run variant : 13
11 ---S1 General features : True
12 ---S2 Inner/Complete word features : False
13 ---S3 Extended context features : False
14 ---S4 Semantic features : True
15 ---filteringStopWords Filtering stop words : False
16 ---filterSymbols Filtering punctuation marks : False
17 -Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
18 --------------------------------- PROCESSING --------------------------------
19 -Reading CRF model...
20 -Reading CRF model done in: 0.008336s
21 -Processing corpus...
22 -Preprocessing file...annot-input_bg_v3.txt
23 -Sentences input data: 14716
24 -Predicting tags with model
25 -Prediction done in: 1.688127s
26 -Tagging file
27 -Processing corpus done in: 3.948320s
...@@ -17,10 +17,16 @@ ...@@ -17,10 +17,16 @@
17 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False 17 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
18 -------------------------------- PROCESSING -------------------------------- 18 -------------------------------- PROCESSING --------------------------------
19 Reading CRF model... 19 Reading CRF model...
20 -Reading CRF model done in: 0.009804s 20 +Reading CRF model done in: 0.009363s
21 Processing corpus... 21 Processing corpus...
22 Preprocessing file...annot-input_bg_v3.txt 22 Preprocessing file...annot-input_bg_v3.txt
23 Sentences input data: 14716 23 Sentences input data: 14716
24 Predicting tags with model 24 Predicting tags with model
25 -Prediction done in: 1.811103s 25 +Prediction done in: 1.737334s
26 Tagging file 26 Tagging file
27 +Preprocessing file...annot-input_bg_v4.txt
28 +Sentences input data: 90688
29 +Predicting tags with model
30 +Prediction done in: 26.434549s
31 +Tagging file
32 +Processing corpus done in: 58.304885s
......