Estefani Gaytan Nunez

upload

...@@ -7,10 +7,14 @@ cd /home/egaytan/automatic-extraction-growth-conditions/extraction-geo/outputs/ ...@@ -7,10 +7,14 @@ cd /home/egaytan/automatic-extraction-growth-conditions/extraction-geo/outputs/
7 7
8 echo "Access to output extracted baglines" 8 echo "Access to output extracted baglines"
9 echo "directory: "$(pwd); 9 echo "directory: "$(pwd);
10 - 10 +#all output-extraction files
11 index="/home/egaytan/automatic-extraction-growth-conditions/extraction-geo/reports/all-output-index.txt" 11 index="/home/egaytan/automatic-extraction-growth-conditions/extraction-geo/reports/all-output-index.txt"
12 -output="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/annotation/bg_sentences_v2.txt" 12 +#input sentences to run CoreNLP
13 -report="/home/egaytan/automatic-extraction-growth-conditions/extraction-geo/reports/bg_report_v2.txt" 13 +output="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/annotation/bg_sentences_v3.txt"
14 +#GSE index by bg_sentence row
15 +mapping="/home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping/bg_sentences_midx.txt"
16 +#Number of fields by bagline
17 +report="/home/egaytan/automatic-extraction-growth-conditions/extraction-geo/reports/bg_report_v3.txt"
14 echo 18 echo
15 echo 19 echo
16 echo 20 echo
...@@ -29,10 +33,15 @@ echo "==============================Baglines extraction========================= ...@@ -29,10 +33,15 @@ echo "==============================Baglines extraction=========================
29 echo 33 echo
30 echo 34 echo
31 echo "Add sentence-end-tag PGCGROWTHCONDITIONS" 35 echo "Add sentence-end-tag PGCGROWTHCONDITIONS"
32 -for gsef in $( cat $index ) 36 +#for gsef in $( cat $index )
33 -do 37 +#do
34 - cat $gsef | sort | uniq; 38 +# cat $gsef | sort | uniq;
35 -done | cut -f1 | cut -f2 -d'"' | sort | uniq | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output 39 +#done | cut -f1 | cut -f2 -d'"' | sort | uniq | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output
40 +cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f7,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | awk '{ print $_ " PGCGROWTHCONDITIONS" }' | sed 's/\\null\\/null/g' )
41 +echo "$cext" | cut -f2 > $output
42 +echo "$cext" | cut -f2 |wc
43 +echo "$cext" | cut -f1 > $mapping
44 +echo "$cext" | cut -f1 |wc
36 echo 45 echo
37 echo 46 echo
38 echo "Number of total baglines: "$(wc -l $output ); 47 echo "Number of total baglines: "$(wc -l $output );
...@@ -40,7 +49,7 @@ echo ...@@ -40,7 +49,7 @@ echo
40 echo "Baglines report" 49 echo "Baglines report"
41 50
42 51
43 -for gsef in $( cat ../reports/all-output-index.txt) 52 +for gsef in $( cat $index)
44 do 53 do
45 cat $gsef | sort | uniq ; 54 cat $gsef | sort | uniq ;
46 done | cut -f2 | cut -f2 -d'"' | sed 's/_ch/./g' | cut -f1 -d'.' | sort | uniq -c | awk '{print $1"\t"$2}' > $report 55 done | cut -f2 | cut -f2 -d'"' | sed 's/_ch/./g' | cut -f1 -d'.' | sort | uniq -c | awk '{print $1"\t"$2}' > $report
...@@ -48,4 +57,4 @@ done | cut -f2 | cut -f2 -d'"' | sed 's/_ch/./g' | cut -f1 -d'.' | sort | uniq - ...@@ -48,4 +57,4 @@ done | cut -f2 | cut -f2 -d'"' | sed 's/_ch/./g' | cut -f1 -d'.' | sort | uniq -
48 cat $report 57 cat $report
49 echo 58 echo
50 echo 59 echo
51 -echo "Saving file: /home/egaytan/automatic-extraction-growth-conditions/extraction/bg_sentences_v2.txt"; 60 +echo "Saving file: "$output;
......
...@@ -4,7 +4,7 @@ echo "==============================Run CoreNLP================================= ...@@ -4,7 +4,7 @@ echo "==============================Run CoreNLP=================================
4 echo 4 echo
5 echo 5 echo
6 6
7 -input="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/annotation/bg_sentences_v2.txt" 7 +input="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/annotation/bg_sentences_v3.txt"
8 output="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation" 8 output="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation"
9 regexfile="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/NER/inputEntities.txt" 9 regexfile="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/NER/inputEntities.txt"
10 10
......
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 -index="/home/egaytan/automatic-extraction-growth-conditions/extraction-geo/reports/all-output-index.txt"
2 -mapfiel="/home/egaytan/automatic-extraction-growth-conditions/extraction-geo/reports/bg_map_index.txt"
3 -grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f7,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' > $mapfile
4 -
5 -
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 --------------------------------- PARAMETERS --------------------------------
2 -Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
3 -File with CoreNLP-tagging bg-sentences: bg_sentences_v2.txt.ner
4 -Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
5 -File to save recontrsucted bg-sentences: annot-input_bg_v2.txt
6 --------------------------------- PROCESSING --------------------------------
7 -EXCLUDE:
8 -EXCLUDE:
9 -EXCLUDE: .|.|.
10 -EXCLUDE: \|\|SYM
11 -EXCLUDE: C1|c1|NN
12 -EXCLUDE: C2|c2|NN
13 -EXCLUDE: F1|f1|NN
14 -EXCLUDE: F2|f2|NN
15 -EXCLUDE: LB|lb|NN
16 -EXCLUDE: NA|NA|NNP
17 -EXCLUDE: NC|nc|NN
18 -EXCLUDE: V1|v1|NN
19 -EXCLUDE: wt|wt|JJ
20 -EXCLUDE: WT|WT|NNP
21 -Number of sentences: 13889
22 -==================================END===================================
1 -------------------------------- PARAMETERS -------------------------------- 1 -------------------------------- PARAMETERS --------------------------------
2 Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation 2 Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
3 -File with CoreNLP-tagging bg-sentences: bg_sentences_v2.txt.ner 3 +File with CoreNLP-tagging bg-sentences: bg_sentences_v3.txt.ner
4 Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input 4 Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
5 -File to save recontrsucted bg-sentences: annot-input_bg_v1.txt 5 +File to save recontrsucted bg-sentences: annot-input_bg_v3.txt
6 -------------------------------- PROCESSING -------------------------------- 6 -------------------------------- PROCESSING --------------------------------
7 -Number of sentences: 13903 7 +Number of sentences: 14716
8 ==================================END=================================== 8 ==================================END===================================
......