Showing
17 changed files
with
40 additions
and
47 deletions
... | @@ -34,7 +34,7 @@ echo | ... | @@ -34,7 +34,7 @@ echo |
34 | echo | 34 | echo |
35 | echo "Add sentence-end-tag PGCGROWTHCONDITIONS" | 35 | echo "Add sentence-end-tag PGCGROWTHCONDITIONS" |
36 | #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' ) | 36 | #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' ) |
37 | -cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sed 's/\\null\\/null/g' | sort | uniq) | 37 | +cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7' | sed 's/1.\tNeubauer//' | cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g'|sed 's/\\null\\//g' | sort | uniq) |
38 | echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output | 38 | echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output |
39 | wc $output | 39 | wc $output |
40 | echo "$cext" | cut -f1-3,5 > $mapping | 40 | echo "$cext" | cut -f1-3,5 > $mapping | ... | ... |
This diff could not be displayed because it is too large.
This file is too large to display.
This diff is collapsed. Click to expand it.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
... | @@ -9328,7 +9328,7 @@ GSE12006 GSM303526 GPL3154-PMID:18940002 characteristics_ch1.1 | ... | @@ -9328,7 +9328,7 @@ GSE12006 GSM303526 GPL3154-PMID:18940002 characteristics_ch1.1 |
9328 | GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.1 | 9328 | GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.1 |
9329 | GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.2 | 9329 | GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.2 |
9330 | GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.4 | 9330 | GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.4 |
9331 | -GSE12006 GSM303527 GPL3154-PMID:18940002 | 9331 | +GSE12006 GSM303527 GPL3154-PMID:18940002 extract_protocol_ch1.3 |
9332 | GSE12006 GSM303527 GPL3154-PMID:18940002 title.1 | 9332 | GSE12006 GSM303527 GPL3154-PMID:18940002 title.1 |
9333 | GSE12006 GSM303527 GPL3154-PMID:18940002 source_name_ch1.1 | 9333 | GSE12006 GSM303527 GPL3154-PMID:18940002 source_name_ch1.1 |
9334 | GSE12006 GSM303527 GPL3154-PMID:18940002 organism_ch1.1 | 9334 | GSE12006 GSM303527 GPL3154-PMID:18940002 organism_ch1.1 |
... | @@ -9340,7 +9340,7 @@ GSE12006 GSM303527 GPL3154-PMID:18940002 characteristics_ch1.1 | ... | @@ -9340,7 +9340,7 @@ GSE12006 GSM303527 GPL3154-PMID:18940002 characteristics_ch1.1 |
9340 | GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.1 | 9340 | GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.1 |
9341 | GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.2 | 9341 | GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.2 |
9342 | GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.4 | 9342 | GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.4 |
9343 | -GSE12006 GSM303528 GPL3154-PMID:18940002 | 9343 | +GSE12006 GSM303528 GPL3154-PMID:18940002 extract_protocol_ch1.3 |
9344 | GSE12006 GSM303528 GPL3154-PMID:18940002 title.1 | 9344 | GSE12006 GSM303528 GPL3154-PMID:18940002 title.1 |
9345 | GSE12006 GSM303528 GPL3154-PMID:18940002 source_name_ch1.1 | 9345 | GSE12006 GSM303528 GPL3154-PMID:18940002 source_name_ch1.1 |
9346 | GSE12006 GSM303528 GPL3154-PMID:18940002 organism_ch1.1 | 9346 | GSE12006 GSM303528 GPL3154-PMID:18940002 organism_ch1.1 |
... | @@ -9352,7 +9352,7 @@ GSE12006 GSM303528 GPL3154-PMID:18940002 characteristics_ch1.1 | ... | @@ -9352,7 +9352,7 @@ GSE12006 GSM303528 GPL3154-PMID:18940002 characteristics_ch1.1 |
9352 | GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.1 | 9352 | GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.1 |
9353 | GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.2 | 9353 | GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.2 |
9354 | GSE12006 GSM303529 GPL3154-PMID:18940002 growth_protocol_ch1.4 | 9354 | GSE12006 GSM303529 GPL3154-PMID:18940002 growth_protocol_ch1.4 |
9355 | -GSE12006 GSM303529 GPL3154-PMID:18940002 | 9355 | +GSE12006 GSM303529 GPL3154-PMID:18940002 extract_protocol_ch1.3 |
9356 | GSE12006 GSM303529 GPL3154-PMID:18940002 title.1 | 9356 | GSE12006 GSM303529 GPL3154-PMID:18940002 title.1 |
9357 | GSE12006 GSM303529 GPL3154-PMID:18940002 source_name_ch1.1 | 9357 | GSE12006 GSM303529 GPL3154-PMID:18940002 source_name_ch1.1 |
9358 | GSE12006 GSM303529 GPL3154-PMID:18940002 organism_ch1.1 | 9358 | GSE12006 GSM303529 GPL3154-PMID:18940002 organism_ch1.1 | ... | ... |
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
File mode changed
This diff could not be displayed because it is too large.
1 | --------------------------------- PARAMETERS -------------------------------- | ||
2 | -Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation | ||
3 | -File with CoreNLP-tagging bg-sentences: bg_sentences_v3.txt.ner | ||
4 | -Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input | ||
5 | -File to save recontrsucted bg-sentences: annot-input_bg_v3.txt | ||
6 | --------------------------------- PROCESSING -------------------------------- | ||
7 | -Number of sentences: 14716 | ||
8 | -==================================END=================================== |
1 | -------------------------------- PARAMETERS -------------------------------- | 1 | -------------------------------- PARAMETERS -------------------------------- |
2 | -Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation | 2 | +--inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ |
3 | -File with CoreNLP-tagging bg-sentences: bg_sentences_v4.txt.ner | 3 | +--outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ |
4 | -Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input | 4 | +--outputFileI Output tagged file I : annot-input_bg_outputI_v4 |
5 | -File to save recontrsucted bg-sentences: annot-input_bg_v4.txt | 5 | +--outputFileII Output tagged file II : annot-input_bg_outputII_v4 |
6 | +--outputFileII Output tagged file III : annot-input_bg_outputIII_v4 | ||
7 | +--modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models | ||
8 | +--modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 | ||
9 | +--infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping | ||
10 | +--infoFile GSE-GSM index file : bg_sentences_midx_v4.txt | ||
11 | +--variant Run variant : 13 | ||
12 | +--S1 General features : True | ||
13 | +--S2 Inner/Complete word features : False | ||
14 | +--S3 Extended context features : False | ||
15 | +--S4 Semantic features : True | ||
16 | +--filteringStopWords Filtering stop words : False | ||
17 | +--filterSymbols Filtering punctuation marks : False | ||
6 | -------------------------------- PROCESSING -------------------------------- | 18 | -------------------------------- PROCESSING -------------------------------- |
7 | -Number of sentences: 90904 | 19 | +Reading CRF model... |
8 | -==================================END=================================== | 20 | +Reading CRF model done in: 0.009463s |
21 | +Processing corpus... | ||
22 | +Preprocessing file...annot-input_bg_v4.txt | ||
23 | +Sentences input data: 90688 | ||
24 | +Predicting tags with model... | ||
25 | +Prediction done in: 26.367272s | ||
26 | +Tagging file... | ||
27 | +Saving Ouput I... | ||
28 | +Saving Ouput II... | ||
29 | +Saving Ouput III... | ||
30 | +Processing corpus done in: 56.584394s | ... | ... |
1 | --------------------------------- PARAMETERS -------------------------------- | ||
2 | ---inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ | ||
3 | ---outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ | ||
4 | ---outputFileI Output tagged file I : annot-input_bg_outputI.txt | ||
5 | ---outputFileII Output tagged file II : annot-input_bg_outputII.txt | ||
6 | ---modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models | ||
7 | ---modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 | ||
8 | ---infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping | ||
9 | ---infoFile GSE-GSM index file : bg_sentences_midx.txt | ||
10 | ---variant Run variant : 13 | ||
11 | ---S1 General features : True | ||
12 | ---S2 Inner/Complete word features : False | ||
13 | ---S3 Extended context features : False | ||
14 | ---S4 Semantic features : True | ||
15 | ---filteringStopWords Filtering stop words : False | ||
16 | ---filterSymbols Filtering punctuation marks : False | ||
17 | -Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False | ||
18 | --------------------------------- PROCESSING -------------------------------- | ||
19 | -Reading CRF model... | ||
20 | -Reading CRF model done in: 0.008336s | ||
21 | -Processing corpus... | ||
22 | -Preprocessing file...annot-input_bg_v3.txt | ||
23 | -Sentences input data: 14716 | ||
24 | -Predicting tags with model | ||
25 | -Prediction done in: 1.688127s | ||
26 | -Tagging file | ||
27 | -Processing corpus done in: 3.948320s |
... | @@ -17,10 +17,16 @@ | ... | @@ -17,10 +17,16 @@ |
17 | Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False | 17 | Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False |
18 | -------------------------------- PROCESSING -------------------------------- | 18 | -------------------------------- PROCESSING -------------------------------- |
19 | Reading CRF model... | 19 | Reading CRF model... |
20 | -Reading CRF model done in: 0.009804s | 20 | +Reading CRF model done in: 0.009363s |
21 | Processing corpus... | 21 | Processing corpus... |
22 | Preprocessing file...annot-input_bg_v3.txt | 22 | Preprocessing file...annot-input_bg_v3.txt |
23 | Sentences input data: 14716 | 23 | Sentences input data: 14716 |
24 | Predicting tags with model | 24 | Predicting tags with model |
25 | -Prediction done in: 1.811103s | 25 | +Prediction done in: 1.737334s |
26 | Tagging file | 26 | Tagging file |
27 | +Preprocessing file...annot-input_bg_v4.txt | ||
28 | +Sentences input data: 90688 | ||
29 | +Predicting tags with model | ||
30 | +Prediction done in: 26.434549s | ||
31 | +Tagging file | ||
32 | +Processing corpus done in: 58.304885s | ... | ... |
-
Please register or login to post a comment