Showing 1 changed file with 7 additions and 6 deletions
... | @@ -3,6 +3,8 @@ echo 'Preprocessing files...' | ... | @@ -3,6 +3,8 @@ echo 'Preprocessing files...' |
3 | ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original | 3 | ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original |
4 | CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets | 4 | CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets |
5 | TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries | 5 | TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries |
6 | +POST_PATH=/home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 | ||
7 | +LEMMA_PATH=/home/cmendezc/BIO_LEMMATIZER | ||
6 | 8 | ||
7 | PRE=TRUE | 9 | PRE=TRUE |
8 | echo " Preprocessing: $PRE" | 10 | echo " Preprocessing: $PRE" |
... | @@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then | ... | @@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then |
19 | echo "Preprocessing..." | 21 | echo "Preprocessing..." |
20 | INPUT_PATH=$ORIGINAL_CORPUS_PATH | 22 | INPUT_PATH=$ORIGINAL_CORPUS_PATH |
21 | OUTPUT_PATH=$CORPUS_PATH/preprocessed | 23 | OUTPUT_PATH=$CORPUS_PATH/preprocessed |
22 | -python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength_LREGULONDB.json > outputPreprocessing_lregulondb.txt | 24 | +python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength.json > outputPreprocessing.txt |
23 | -# python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH > outputPreprocessing_lregulondb.txt | ||
24 | fi | 25 | fi |
25 | 26 | ||
26 | if [ "$POS" = "TRUE" ]; then | 27 | if [ "$POS" = "TRUE" ]; then |
27 | echo "POS Tagging..." | 28 | echo "POS Tagging..." |
28 | INPUT_PATH=$CORPUS_PATH/preprocessed | 29 | INPUT_PATH=$CORPUS_PATH/preprocessed |
29 | OUTPUT_PATH=$CORPUS_PATH/pos | 30 | OUTPUT_PATH=$CORPUS_PATH/pos |
30 | -python3.4 posTaggingStanford.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --taggerPath /home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 --biolemmatizer > outputPOST_lregulondb.txt | 31 | +python3.4 posTaggingStanford.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --taggerPath $POST_PATH --biolemmatizer > outputPOST.txt |
31 | fi | 32 | fi |
32 | 33 | ||
33 | if [ "$LEMMA" = "TRUE" ]; then | 34 | if [ "$LEMMA" = "TRUE" ]; then |
34 | echo "Lemmatization..." | 35 | echo "Lemmatization..." |
35 | INPUT_PATH=$CORPUS_PATH/pos | 36 | INPUT_PATH=$CORPUS_PATH/pos |
36 | OUTPUT_PATH=$CORPUS_PATH/lemma | 37 | OUTPUT_PATH=$CORPUS_PATH/lemma |
37 | -python3.4 biolemmatizing.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --biolemmatizerPath /home/cmendezc/BIO_LEMMATIZER > outputLemma_lregulondb.txt | 38 | +python3.4 biolemmatizing.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --biolemmatizerPath $LEMMA_PATH > outputLemma.txt |
38 | fi | 39 | fi |
39 | 40 | ||
40 | if [ "$TERM" = "TRUE" ]; then | 41 | if [ "$TERM" = "TRUE" ]; then |
41 | echo "Terminological tagging..." | 42 | echo "Terminological tagging..." |
42 | INPUT_PATH=$CORPUS_PATH/lemma | 43 | INPUT_PATH=$CORPUS_PATH/lemma |
43 | OUTPUT_PATH=$CORPUS_PATH/term | 44 | OUTPUT_PATH=$CORPUS_PATH/term |
44 | -python3.4 biologicalTermTagging.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termPath $TERM_PATH --termFiles termFilesTag_LREGULONDB.json > outputTerm_lregulondb.txt | 45 | +python3.4 biologicalTermTagging.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termPath $TERM_PATH --termFiles termFilesTag.json > outputTerm.txt |
45 | fi | 46 | fi |
46 | 47 | ||
47 | if [ "$TRANS" = "TRUE" ]; then | 48 | if [ "$TRANS" = "TRUE" ]; then |
48 | echo "Transformation..." | 49 | echo "Transformation..." |
49 | INPUT_PATH=$CORPUS_PATH/term | 50 | INPUT_PATH=$CORPUS_PATH/term |
50 | OUTPUT_PATH=$CORPUS_PATH/transformed | 51 | OUTPUT_PATH=$CORPUS_PATH/transformed |
51 | -python3.4 transforming.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --minWordsInLine 5 > outputTransformation_lregulondb.txt | 52 | +python3.4 transforming.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --minWordsInLine 5 > outputTransformation.txt |
52 | fi | 53 | fi | ... | ... |
-
Please register or login to post a comment