Showing 1 changed file with 7 additions and 6 deletions
| ... | @@ -3,6 +3,8 @@ echo 'Preprocessing files...' | ... | @@ -3,6 +3,8 @@ echo 'Preprocessing files...' |
| 3 | ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original | 3 | ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original |
| 4 | CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets | 4 | CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets |
| 5 | TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries | 5 | TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries |
| 6 | +POST_PATH=/home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 | ||
| 7 | +LEMMA_PATH=/home/cmendezc/BIO_LEMMATIZER | ||
| 6 | 8 | ||
| 7 | PRE=TRUE | 9 | PRE=TRUE |
| 8 | echo " Preprocessing: $PRE" | 10 | echo " Preprocessing: $PRE" |
| ... | @@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then | ... | @@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then |
| 19 | echo "Preprocessing..." | 21 | echo "Preprocessing..." |
| 20 | INPUT_PATH=$ORIGINAL_CORPUS_PATH | 22 | INPUT_PATH=$ORIGINAL_CORPUS_PATH |
| 21 | OUTPUT_PATH=$CORPUS_PATH/preprocessed | 23 | OUTPUT_PATH=$CORPUS_PATH/preprocessed |
| 22 | -python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength_LREGULONDB.json > outputPreprocessing_lregulondb.txt | 24 | +python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength.json > outputPreprocessing.txt |
| 23 | -# python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH > outputPreprocessing_lregulondb.txt | ||
| 24 | fi | 25 | fi |
| 25 | 26 | ||
| 26 | if [ "$POS" = "TRUE" ]; then | 27 | if [ "$POS" = "TRUE" ]; then |
| 27 | echo "POS Tagging..." | 28 | echo "POS Tagging..." |
| 28 | INPUT_PATH=$CORPUS_PATH/preprocessed | 29 | INPUT_PATH=$CORPUS_PATH/preprocessed |
| 29 | OUTPUT_PATH=$CORPUS_PATH/pos | 30 | OUTPUT_PATH=$CORPUS_PATH/pos |
| 30 | -python3.4 posTaggingStanford.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --taggerPath /home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 --biolemmatizer > outputPOST_lregulondb.txt | 31 | +python3.4 posTaggingStanford.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --taggerPath $POST_PATH --biolemmatizer > outputPOST.txt |
| 31 | fi | 32 | fi |
| 32 | 33 | ||
| 33 | if [ "$LEMMA" = "TRUE" ]; then | 34 | if [ "$LEMMA" = "TRUE" ]; then |
| 34 | echo "Lemmatization..." | 35 | echo "Lemmatization..." |
| 35 | INPUT_PATH=$CORPUS_PATH/pos | 36 | INPUT_PATH=$CORPUS_PATH/pos |
| 36 | OUTPUT_PATH=$CORPUS_PATH/lemma | 37 | OUTPUT_PATH=$CORPUS_PATH/lemma |
| 37 | -python3.4 biolemmatizing.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --biolemmatizerPath /home/cmendezc/BIO_LEMMATIZER > outputLemma_lregulondb.txt | 38 | +python3.4 biolemmatizing.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --biolemmatizerPath $LEMMA_PATH > outputLemma.txt |
| 38 | fi | 39 | fi |
| 39 | 40 | ||
| 40 | if [ "$TERM" = "TRUE" ]; then | 41 | if [ "$TERM" = "TRUE" ]; then |
| 41 | echo "Terminological tagging..." | 42 | echo "Terminological tagging..." |
| 42 | INPUT_PATH=$CORPUS_PATH/lemma | 43 | INPUT_PATH=$CORPUS_PATH/lemma |
| 43 | OUTPUT_PATH=$CORPUS_PATH/term | 44 | OUTPUT_PATH=$CORPUS_PATH/term |
| 44 | -python3.4 biologicalTermTagging.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termPath $TERM_PATH --termFiles termFilesTag_LREGULONDB.json > outputTerm_lregulondb.txt | 45 | +python3.4 biologicalTermTagging.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termPath $TERM_PATH --termFiles termFilesTag.json > outputTerm.txt |
| 45 | fi | 46 | fi |
| 46 | 47 | ||
| 47 | if [ "$TRANS" = "TRUE" ]; then | 48 | if [ "$TRANS" = "TRUE" ]; then |
| 48 | echo "Transformation..." | 49 | echo "Transformation..." |
| 49 | INPUT_PATH=$CORPUS_PATH/term | 50 | INPUT_PATH=$CORPUS_PATH/term |
| 50 | OUTPUT_PATH=$CORPUS_PATH/transformed | 51 | OUTPUT_PATH=$CORPUS_PATH/transformed |
| 51 | -python3.4 transforming.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --minWordsInLine 5 > outputTransformation_lregulondb.txt | 52 | +python3.4 transforming.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --minWordsInLine 5 > outputTransformation.txt |
| 52 | fi | 53 | fi | ... | ... |
-
Please register or login to post a comment