Carlos-Francisco Méndez-Cruz

Setting up project

...@@ -3,6 +3,8 @@ echo 'Preprocessing files...' ...@@ -3,6 +3,8 @@ echo 'Preprocessing files...'
3 ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original 3 ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original
4 CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets 4 CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
5 TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries 5 TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries
6 +POST_PATH=/home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09
7 +LEMMA_PATH=/home/cmendezc/BIO_LEMMATIZER
6 8
7 PRE=TRUE 9 PRE=TRUE
8 echo " Preprocessing: $PRE" 10 echo " Preprocessing: $PRE"
...@@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then ...@@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then
19 echo "Preprocessing..." 21 echo "Preprocessing..."
20 INPUT_PATH=$ORIGINAL_CORPUS_PATH 22 INPUT_PATH=$ORIGINAL_CORPUS_PATH
21 OUTPUT_PATH=$CORPUS_PATH/preprocessed 23 OUTPUT_PATH=$CORPUS_PATH/preprocessed
22 -python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength_LREGULONDB.json > outputPreprocessing_lregulondb.txt 24 +python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength.json > outputPreprocessing.txt
23 -# python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH > outputPreprocessing_lregulondb.txt
24 fi 25 fi
25 26
26 if [ "$POS" = "TRUE" ]; then 27 if [ "$POS" = "TRUE" ]; then
27 echo "POS Tagging..." 28 echo "POS Tagging..."
28 INPUT_PATH=$CORPUS_PATH/preprocessed 29 INPUT_PATH=$CORPUS_PATH/preprocessed
29 OUTPUT_PATH=$CORPUS_PATH/pos 30 OUTPUT_PATH=$CORPUS_PATH/pos
30 -python3.4 posTaggingStanford.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --taggerPath /home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 --biolemmatizer > outputPOST_lregulondb.txt 31 +python3.4 posTaggingStanford.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --taggerPath $POST_PATH --biolemmatizer > outputPOST.txt
31 fi 32 fi
32 33
33 if [ "$LEMMA" = "TRUE" ]; then 34 if [ "$LEMMA" = "TRUE" ]; then
34 echo "Lemmatization..." 35 echo "Lemmatization..."
35 INPUT_PATH=$CORPUS_PATH/pos 36 INPUT_PATH=$CORPUS_PATH/pos
36 OUTPUT_PATH=$CORPUS_PATH/lemma 37 OUTPUT_PATH=$CORPUS_PATH/lemma
37 -python3.4 biolemmatizing.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --biolemmatizerPath /home/cmendezc/BIO_LEMMATIZER > outputLemma_lregulondb.txt 38 +python3.4 biolemmatizing.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --biolemmatizerPath $LEMMA_PATH > outputLemma.txt
38 fi 39 fi
39 40
40 if [ "$TERM" = "TRUE" ]; then 41 if [ "$TERM" = "TRUE" ]; then
41 echo "Terminological tagging..." 42 echo "Terminological tagging..."
42 INPUT_PATH=$CORPUS_PATH/lemma 43 INPUT_PATH=$CORPUS_PATH/lemma
43 OUTPUT_PATH=$CORPUS_PATH/term 44 OUTPUT_PATH=$CORPUS_PATH/term
44 -python3.4 biologicalTermTagging.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termPath $TERM_PATH --termFiles termFilesTag_LREGULONDB.json > outputTerm_lregulondb.txt 45 +python3.4 biologicalTermTagging.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termPath $TERM_PATH --termFiles termFilesTag.json > outputTerm.txt
45 fi 46 fi
46 47
47 if [ "$TRANS" = "TRUE" ]; then 48 if [ "$TRANS" = "TRUE" ]; then
48 echo "Transformation..." 49 echo "Transformation..."
49 INPUT_PATH=$CORPUS_PATH/term 50 INPUT_PATH=$CORPUS_PATH/term
50 OUTPUT_PATH=$CORPUS_PATH/transformed 51 OUTPUT_PATH=$CORPUS_PATH/transformed
51 -python3.4 transforming.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --minWordsInLine 5 > outputTransformation_lregulondb.txt 52 +python3.4 transforming.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --minWordsInLine 5 > outputTransformation.txt
52 fi 53 fi
......