Carlos-Francisco Méndez-Cruz

Setting up project

......@@ -3,6 +3,8 @@ echo 'Preprocessing files...'
# Locations of the corpus, the term dictionaries and the external tools
# (Stanford POS tagger, BioLemmatizer) referenced by the pipeline steps.
ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original
CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries
POST_PATH=/home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09
# The '=' is required: without it the shell tries to execute the text as a
# command and LEMMA_PATH is left unset for the lemmatization step.
LEMMA_PATH=/home/cmendezc/BIO_LEMMATIZER
# Step switch: the preprocessing step is guarded by a test for PRE = "TRUE".
PRE=TRUE
echo " Preprocessing: $PRE"
......@@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then
echo "Preprocessing..."
INPUT_PATH=$ORIGINAL_CORPUS_PATH
OUTPUT_PATH=$CORPUS_PATH/preprocessed
python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength_LREGULONDB.json > outputPreprocessing_lregulondb.txt
# python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH > outputPreprocessing_lregulondb.txt
python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH --termDetection --termPath $TERM_PATH --termFiles termFilesLength.json > outputPreprocessing.txt
fi
# POS tagging step: runs only when POS is exactly "TRUE".
# Input is $CORPUS_PATH/preprocessed, output goes to $CORPUS_PATH/pos.
# The stdout of each run is redirected to its own log file in the
# current directory.
if [ "$POS" = "TRUE" ]; then
  echo "POS Tagging..."
  INPUT_PATH="$CORPUS_PATH/preprocessed"
  OUTPUT_PATH="$CORPUS_PATH/pos"
  # NOTE(review): the two invocations differ only in how the tagger path is
  # supplied (literal vs. $POST_PATH) and in the log file name; the first
  # looks like a leftover from an earlier revision - confirm both are wanted.
  # Expansions are quoted so paths are never word-split or globbed.
  python3.4 posTaggingStanford.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --taggerPath /home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 --biolemmatizer > outputPOST_lregulondb.txt
  python3.4 posTaggingStanford.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --taggerPath "$POST_PATH" --biolemmatizer > outputPOST.txt
fi
# Lemmatization step: runs only when LEMMA is exactly "TRUE".
# Input is $CORPUS_PATH/pos, output goes to $CORPUS_PATH/lemma.
# The stdout of each run is redirected to its own log file in the
# current directory.
if [ "$LEMMA" = "TRUE" ]; then
  echo "Lemmatization..."
  INPUT_PATH="$CORPUS_PATH/pos"
  OUTPUT_PATH="$CORPUS_PATH/lemma"
  # NOTE(review): the two invocations differ only in how the lemmatizer path
  # is supplied (literal vs. $LEMMA_PATH) and in the log file name; the first
  # looks like a leftover from an earlier revision - confirm both are wanted.
  # Expansions are quoted so paths are never word-split or globbed.
  python3.4 biolemmatizing.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --biolemmatizerPath /home/cmendezc/BIO_LEMMATIZER > outputLemma_lregulondb.txt
  python3.4 biolemmatizing.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --biolemmatizerPath "$LEMMA_PATH" > outputLemma.txt
fi
# Terminological tagging step: runs only when TERM is exactly "TRUE".
# Input is $CORPUS_PATH/lemma, output goes to $CORPUS_PATH/term; the term
# dictionaries are looked up under $TERM_PATH.
# The stdout of each run is redirected to its own log file in the
# current directory.
if [ "$TERM" = "TRUE" ]; then
  echo "Terminological tagging..."
  INPUT_PATH="$CORPUS_PATH/lemma"
  OUTPUT_PATH="$CORPUS_PATH/term"
  # NOTE(review): the two invocations differ only in the --termFiles JSON
  # (termFilesTag_LREGULONDB.json vs. termFilesTag.json) and in the log file
  # name - confirm both runs are wanted.
  # Expansions are quoted so paths are never word-split or globbed.
  python3.4 biologicalTermTagging.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --termPath "$TERM_PATH" --termFiles termFilesTag_LREGULONDB.json > outputTerm_lregulondb.txt
  python3.4 biologicalTermTagging.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --termPath "$TERM_PATH" --termFiles termFilesTag.json > outputTerm.txt
fi
# Transformation step: runs only when TRANS is exactly "TRUE".
# Input is $CORPUS_PATH/term, output goes to $CORPUS_PATH/transformed.
# The stdout of each run is redirected to its own log file in the
# current directory.
if [ "$TRANS" = "TRUE" ]; then
  echo "Transformation..."
  INPUT_PATH="$CORPUS_PATH/term"
  OUTPUT_PATH="$CORPUS_PATH/transformed"
  # NOTE(review): the two invocations pass identical arguments and differ
  # only in the log file name - confirm both runs are wanted.
  # Expansions are quoted so paths are never word-split or globbed.
  python3.4 transforming.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --minWordsInLine 5 > outputTransformation_lregulondb.txt
  python3.4 transforming.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --minWordsInLine 5 > outputTransformation.txt
fi
......