Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
nlp-preprocessing-pipeline
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-03-07 11:53:36 -0600
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
642452ffdc483f7b4e42ca4e7ceb0318598d471d
642452ff
1 parent
d5d71dbc
Setting up project
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
6 deletions
nlp-preprocessing-pipeline.sh
nlp-preprocessing-pipeline.sh
View file @
642452f
...
...
@@ -3,6 +3,8 @@ echo 'Preprocessing files...'
ORIGINAL_CORPUS_PATH
=
/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original
CORPUS_PATH
=
/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
TERM_PATH
=
/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries
POST_PATH
=
/home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09
LEMMA_PATH=/home/cmendezc/BIO_LEMMATIZER
PRE
=
TRUE
echo
" Preprocessing:
$PRE
"
...
...
@@ -19,34 +21,33 @@ if [ "$PRE" = "TRUE" ]; then
echo
"Preprocessing..."
INPUT_PATH
=
$ORIGINAL_CORPUS_PATH
OUTPUT_PATH
=
$CORPUS_PATH
/preprocessed
python3.4 preprocessingTermDetection.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--termDetection --termPath
$TERM_PATH
--termFiles termFilesLength_LREGULONDB.json > outputPreprocessing_lregulondb.txt
# python3.4 preprocessingTermDetection.py --inputPath $INPUT_PATH --outputPath $OUTPUT_PATH > outputPreprocessing_lregulondb.txt
python3.4 preprocessingTermDetection.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--termDetection --termPath
$TERM_PATH
--termFiles termFilesLength.json > outputPreprocessing.txt
fi
if
[
"
$POS
"
=
"TRUE"
]
;
then
echo
"POS Tagging..."
INPUT_PATH
=
$CORPUS_PATH
/preprocessed
OUTPUT_PATH
=
$CORPUS_PATH
/pos
python3.4 posTaggingStanford.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--taggerPath
/home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 --biolemmatizer > outputPOST_lregulondb
.txt
python3.4 posTaggingStanford.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--taggerPath
$POST_PATH
--biolemmatizer > outputPOST
.txt
fi
if
[
"
$LEMMA
"
=
"TRUE"
]
;
then
echo
"Lemmatization..."
INPUT_PATH
=
$CORPUS_PATH
/pos
OUTPUT_PATH
=
$CORPUS_PATH
/lemma
python3.4 biolemmatizing.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--biolemmatizerPath
/home/cmendezc/BIO_LEMMATIZER > outputLemma_lregulondb
.txt
python3.4 biolemmatizing.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--biolemmatizerPath
$LEMMA_PATH
> outputLemma
.txt
fi
if
[
"
$TERM
"
=
"TRUE"
]
;
then
echo
"Terminological tagging..."
INPUT_PATH
=
$CORPUS_PATH
/lemma
OUTPUT_PATH
=
$CORPUS_PATH
/term
python3.4 biologicalTermTagging.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--termPath
$TERM_PATH
--termFiles termFilesTag
_LREGULONDB.json > outputTerm_lregulondb
.txt
python3.4 biologicalTermTagging.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--termPath
$TERM_PATH
--termFiles termFilesTag
.json > outputTerm
.txt
fi
if
[
"
$TRANS
"
=
"TRUE"
]
;
then
echo
"Transformation..."
INPUT_PATH
=
$CORPUS_PATH
/term
OUTPUT_PATH
=
$CORPUS_PATH
/transformed
python3.4 transforming.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--minWordsInLine 5 > outputTransformation
_lregulondb
.txt
python3.4 transforming.py --inputPath
$INPUT_PATH
--outputPath
$OUTPUT_PATH
--minWordsInLine 5 > outputTransformation.txt
fi
...
...
Please
register
or
login
to post a comment