automatic-extraction-ris-gcs.sh 26 KB

Raw Blame History Permalink

#!/bin/bash
# Main script for automatic extraction of regulatory interactions

# Parameters
#1: Path and file name of the preprocessed sentences in token (word) format
#2: Path and file name of the preprocessed sentences in transformed format (word|lemma|pos)
#3: Path and file name of the file used for OpenIE processing
#4: Output path for the a1 and a2 files with RIs and GCs
#5: Simplify sentences? Y/N
#6: Separate verbal and deverbal sentences? Y/N
#7: Filter for sentences that contain entities. FILT1 = (GENE OR TU) AND TF
#8: Path with the reference a1 and a2 files (true RIs and GCs)
#9: Reference file (true RIs and GCs)
#10: Path where the evaluation file is saved
#11: File where the results of the evaluation against the reference are saved
#12: File with synonyms of TFs
#13: tsv file with section, sentence id and sentence (extracted from jsonpdf)

# RUN EXTRACTION FOR L&C STM
# ./automatic-extraction-ris-gcs.sh
# /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/features/6094508.tra.word.txt
# /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/transformed/6094508.tra.txt
# /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/ri-openie-extraction/ris-STM.txt
# /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs
# Y Y FILT1
# /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/unused-reference
# unused.txt
# /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/evaluation-reports
# unused.txt
# diccionario-STM-LT2-v7.0.SYNONYMS.json
# 1>uno-STM-LC.txt
# 2>dos-STM-LC.txt
# ./automatic-extraction-ris-gcs.sh /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/features/6094508.tra.word.txt /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/transformed/6094508.tra.txt /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/ri-openie-extraction/ris-STM.txt /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs Y Y FILT1 /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/unused-reference unused.txt /home/cmendezc/gitlab_repositories/lisen-curate-nlp-tasks/ris-extraction/bries-bacterial-regulatory-interaction-extraction-system/evaluation-reports unused.txt diccionario-STM-LT2-v7.0.SYNONYMS.json 1>uno-STM-LC.txt 2>dos-STM-LC.txt

# Some help
# Filename without path: filename=$(basename "$fullfile")
# Filename extension: extension="${filename##*.}"
# Filename without extension: filename="${filename%.*}"
# For the "too many files" error: find . -print0 | xargs -0 grep AcrR


# Locations of external tools and resources. Each one keeps its historical
# default but may be overridden by exporting a variable of the same name.
PATH_TO_CORENLP=${PATH_TO_CORENLP:-/home/cmendezc/STANFORD_CORENLP/stanford-corenlp-full-2017-06-09}
DICC_PATH=${DICC_PATH:-/home/cmendezc/terminologicalResources}
ISIMP_PATH=${ISIMP_PATH:-/home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-ris-gcs/sentence-simplification/isimp_v2}

# Validate the argument count before reading any positional parameter.
# Exactly 13 arguments are required; the usage text lists all of them in order.
if [[ "$#" -ne 13 ]]; then
    cat >&2 <<'EOF'
Usage: ./automatic-extraction-ris-gcs.sh <inputPath_wordFile>
    <inputPath_taggedFile> <outputPath_file> <outputPath> <simplify?>
    <deverbal_detector?> <filter> <true_path> <true_file>
    <path_evaluation_report> <file_evaluation_report>
    <dictionary_TFs_synonyms> <path_tsv_file>
EOF
    exit 1
fi

# Absolute directory that contains this script; every stage resolves its
# helper programs and working directories from it.
SCRIPT_PATH=$(cd -- "$(dirname -- "$0")" && pwd) || {
    echo "ERROR: cannot resolve the script directory from $0" >&2
    exit 1
}

INPUT_PATH=$1
INPUT_PATH_TRANS=$2
OUTPUT_FILE=$3
OUTPUT_PATH=$4
# Input file name with and without its last extension.
INPUT_NAME_EXT=$(basename -- "$INPUT_PATH")
INPUT_NAME="${INPUT_NAME_EXT%.*}"
# Simplify sentences?
SIMPLIFY=$5
# Separate sentences with deverbal effect?
DEVERBAL_SEPARATOR=$6
FILT=$7
TRUE_PATH=$8
TRUE_FILE=$9
PATH_EVAL=${10}
FILE_EVAL=${11}
DICC_SYNON=${12}
# CFMC 2022-03-09: tsv file with section, id sentence, sentence (Extracted from jsonpdf)
TSV_PATH=${13}

# Report every positional argument so the run log records the complete
# configuration, including the tsv path received as the 13th argument.
printf '%s\n' \
    "********** SELECTED PARAMETERS **********" \
    "INPUT PATH: $INPUT_PATH" \
    "INPUT PATH TRANSFORMED FILE $INPUT_PATH_TRANS" \
    "OUTPUT FILE: $OUTPUT_FILE" \
    "OUTPUT PATH: $OUTPUT_PATH" \
    "SIMPLIFY SENTENCES? $SIMPLIFY" \
    "SEPARATE DEVERBAL SENTENCES? $DEVERBAL_SEPARATOR" \
    "FILTER SENTENCES WITH ENTITIES? $FILT" \
    "REFERENCE (TRUE) PATH: $TRUE_PATH" \
    "REFERENCE (TRUE) FILE: $TRUE_FILE" \
    "PATH EVALUATION REPORT: $PATH_EVAL" \
    "FILE EVALUATION REPORT: $FILE_EVAL" \
    "DICTIONARY OF SYNONYMS OF TFS: $DICC_SYNON" \
    "TSV PATH: $TSV_PATH"

# Stage switches. Each pipeline stage below runs only when its switch is
# exactly TRUE; DEVTAG and DEVSEPAR are forwarded to the deverbal separator.
CLEAN_OUTPUT=FALSE
FILTER=TRUE
CLEAN=TRUE
SEPARATE=TRUE
SIMPLI=TRUE
DEVERBAL=TRUE
DEVTAG=TRUE # Needs DEVERBAL=TRUE
DEVSEPAR=TRUE # Needs DEVERBAL=TRUE
EXTDEVERBAL=TRUE
OPENIE=TRUE
EXTOPENIE=TRUE
EXTATTRIB=TRUE
EXTAUTOREG=TRUE
EXTGC=FALSE
EVAL=FALSE
EVALGC=FALSE

# Print the switch summary, one line per stage, in pipeline order.
printf '%s\n' \
    "********** SELECTED PROCESSES **********" \
    "   Clean output paths: $CLEAN_OUTPUT" \
    "   Filter sentences: $FILTER" \
    "   Clean sentences for iSimp: $CLEAN" \
    "   Separate sentences to iSimp: $SEPARATE" \
    "   Simplify sentences: $SIMPLI" \
    "   Separate deverbal and verbal sentences: $DEVERBAL" \
    "   Tag sentences to separate deverbal and verbal sentences: $DEVTAG" \
    "   Do separate deverbal and verbal sentences: $DEVSEPAR" \
    "   Extract RI deverbal: $EXTDEVERBAL" \
    "   OpenIE triplet extraction: $OPENIE" \
    "   Extract RI verbal: $EXTOPENIE" \
    "   Extract RI attributive: $EXTATTRIB" \
    "   Extract RI autoregulation: $EXTAUTOREG" \
    "   Extract growth conditions: $EXTGC" \
    "   Evaluate extraction: $EVAL" \
    "   Evaluate growth condition extraction: $EVALGC"

#########################
# Cleaning output paths #
#########################
# Delete every entry directly inside directory $1 whose name contains a dot
# (same selection as the previous `find -name '*.*' -delete`).
# A directory that does not exist is skipped. find is used instead of
# `rm dir/*` so a very large directory cannot overflow the argument list.
_purge_dotted_entries() {
    local dir=$1
    [ -d "$dir" ] || return 0
    find "$dir" -maxdepth 1 -name '*.*' -delete
}

if [ "$CLEAN_OUTPUT" = "TRUE" ]; then
    # ${OUTPUT_PATH:?} aborts instead of cleaning /complete-ris when the
    # output path is empty or unset.
    _purge_dotted_entries "${OUTPUT_PATH:?}/complete-ris"
    _purge_dotted_entries "${OUTPUT_PATH:?}/incomplete-ris"
fi # if [ "$CLEAN_OUTPUT" = "TRUE" ]; then
################
# preliminares #
################
#Clone and update simplification pipeline
#if [ ! -d "./sentence-simplification" ]
#	then
#		echo Downloading sentence simplificator...
#		git clone https://github.com/ezojg/sentence-simplification
#	else
#		cd ./sentence-simplification
#		git pull origin master
#		cd ..
#fi
#Check for iSimp
#if [ ! -d "./sentence-simplification/isimp_v2" ]
#	then
#		echo ERROR: ./sentence-simplification/isimp_v2 not found. Please manually copy iSimp to said path.
#		exit 1
#fi

# Remove every non-hidden, non-directory entry directly inside directory $1
# (what `rm dir/*` removed). A missing directory is skipped. find is used so
# that a very large directory cannot trigger "Argument list too long".
_clear_plain_files() {
    local dir=$1
    [ -d "$dir" ] || return 0
    find "$dir" -mindepth 1 -maxdepth 1 ! -name '.*' ! -type d -delete
}

if [ "$FILTER" = "TRUE" ]; then
echo "********** FILTER SENTENCES **********"
###################################################
# filter sentences with entities of interest      #
# and collect attributive examples ArgP-regulated #
###################################################
# INPUT (options passed to sentence-filter_v02.py):
# 1) --tsvPath $TSV_PATH tsv file received as the 13th argument
# 2) --inputFileWord $INPUT_PATH input file of feature 'word'
# 3) --inputFileTrans $INPUT_PATH_TRANS input file transformed
# 4) --outputPath $SCRIPT_PATH/filtered-sentences
# 5) --outputFile filtered-sentences.txt output File
# 6) --filter filter $FILT
#  FILT1: (GENE OR TU) AND TF
#  FILT2: (GENE OR TU) AND EFFECT AND TF
# 7) --attrPath $SCRIPT_PATH/attributive-sentences Path for attributive cases: ArgP-regulated genes
# 8) --autoPath $SCRIPT_PATH/autoregulation-sentences Path for autoregulation cases
# 9) --dicPath $DICC_PATH --dicFile normalized_Effects.json

# Abort when the script directory is unreachable: continuing would run the
# cleanup below relative to an unrelated working directory.
cd "$SCRIPT_PATH" || exit 1

# Empty the three directories this stage writes to.
for stage_dir in filtered-sentences attributive-sentences autoregulation-sentences; do
    _clear_plain_files "$SCRIPT_PATH/$stage_dir"
done

# CFMC 2022-03-09: To update tsv file with filtered sentences
# python3.4 $SCRIPT_PATH/sentence-filter.py --inputFileWord $INPUT_PATH --inputFileTrans $INPUT_PATH_TRANS --outputPath $SCRIPT_PATH/filtered-sentences --outputFile filtered-sentences.txt --filter $FILT --attrPath $SCRIPT_PATH/attributive-sentences --autoPath $SCRIPT_PATH/autoregulation-sentences --dicPath $DICC_PATH --dicFile normalized_Effects.json
python3.4 "$SCRIPT_PATH/sentence-filter_v02.py" \
    --tsvPath "$TSV_PATH" \
    --inputFileWord "$INPUT_PATH" \
    --inputFileTrans "$INPUT_PATH_TRANS" \
    --outputPath "$SCRIPT_PATH/filtered-sentences" \
    --outputFile filtered-sentences.txt \
    --filter "$FILT" \
    --attrPath "$SCRIPT_PATH/attributive-sentences" \
    --autoPath "$SCRIPT_PATH/autoregulation-sentences" \
    --dicPath "$DICC_PATH" \
    --dicFile normalized_Effects.json
fi # if [ "$FILTER" = "TRUE" ]; then

if [ "$CLEAN" = "TRUE" ]; then
echo "********** CLEAN SENTENCES **********"
#############################
# Clean sentences for iSimp #
#############################
# INPUT - PREVIOUS OUTPUT: filtered sentences $SCRIPT_PATH/filtered-sentences/filtered-sentences.txt
# output path and file $SCRIPT_PATH/format/sanitized_sentences/$INPUT_NAME_EXT

# The directory is addressed through $SCRIPT_PATH rather than "./" so this
# stage does not depend on a cd performed by an earlier, optional stage.
sanitized_dir="$SCRIPT_PATH/format/sanitized_sentences"
if [ -d "$sanitized_dir" ]; then
    # Same selection as `rm dir/*` (non-hidden, non-directory entries) without
    # the argument-list limit.
    find "$sanitized_dir" -mindepth 1 -maxdepth 1 ! -name '.*' ! -type d -delete
fi
#Original Daniel: python2 $SCRIPT_PATH/format/regex-before.py $INPUT_PATH $SCRIPT_PATH/format/sanitized_sentences/$INPUT_NAME_EXT
python2 "$SCRIPT_PATH/format/regex.py" \
    "$SCRIPT_PATH/filtered-sentences/filtered-sentences.txt" \
    "$sanitized_dir/$INPUT_NAME_EXT"
fi # if [ "$CLEAN" = "TRUE" ]; then

# Print how many characters the line count of file $1 has (e.g. 120 lines
# -> 3). Whitespace that some wc implementations pad the count with is
# removed first so that it is not counted.
_line_count_width() {
    local count
    count=$(wc -l < "$1") || return
    count=${count//[[:space:]]/}
    printf '%s\n' "${#count}"
}

if [ "$SEPARATE" = "TRUE" ]; then
echo "********** SEPARATE SENTENCES **********"
################################
# Separate sentences for iSimp #
################################
# INPUT - PREVIOUS OUTPUT: $SCRIPT_PATH/format/sanitized_sentences/$l
# output path and file $SCRIPT_PATH/format/split_sentences/$BARE_NAME
cd "$SCRIPT_PATH" || exit 1
if [ -d ./format/split_sentences ]; then
    # Same selection as `rm dir/*`, without the argument-list limit.
    find ./format/split_sentences -mindepth 1 -maxdepth 1 ! -name '.*' ! -type d -delete
fi
cd ./format/sanitized_sentences || exit 1
# Glob directly instead of parsing `ls`, so names with blanks stay intact.
for l in "$INPUT_NAME"*
do
    # Skip the literal pattern left behind when nothing matches.
    [ -f "$l" ] || continue
    # Output prefix: file name up to its first dot, followed by "_".
    BARE_NAME="${l%%.*}_"
    # The suffix width equals the number of digits in the line count, so one
    # numeric suffix per line always fits.
    LENGTH=$(_line_count_width "$l")
    # One output file per input line: <prefix><numeric suffix>.spt
    split -a "$LENGTH" -d -l 1 --additional-suffix=.spt \
        "$SCRIPT_PATH/format/sanitized_sentences/$l" \
        "$SCRIPT_PATH/format/split_sentences/$BARE_NAME"
done
fi # if [ "$SEPARATE" = "TRUE" ]; then

# Translate a Y/N answer ($1) into 1 or 0. Only an answer that starts with
# N or n yields 0; everything else (including an empty answer) yields 1,
# which matches the previous three-way case statement.
_yes_no_flag() {
    case "$1" in
        [Nn]*) printf '0\n' ;;
        *) printf '1\n' ;;
    esac
}

if [ "$SIMPLI" = "TRUE" ]; then
echo "********** SIMPLIFY SENTENCES **********"
######################
# Simplify sentences #
######################
# INPUT - PREVIOUS OUTPUT: $SCRIPT_PATH/format/split_sentences
# output file $OUTPUT_FILE
# path to iSimp $ISIMP_PATH
# CALL: ./sentence-simplification/sentence-simplification-main.sh
#  CALL: $ISIMP_PATH/simplify.sh $j $SCRIPT_PATH/iSimp_sentences/$(basename $j)
#  CALL: $SCRIPT_PATH/simplifier.py $k $SCRIPT_PATH/algorithm_sentences/$(basename $k) $OUTPUT_INDEX_FILE_PATH
#     $OUTPUT_INDEX_FILE_PATH = $OUTPUT_FILE
# OUTPUT: simplified sentences in path ./algorithm_sentences

# while true; do
    # read -p "Do you wish to simplificate sentences? [Y/N]: " yn
    # case $yn in
        # [Yy]* ) SIMP=1; break;;
        # [Nn]* ) SIMP=0; break;;
        # * ) echo "Please answer yes [Y] or no [N].";;
    # esac
# done
SIMP=$(_yes_no_flag "$SIMPLIFY")
cd "$SCRIPT_PATH" || exit 1
if [ "$SIMP" = 1 ]; then
    #USING SIMPLIFICATION
    echo "********** YES SIMPLIFY SENTENCES **********"
    #Call simplification pipeline AND create a file with the paths for the simplificated sentences
    ./sentence-simplification/sentence-simplification-main.sh \
        "$SCRIPT_PATH/format/split_sentences" "$OUTPUT_FILE" "$ISIMP_PATH"
else
    #WITHOUT SIMPLIFICATION
    echo "********** NO SIMPLIFY SENTENCES **********"
    split_dir="$SCRIPT_PATH/format/split_sentences"
    algorithm_dir="$SCRIPT_PATH/sentence-simplification/algorithm_sentences"
    # find replaces `rm dir/*`, `ls dir/*` and `cp dir/*`, which fail with
    # "Argument list too long" once a directory holds many sentence files.
    if [ -d "$algorithm_dir" ]; then
        find "$algorithm_dir" -mindepth 1 -maxdepth 1 ! -name '.*' ! -type d -delete
    fi
    # Sorted list of the split sentence files, one path per line.
    find "$split_dir" -mindepth 1 -maxdepth 1 ! -name '.*' ! -type d | sort > "$OUTPUT_FILE"
    # Use the split sentences unchanged as the "simplified" sentences.
    find "$split_dir" -mindepth 1 -maxdepth 1 ! -name '.*' ! -type d \
        -exec cp -t "$algorithm_dir" -- {} +
fi
fi # if [ "$SIMPLI" = "TRUE" ]; then

if [ "$DEVERBAL" = "TRUE" ]; then
echo "********** SEPARATE VERBAL AND DEVERBAL SENTENCES **********"
######################
# Deverbal separator #
######################
# $PATH_TO_CORENLP
# INPUT - PREVIOUS OUTPUT: $SCRIPT_PATH/sentence-simplification/algorithm_sentences
# output path $SCRIPT_PATH/deverbal-separator/separated_sentences
# $DICC_PATH
# $DEVTAG POS taggging sentences
# $DEVSEPAR Do separate sentences
# CALL: java -cp "$PATH_TO_CORENLP/*"
#       $SCRIPT_PATH/filter.py
# OUTPUT: sentences separated in two paths according to verbal/deverbal effect

# Only an answer starting with N/n disables the separator; any other value
# (including an empty one) enables it.
case "$DEVERBAL_SEPARATOR" in
    [Nn]*) DEVSEP=0 ;;
    *) DEVSEP=1 ;;
esac

# The separator is started through a path relative to the script directory,
# so enter it here instead of relying on a cd done by an earlier stage.
cd "$SCRIPT_PATH" || exit 1

if [ "$DEVSEP" = 1 ]; then
    #USING DEVERBAL SEPARATOR

    #if [ -z "$(ls -A $SCRIPT_PATH/deverbal-separator/separated_sentences/vrb/)" ]; then :
    #else
       #echo "Not Empty"
       # Error: /bin/rm: Argument list too long: rm $SCRIPT_PATH/deverbal-separator/separated_sentences/vrb/*
    #   find $SCRIPT_PATH/deverbal-separator/separated_sentences/vrb -maxdepth 1 -name '*.vrb' -delete
    #fi
    #if [ -z "$(ls -A $SCRIPT_PATH/deverbal-separator/separated_sentences/dev/)" ]; then :
    #else
       #echo "Not Empty"
       # Error: /bin/rm: Argument list too long: rm $SCRIPT_PATH/deverbal-separator/separated_sentences/dev/*
    #   find $SCRIPT_PATH/deverbal-separator/separated_sentences/dev -maxdepth 1 -name '*.dev' -delete
    #fi

    echo "********** YES SEPARATE VERBAL AND DEVERBAL SENTENCES **********"
    # Original Daniel 2018-12-06: ./deverbal-separator/separator.sh $PATH_TO_CORENLP $SCRIPT_PATH/sentence-simplification/algorithm_sentences $SCRIPT_PATH/deverbal-separator/separated_sentences $DICC_PATH $DEVTAG $DEVSEPAR
    ./deverbal-separator/separator-v02.sh \
        "$PATH_TO_CORENLP" \
        "$SCRIPT_PATH/sentence-simplification/algorithm_sentences" \
        "$SCRIPT_PATH/deverbal-separator/separated_sentences" \
        "$DICC_PATH" "$DEVTAG" "$DEVSEPAR"
else
    #WITHOUT DEVERBAL SEPARATOR
    echo "********** NO SEPARATE VERBAL AND DEVERBAL SENTENCES **********"
    # find | sort replaces `ls dir/*`, which fails with "Argument list too
    # long" on large directories.
    # NOTE(review): the OpenIE stage rewrites $OUTPUT_FILE from the vrb
    # directory, so this list appears to be overwritten later - confirm.
    find "$SCRIPT_PATH/sentence-simplification/algorithm_sentences" \
        -mindepth 1 -maxdepth 1 ! -name '.*' ! -type d | sort > "$OUTPUT_FILE"
fi # [ "$DEVSEP" = 1 ]
fi # if [ "$DEVERBAL" = "TRUE" ]; then

if [ "$EXTDEVERBAL" = "TRUE" ]; then
echo "********** EXTRACT RI DEVERBAL **********"
#######################
# Extract RI deverbal #
#######################
# INPUT: deverbal files $(dirname ${file}) $(basename ${file})
# output path $OUTPUT_PATH $(basename ${file%.*})
# $DICC_PATH/names_EFFECT_ONTOGENE.txt $
# DICC_PATH/names_GENE.txt
# $DICC_PATH/names_GENE_ONTOGENE.txt
# $DICC_PATH/names_GENE_SYN.txt
# $DICC_PATH/names_TU.txt
# $DICC_PATH/names_TU_ONTOGENE.txt
# $DICC_PATH/names_TF_1grams.txt
# $DICC_PATH/names_TF_2grams.txt
# $DICC_PATH/names_TF_3grams.txt
# $DICC_PATH/names_TF_4grams.txt
# $DICC_PATH/names_TF_5Moregrams.txt
# $DICC_PATH/names_TF_ONTOGENE.txt
# $DICC_PATH/normalized_Effects.json
# OUTPUT: standoff files with RIs
# PATH ALREADY TAGGED ENTITIES: $SCRIPT_PATH/filtered-sentences
# FILE ALREADY TAGGED ENTITIES: filtered-sentences.ents.json
    for file in "$SCRIPT_PATH"/deverbal-separator/separated_sentences/dev/*.*
    do
        # When nothing matches, the loop receives the literal pattern; report
        # it instead of running the extractor on a file that does not exist.
        if [ ! -e "$file" ]; then
          echo "None deverbal sentence found"
          continue
        fi
        dev_dir=$(dirname -- "$file")
        dev_name=$(basename -- "$file")
        # File name without its last extension, used as the output base name.
        dev_stem=${dev_name%.*}
        #python3 $SCRIPT_PATH/extract-ris-deverbal/EFF_DVB-regex-OriginalDaniel.py $file $OUTPUT_PATH/$(basename ${file%.*}) $DICC_PATH/names_EFFECT_ONTOGENE.txt $DICC_PATH/names_GENE.txt $DICC_PATH/names_GENE_ONTOGENE.txt $DICC_PATH/names_GENE_SYN.txt $DICC_PATH/names_TU.txt $DICC_PATH/names_TU_ONTOGENE.txt $DICC_PATH/names_TF_1grams.txt $DICC_PATH/names_TF_2grams.txt $DICC_PATH/names_TF_3grams.txt $DICC_PATH/names_TF_4grams.txt $DICC_PATH/names_TF_5Moregrams.txt $DICC_PATH/names_TF_ONTOGENE.txt
        echo "Dir and files: $dev_dir $dev_name $OUTPUT_PATH $dev_stem"
        #python3 $SCRIPT_PATH/extract-ris-deverbal/EFF_DVB-regex-v02.py $(dirname ${file}) $(basename ${file}) $OUTPUT_PATH $(basename ${file%.*}) $DICC_PATH/normalized_Effects.json $SCRIPT_PATH/filtered-sentences filtered-sentences.ents.json
        python3 "$SCRIPT_PATH/extract-ris-deverbal/EFF_DVB-regex-v03.py" \
            "$dev_dir" "$dev_name" "$OUTPUT_PATH" "$dev_stem" \
            "$DICC_PATH/normalized_Effects.json" \
            "$SCRIPT_PATH/filtered-sentences" filtered-sentences.ents.json
    done
fi # if [ "$EXTDEVERBAL" = "TRUE" ]; then

if [ "$OPENIE" = "TRUE" ]; then
echo "********** OPENIE TRIPLET EXTRACTION **********"
    #############################
    # OpenIE triplet extraction #
    #############################
    # Collect the paths of the verbal sentence files into the file list that
    # OpenIE reads through -filelist.
    # Error: /bin/ls: Argument list too long: ls $SCRIPT_PATH/deverbal-separator/separated_sentences/vrb/* > $OUTPUT_FILE
    echo "   Join verbal sentences into file for OpenIE extraction"
    # Quoted so that a path containing blanks is neither split into several
    # arguments nor rejected as an ambiguous redirect.
    find "$SCRIPT_PATH/deverbal-separator/separated_sentences/vrb" -type f > "$OUTPUT_FILE"
    #echo "Deberval sentences separated. Paths to verbal sentences saved in $OUTPUT_FILE"

    echo "   CoreNLP OpenIE..."
    # The triples are written next to the file list as <OUTPUT_FILE>.reverb.
    java -Xms2g -cp "$PATH_TO_CORENLP/*" edu.stanford.nlp.naturalli.OpenIE \
        -filelist "$OUTPUT_FILE" \
        -triple.strict false -triple.all_nominals true \
        -format reverb > "$OUTPUT_FILE.reverb"
fi # if [ "$OPENIE" = "TRUE" ]; then

if [ "$EXTOPENIE" = "TRUE" ]; then
    echo "********** OPENIE RI EXTRACTION **********"
    #########################
    # OpenIE RI extraction #
    #########################
    # oie_compress (by Nacho) was replaced with a program written by CMC that
    # analyzes the triplets and keeps those that suggest the participants and
    # the effect.
    #Paste input and output for fancy printing
    # Original Nacho: echo "   Fancy printing..."
    # Original Nacho: > $OUTPUT_FILE.fuzzy
    # Original Nacho: python3 oie_compress.py --oies $OUTPUT_FILE.reverb --op fuzzy --ris $DICC_PATH/normalized_Effects.json --out $OUTPUT_FILE.fuzzy
    #
    # --inputFile $OUTPUT_FILE.reverb file obtained with CoreNLP
    # --outputPath $OUTPUT_PATH
    # --diccPath $SCRIPT_PATH/filtered-sentences  Before: $DICC_PATH
    # --diccFile filtered-sentences.ents.json  Before: termFilesTag_RIE_GCE_SYSTEM_ECCO.json
    # --diccEffect normalized_Effects.json
    # --format standoff
    # --diccEPAth $DICC_PATH
    # OUTPUT: standoff files with RIs

    # python3.4 $SCRIPT_PATH/ri-openie-extraction.py --inputFile $OUTPUT_FILE.reverb --outputPath $OUTPUT_PATH --diccPath $DICC_PATH --diccFile termFilesTag_RIE_GCE_SYSTEM_ECCO.json --diccEffect normalized_Effects.json --format standoff
    # Every path argument is quoted so that blanks in a path cannot split it.
    python3.4 "$SCRIPT_PATH/ri-openie-extraction-v02.py" \
        --inputFile "$OUTPUT_FILE.reverb" \
        --outputPath "$OUTPUT_PATH" \
        --diccPath "$SCRIPT_PATH/filtered-sentences" \
        --diccFile filtered-sentences.ents.json \
        --diccEffect normalized_Effects.json \
        --diccEPAth "$DICC_PATH" \
        --format standoff

    #Join into single file
    #Sort fuzzy
    # Original Nacho: echo "   Sort fuzzy..."
    # Gets the effect type
    # Original Nacho: sort $OUTPUT_FILE.fuzzy -o $OUTPUT_FILE.fuzzy
    #Concatenate
    # CMC eliminated following lines because simplification was
    #discriminated before
    #if [ $SIMP == 1 ]
        #then    #USING SIMPLIFICATION
            #ls -l $SCRIPT_PATH/sentence-simplification/algorithm_sentences/* | awk -F '/' '{print $NF}' > $OUTPUT_FILE.ils
            #awk '{print $0":"}' $OUTPUT_FILE.ils > $OUTPUT_FILE.fls
            #cat $SCRIPT_PATH/sentence-simplification/algorithm_sentences/* > $OUTPUT_FILE.als
            #paste $OUTPUT_FILE.fls $OUTPUT_FILE.als > $OUTPUT_FILE.merger
        #else    #WITHOUT SIMPLIFICACION
            #ls -l $SCRIPT_PATH/format/split_sentences/* | awk -F '/' '{print $NF}' > $OUTPUT_FILE.ils
            #awk '{print $0":"}' $OUTPUT_FILE.ils > $OUTPUT_FILE.fls
            #cat $SCRIPT_PATH/format/split_sentences/* > $OUTPUT_FILE.als
            #paste $OUTPUT_FILE.fls $OUTPUT_FILE.als > $OUTPUT_FILE.merger
    #fi
    # Original Nacho: ls -l $SCRIPT_PATH/sentence-simplification/algorithm_sentences/* | awk -F '/' '{print $NF}' > $OUTPUT_FILE.ils
    # Original Nacho: awk '{print $0":"}' $OUTPUT_FILE.ils > $OUTPUT_FILE.fls
    # Original Nacho: cat $SCRIPT_PATH/sentence-simplification/algorithm_sentences/* > $OUTPUT_FILE.als
    # Original Nacho: echo "   Creating ils, fls and als files..."
    # Original Nacho: if [ $DEVSEP == 1 ]
        # Original Nacho: then    #USING DEVERBAL SEPARATOR
            # Original Nacho: ls -l $SCRIPT_PATH/deverbal-separator/separated_sentences/vrb/* | awk -F '/' '{print $NF}' > $OUTPUT_FILE.ils
            # Original Nacho: awk '{print $0":"}' $OUTPUT_FILE.ils > $OUTPUT_FILE.fls
            # Original Nacho: cat $SCRIPT_PATH/deverbal-separator/separated_sentences/vrb/* > $OUTPUT_FILE.als
        # Original Nacho: else    #WITHOUT DEVERBAL SEPARATOR
            # Original Nacho: ls -l $SCRIPT_PATH/sentence-simplification/algorithm_sentences/* | awk -F '/' '{print $NF}' > $OUTPUT_FILE.ils
            # Original Nacho: awk '{print $0":"}' $OUTPUT_FILE.ils > $OUTPUT_FILE.fls
            # Original Nacho: cat $SCRIPT_PATH/sentence-simplification/algorithm_sentences/* > $OUTPUT_FILE.als
    # Original Nacho: fi
    # Original Nacho: echo "   Paste merger..."
    # Original Nacho: paste $OUTPUT_FILE.fls $OUTPUT_FILE.als > $OUTPUT_FILE.merger
    # Original Nacho: echo "   Create dsp file..."
    #  Original Nacho: awk -F "\t" 'NR==FNR{a[$1]=$0} NR>FNR && a[$1]>0{print a[$1],"\t",$2}' $OUTPUT_FILE.fuzzy $OUTPUT_FILE.merger > $OUTPUT_FILE.dsp
    # Original Nacho: awk -F "\t" 'NR==FNR{a[$1]=$0} NR>FNR && a[$1]>0{print a[$1],"\t",$2}' $OUTPUT_FILE.fuzzy $OUTPUT_FILE.merger > $OUTPUT_FILE.dsp
    # rm $(dirname "$OUTPUT_FILE")/*.fls
    # rm $(dirname "$OUTPUT_FILE")/*.ils
    # rm $(dirname "$OUTPUT_FILE")/*.als
    #rm $SCRIPT_PATH/*.merger
    #rm $SCRIPT_PATH/*.reverb
    #rm $SCRIPT_PATH/*.fuzzy
fi # if [ "$EXTOPENIE" = "TRUE" ]; then

if [ "$EXTATTRIB" = "TRUE" ]; then
    echo "********** ATTRIBUTIVE RI EXTRACTION **********"
    #############################
    # Attributive RI extraction #
    #############################
    # Attributive RI extraction, such as AraP-regulated genes aragP, aragT
    #
    # --inputPath $SCRIPT_PATH/attributive-sentences
    # --inputFile name of each file found in that path
    # --outputPath $OUTPUT_PATH
    # --diccPath $DICC_PATH
    #   NOTE(review): an earlier comment named $SCRIPT_PATH/filtered-sentences
    #   for --diccPath, but the call passes $DICC_PATH - confirm which is intended.
    # --diccEffect normalized_Effects.json
    # OUTPUT: standoff files with RIs

    for file in "$SCRIPT_PATH"/attributive-sentences/*.*
    do
        attr_dir=$(dirname -- "$file")
        attr_name=$(basename -- "$file")
        echo "Dir file: $attr_dir"
        echo "File: $attr_name"
        # echo "OUTOUT_PATH $OUTPUT_PATH"
        # echo "File $(basename ${file%.*})"
        # echo "All $(dirname ${file}) $(basename ${file}) $OUTPUT_PATH $(basename ${file%.*})"
        # An unmatched glob leaves the literal pattern in $file; testing for
        # existence detects that case without comparing against "*.*".
        if [ ! -e "$file" ]; then
          echo "None attributive sentence found"
        else
          python3 "$SCRIPT_PATH/ri-attributive-extraction-v02.py" \
              --inputPath "$attr_dir" \
              --inputFile "$attr_name" \
              --outputPath "$OUTPUT_PATH" \
              --diccPath "$DICC_PATH" \
              --diccEffect normalized_Effects.json
        fi
    done

fi # if [ "$EXTATTRIB" = "TRUE" ]; then

if [ "$EXTAUTOREG" = "TRUE" ]; then
    echo "********** AUTOREGULATION RI EXTRACTION **********"
    ################################
    # Autoregulation RI extraction #
    ################################
    # Autoregulation RI extraction, such as ArgP protein represses its own synthesis
    #
    # --inputPath $SCRIPT_PATH/autoregulation-sentences
    # --inputFile name of each file found in that path
    # --outputPath $OUTPUT_PATH
    # --diccPath $DICC_PATH
    # --diccEffect normalized_Effects.json
    # OUTPUT: standoff files with RIs

    for file in "$SCRIPT_PATH"/autoregulation-sentences/*.*
    do
        autoreg_dir=$(dirname -- "$file")
        autoreg_name=$(basename -- "$file")
        echo "Dir file: $autoreg_dir"
        echo "File: $autoreg_name"
        # echo "OUTOUT_PATH $OUTPUT_PATH"
        # echo "File $(basename ${file%.*})"
        # echo "All $(dirname ${file}) $(basename ${file}) $OUTPUT_PATH $(basename ${file%.*})"
        # An unmatched glob leaves the literal pattern in $file; testing for
        # existence detects that case without comparing against "*.*".
        if [ ! -e "$file" ]; then
          echo "None autoregulation sentence found"
        else
          python3 "$SCRIPT_PATH/ri-autoregulation-extraction-v01.py" \
              --inputPath "$autoreg_dir" \
              --inputFile "$autoreg_name" \
              --outputPath "$OUTPUT_PATH" \
              --diccPath "$DICC_PATH" \
              --diccEffect normalized_Effects.json
        fi
    done

fi # if [ "$EXTAUTOREG" = "TRUE" ]; then

if [ "$EXTGC" = "TRUE" ]; then
    echo "********** EXTRACT GROWTH CONDITIONS **********"
    #############################
    # Extract growth conditions #
    #############################
    # Reads from and writes to $OUTPUT_PATH/complete-ris. The terminology
    # path comes from $DICC_PATH, which is set to the same directory that
    # used to be repeated here as a literal.
    python3.4 "$SCRIPT_PATH/extract-gcs/extract-gcs-regex.py" \
        --inputPath "$OUTPUT_PATH/complete-ris" \
        --outputPath "$OUTPUT_PATH/complete-ris" \
        --termPath "$DICC_PATH"
    #python3 ./GCs-regex-before.py ./ejemplo_11.spt
    #/home/elwe/Documents/prueba3/RIE_reordenado/RI-searcher/GC/ejemplo_11.spt ./ejemplo_11.a2
    #./names_GC_ECCO_1grams.txt ./names_GC_ECCO_2grams.txt ./names_GC_ECCO_3grams.txt
    #./names_GC_ECCO_4grams.txt ./names_GC_ECCO_5Moregrams.txt
fi # if [ "$EXTGC" = "TRUE" ]; then

if [ "$EVAL" = "TRUE" ]; then
    echo "********** EVALUATE EXTRACTION **********"
    # Arguments shared by both evaluation modes. They are kept in an array so
    # each value stays a single word even when it contains blanks.
    eval_args=(
        --truePath "$TRUE_PATH"
        --trueFile "$TRUE_FILE"
        --predictedPath "$OUTPUT_PATH/complete-ris"
        --outputPath "$PATH_EVAL"
        --outputFile "$FILE_EVAL"
        --diccPath "$DICC_PATH"
        --diccSynon "$DICC_SYNON"
    )
    if [ "$EVALGC" = "TRUE" ]; then
        echo "********** EVALUATE GROWTH CONDITION EXTRACTION **********"
        # The only difference between the two modes is this extra flag.
        eval_args+=(--evaluateGCs)
    else
        echo "********** EVALUATE WITHOUT GROWTH CONDITION EXTRACTION **********"
    fi  # if [ "$EVALGC" = "TRUE" ]; then
    python3.4 "$SCRIPT_PATH/evaluate-ris-gcs-standoff-v04.py" "${eval_args[@]}"
fi # if [ "$EVAL" = "TRUE" ]; then