Showing
31 changed files
with
343 additions
and
0 deletions
README.md
0 → 100644
| 1 | +# Bacterial regulatory interaction extraction system | ||
| 2 | + | ||
| 3 | +## Prerequisites | ||
| 4 | +1. Input file must be tokenized and sentence-split | ||
| 5 | + | ||
| 6 | + | ||
| 7 | + | ||
| 8 | + | ||
| 9 | +## Run | ||
| 10 | +### Several files | ||
| 11 | +Set filenames and paths in run-several-files.sh | ||
| 12 | + | ||
| 13 | +## Acknowledgments | ||
| 14 | +This work was supported by UNAM-PAPIIT IA203420. | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
attributive-sentences/.gitignore
0 → 100644
| 1 | + |
automatic-extraction-ris-gcs.sh
0 → 100755
This diff is collapsed. Click to expand it.
autoregulation-sentences/deleteme.txt
0 → 100644
File mode changed
deverbal-separator/filter-v03.py
0 → 100644
| 1 | +# import fileinput | ||
| 2 | +# import regex as re | ||
| 3 | +# from regex import finditer | ||
| 4 | +import sys | ||
| 5 | +import json | ||
| 6 | + | ||
| 7 | +if ( len( sys.argv ) != 3 ): | ||
| 8 | + # Original Daniel: sys.stderr.write( "E: usage: " +sys.argv[0] + " <input_file> <EFFs_dictionary> \n" ) | ||
| 9 | + sys.stderr.write("E: usage: " + sys.argv[0] + " <input_file> <normalized_Effects> \n") | ||
| 10 | + sys.stderr.flush(); | ||
| 11 | + | ||
| 12 | +# exit( 2 ); | ||
| 13 | + | ||
| 14 | +#LEER ARCHIVO INPUT | ||
| 15 | +text_file = open( sys.argv[1], "r" ) | ||
| 16 | +dato = text_file.read() | ||
| 17 | +text_file.close() | ||
| 18 | + | ||
| 19 | +#LEE DICCIONARIO | ||
| 20 | + | ||
| 21 | +# Loading normalized effects | ||
| 22 | +# print('Loading normalized effects...') | ||
| 23 | +with open(sys.argv[2]) as diccFile: | ||
| 24 | + hashNormalizedEffects = json.load(diccFile) | ||
| 25 | +DICC = list(hashNormalizedEffects.keys()) | ||
| 26 | + | ||
| 27 | +# Original Daniel: text_file = open( sys.argv[2], "r" ) | ||
| 28 | +# Original Daniel: DICC = text_file.read().splitlines() | ||
| 29 | +# Original Daniel: text_file.close() | ||
| 30 | + | ||
| 31 | + | ||
| 32 | +#declara variables | ||
| 33 | +is_dev = False | ||
| 34 | +is_vrb = False | ||
| 35 | + | ||
| 36 | + | ||
| 37 | +# DICC | ||
| 38 | +# 2018-11-30 CMC: We separated noun and only past participle for deverbal processing | ||
| 39 | +# and all verb forms as verbal | ||
| 40 | +# VRB: VB verb, base form think | ||
| 41 | +# VRB: VBZ verb, 3rd person singular present she thinks | ||
| 42 | +# VRB: VBP verb, non-3rd person singular present I think | ||
| 43 | +# VRB: VBD verb, past tense they thought | ||
| 44 | +# DEV: VBN verb, past participle a sunken ship | ||
| 45 | +# VRB: VBG verb, gerund or present participle thinking is fun | ||
| 46 | +# extend/VBP | ||
| 47 | +for i in range(len(DICC)): | ||
| 48 | + # print(DICC[i]) | ||
| 49 | + for token in dato.split(): | ||
| 50 | + word = token[:token.find("/")] | ||
| 51 | + tag = token[token.find("/")+1:] | ||
| 52 | + # print("word: {}".format(word)) | ||
| 53 | + # print("tag: {}".format(tag)) | ||
| 54 | + if (DICC[i] in word) and (("NN" in tag) | ||
| 55 | + or ("VBN" == tag) | ||
| 56 | + ): | ||
| 57 | + is_dev = True | ||
| 58 | + # print("deverbal: " + word) | ||
| 59 | + if (DICC[i] in word) and ("VB" in tag): | ||
| 60 | + is_vrb = True | ||
| 61 | + # print("verbal: " + word) | ||
| 62 | + | ||
| 63 | +if is_dev and is_vrb: | ||
| 64 | + sys.exit(11) | ||
| 65 | +elif is_dev: | ||
| 66 | + sys.exit(12) | ||
| 67 | +elif is_vrb: | ||
| 68 | + sys.exit(13) | ||
| 69 | +else: | ||
| 70 | + sys.exit(10) | ||
| 71 | + |
| 1 | + |
| 1 | + |
deverbal-separator/separator-v02.sh
0 → 100755
#!/bin/bash
# Separates tokenized sentence files into deverbal (.dev) and verbal (.vrb)
# subsets, using Stanford CoreNLP POS tags and the exit status of
# filter-v03.py.
#
# Usage: ./separator.sh <path_to_corenlp> <input_path> <output_path> \
#                       <dicc_path> <if_tag> <if_separate>

# Validate arguments
if [[ "$#" -ne 6 ]]; then
    echo 'Usage: ./separator.sh <path_to_corenlp> <input_path> <output_path> <dicc_path> <if_tag> <if_separate>'
    exit 1
fi

SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd)
PATH_TO_CORENLP=$1
INPUT_PATH=$2      # directory containing the files to separate
OUTPUT_PATH=$3
DICC_PATH=$4
TAG=$5             # "TRUE" => POS-tag the input with CoreNLP first
SEP=$6             # "TRUE" => separate deverbal and verbal sentences

if [[ "$TAG" == "TRUE" ]]; then
    # Clear previous tagging output.  find -delete is used instead of
    # "rm dir/*" to avoid "/bin/rm: Argument list too long" on large
    # corpora (deleting from an already-empty directory is harmless).
    find "$SCRIPT_PATH/tagged" -maxdepth 1 -name '*.conll' -delete
    find "$SCRIPT_PATH/tagged-line" -maxdepth 1 -name '*.spt' -delete

    # POS-tag every input file.  CoNLL output: token in column 2,
    # POS tag in column 4.
    for j in "$INPUT_PATH"/*
    do
        java -cp "$PATH_TO_CORENLP/*" -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos -outputFormat conll -file "$j" -outputDirectory "$SCRIPT_PATH/tagged"
    done

    # Collapse each CoNLL file into a single line of word/TAG tokens,
    # the format filter-v03.py expects.
    for j in "$SCRIPT_PATH/tagged"/*
    do
        filename=$(basename "$j")
        awk 'NF {print $2 "/" $4}' "$j" | paste -d" " -s > "$SCRIPT_PATH/tagged-line/${filename%.*}.spt"
    done
fi # if [[ "$TAG" == "TRUE" ]]

if [[ "$SEP" == "TRUE" ]]; then
    # Clear previous separation results (same find -delete rationale).
    find "$OUTPUT_PATH/dev" -maxdepth 1 -name '*.dev' -delete
    find "$OUTPUT_PATH/vrb" -maxdepth 1 -name '*.vrb' -delete

    for j in "$SCRIPT_PATH/tagged-line"/*
    do
        # filter-v03.py reports its classification through the exit
        # status: 11 both, 12 deverbal only, 13 verbal only, 10 neither.
        # CMC 2018-12-11: only the passive form (VBN) counts as deverbal.
        python3 "$SCRIPT_PATH/filter-v03.py" "$j" "$DICC_PATH/normalized_Effects.json"
        VAR=$?
        # Hoist the repeated $(basename ${j%.*}) computation.
        base=$(basename "${j%.*}")

        case "$VAR" in
            11) # contains both deverbal and verbal mentions
                cp "$INPUT_PATH/$base" "$OUTPUT_PATH/dev/$base.dev"
                cp "$INPUT_PATH/$base" "$OUTPUT_PATH/vrb/$base.vrb"
                ;;
            12) # contains deverbal mentions only
                cp "$INPUT_PATH/$base" "$OUTPUT_PATH/dev/$base.dev"
                ;;
            13) # contains verbal mentions only
                cp "$INPUT_PATH/$base" "$OUTPUT_PATH/vrb/$base.vrb"
                ;;
            10) # neither: routed to the verbal path by default
                echo "Non deverbal and verbal"
                cp "$INPUT_PATH/$base" "$OUTPUT_PATH/vrb/$base.vrb"
                ;;
        esac
    done
fi # if [[ "$SEP" == "TRUE" ]]
deverbal-separator/tagged-line/.gitignore
0 → 100644
| 1 | + |
deverbal-separator/tagged/.gitignore
0 → 100644
| 1 | + |
evaluate-ris-gcs-standoff-v04.py
0 → 100644
This diff is collapsed. Click to expand it.
extract-ris-deverbal/EFF_DVB-regex-v03.py
0 → 100644
This diff is collapsed. Click to expand it.
filtered-sentences/.gitignore
0 → 100644
| 1 | + |
format/regex.py
0 → 100644
"""Sanitize tokenized sentences.

Removes parenthesized/bracketed citation, figure, number, and Roman-numeral
noise from each line of <input_file> and writes the result to a file named
after <output_file> with its extension replaced by ".san".

Usage: python regex.py <input_file> <output_file>
"""
import re
import sys

# NOTE(review): the original also imported fileinput, which was unused.

if len(sys.argv) < 3:
    sys.stderr.write("E: usage: " + sys.argv[0] + " <input_file> <output_file> \n")
    sys.stderr.flush()
    exit(2)
else:
    print("Ok.")

# Read the input file, one sentence per line.
with open(sys.argv[1], "r") as text_file:
    dato = text_file.read().splitlines()

# Strip the extension from the output name and append ".san".
# NOTE(review): assumes a 4-character suffix (e.g. ".txt") — confirm callers.
file_name = sys.argv[2][:-4] + ".san"

# Open the output once in write mode instead of truncating it and then
# reopening it in append mode for every single line, as the original did.
with open(file_name, "w") as save_file:
    for line in dato:
        # Substitutions run in the original order.  Raw strings keep the
        # pattern text byte-identical while avoiding invalid-escape
        # warnings on modern Python.
        line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+[a-z]{1}\s[^\(|^\)]*[\)]', '', line.rstrip())  # removes ( NNNNa )
        line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+[a-z]{1}\s[^\(|^\)]*[\]]', '', line.rstrip())  # removes [ NNNNa ]
        line = re.sub(r'[\(][^\(|^\)]*\s([0-9]+,?)+\s[^\(|^\)]*[\)]', '', line.rstrip())  # removes ( NN,NN,NN )
        line = re.sub(r'[\[][^\(|^\)]*\s([0-9]+,?)+\s[^\(|^\)]*[\]]', '', line.rstrip())  # removes [ NN,NN,NN ]
        line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+\s[^\(|^\)]*[\)]', '', line.rstrip())  # removes ( num )
        line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+\.[0-9]+\s[^\(|^\)]*[\)]', '', line.rstrip())  # removes ( num.num )
        line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+\-[0-9]+\s[^\(|^\)]*[\)]', '', line.rstrip())  # removes ( num-num )
        line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+\s[^\(|^\)]*[\]]', '', line.rstrip())  # removes [ num ]
        line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+\.[0-9]+\s[^\(|^\)]*[\]]', '', line.rstrip())  # removes [ num.num ]
        line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+\-[0-9]+\s[^\(|^\)]*[\]]', '', line.rstrip())  # removes [ num-num ]
        line = re.sub(r'[\(]\s[a-zA-Z]{1}\s[\)]', '', line.rstrip())  # removes ( alpha )
        line = re.sub(r'[\[]\s[a-zA-Z]{1}\s[\]]', '', line.rstrip())  # removes [ alpha ]
        line = re.sub(r'[\(]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\)]', '', line.rstrip())  # removes ( Roman )
        line = re.sub(r'[\(]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s\-\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\)]', '', line.rstrip())  # removes ( Roman-Roman )
        line = re.sub(r'[\(]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\)]', '', line.rstrip())  # removes ( roman )
        line = re.sub(r'[\(]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s\-\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\)]', '', line.rstrip())  # removes ( roman-roman )
        line = re.sub(r'[\[]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\]]', '', line.rstrip())  # removes [ Roman ]
        line = re.sub(r'[\[]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s\-\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\]]', '', line.rstrip())  # removes [ Roman-Roman ]
        line = re.sub(r'[\[]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\]]', '', line.rstrip())  # removes [ roman ]
        line = re.sub(r'[\[]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s\-\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\]]', '', line.rstrip())  # removes [ roman-roman ]
        # NOTE(review): "tab\s\.table" below looks like a missing
        # alternation bar (probably intended "tab\s\.|table") — confirm
        # before changing, since fixing it would alter which spans match.
        line = re.sub(r'[\(][^\(|^\)]*\s(fig\s\.|figure|see|i\s\.\se\s\.|e\s\.\sg\s\.|tab\s\.table)\s[^\(|^\)]*[\)]', '', line.rstrip(), flags=re.I)
        # NOTE(review): as written this is a no-op space-for-space
        # substitution; the original likely collapsed a double space that
        # the diff rendering flattened — confirm against the repository.
        line = re.sub(' ', ' ', line.rstrip())

        save_file.write(line)
        save_file.write("\n")
format/sanitized_sentences/.gitignore
0 → 100644
| 1 | + |
format/split_sentences/.gitignore
0 → 100644
| 1 | + |
get-TRN-Organism-v1.py
0 → 100644
This diff is collapsed. Click to expand it.
get-TRN-v2.py
0 → 100644
This diff is collapsed. Click to expand it.
predicted-ris-gcs/complete-ris/.gitignore
0 → 100644
| 1 | + |
predicted-ris-gcs/incomplete-ris/.gitignore
0 → 100644
| 1 | + |
ri-attributive-extraction-v02.py
0 → 100644
This diff is collapsed. Click to expand it.
ri-autoregulation-extraction-v01.py
0 → 100644
This diff is collapsed. Click to expand it.
ri-openie-extraction-v02.py
0 → 100644
This diff is collapsed. Click to expand it.
ri-openie-extraction/.gitignore
0 → 100644
| 1 | + |
run-several-files.sh
0 → 100755
#!/bin/bash

###### Automatic extraction of TRN from several files ######
# Runs automatic-extraction-ris-gcs.sh once per preprocessed article;
# stdout/stderr of each run are captured in uno-<name>.txt / dos-<name>.txt.

BRIES_HOME=/myhome/bries
PMIDS_HOME=/myhome/preprocessed-files
# We don't use REFERENCE_HOME because we don't evaluate. Path /reference-data-set doesn't exist. File no-reference.txt doesn't exist.
REFERENCE_HOME=/myhome/reference-data-set

# Quote all expansions so paths containing spaces cannot word-split.
for f in "$PMIDS_HOME"/original/text/*.*
do
    FILE_NAME=$(basename "$f")
    FILE_NAME="${FILE_NAME%.*}"
    echo "File: $FILE_NAME"
    ./automatic-extraction-ris-gcs.sh "$PMIDS_HOME/features/$FILE_NAME.tra.word.txt" "$PMIDS_HOME/transformed/$FILE_NAME.tra.txt" "$BRIES_HOME/ri-openie-extraction/$FILE_NAME.txt" "$BRIES_HOME/predicted-ris-gcs" Y Y FILT1 "$REFERENCE_HOME" no-reference.txt "$BRIES_HOME/evaluation-reports" no-evaluation.txt diccionario-SYNONYMS.json "$PMIDS_HOME/original/tsv" 1>"uno-$FILE_NAME.txt" 2>"dos-$FILE_NAME.txt"
done
sentence-filter_v02.py
0 → 100644
This diff is collapsed. Click to expand it.
| 1 | + |
| 1 | + |
#!/bin/bash
# Simplifies sentences with iSimp, then feeds the analyses to the
# simplification algorithm (simplifier.py, python2) and records an index
# of the simplified files.

# Validate arguments
if [[ "$#" -ne 3 ]]; then
    echo 'Usage: ./sentence-simplification-main.sh <input_path> <output_file_path> <isimp_path>'
    exit 1
fi

SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd)
INPUT_PATH=$1                # directory with the sentence files to simplify
OUTPUT_INDEX_FILE_PATH=$2    # index file of simplified sentences
ISIMP_PATH=$3                # iSimp installation directory
cd "$SCRIPT_PATH"

# Analyse with iSimp
echo "Analysing in iSimp..."
# Clear previous iSimp output.  find -delete instead of "rm dir/*" to
# avoid "Argument list too long" on large corpora (the same workaround
# the sibling separator-v02.sh script uses); harmless when already empty.
find ./iSimp_sentences -maxdepth 1 -type f -delete
for j in "$INPUT_PATH"/*
do
    echo "$j"
    "$ISIMP_PATH/simplify.sh" "$j" "$SCRIPT_PATH/iSimp_sentences/$(basename "$j")"
done
cd "$SCRIPT_PATH"

# Create (truncate) the index of simplified files
>| "$OUTPUT_INDEX_FILE_PATH"

# Feed the iSimp analyses to the simplification algorithm
echo "Analysing in Algorithm..."
find ./algorithm_sentences -maxdepth 1 -type f -delete
for k in "$SCRIPT_PATH"/iSimp_sentences/*
do
    echo "$k"
    python2 "$SCRIPT_PATH/simplifier.py" "$k" "$SCRIPT_PATH/algorithm_sentences/$(basename "$k")" "$OUTPUT_INDEX_FILE_PATH"
done
cd "$SCRIPT_PATH"
sentence-simplification/simplifier.py
0 → 100644
This diff is collapsed. Click to expand it.
trn/empty-file.txt
0 → 100644
| 1 | +Delete me | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment