Showing
31 changed files
with
343 additions
and
0 deletions
README.md
0 → 100644
1 | +# Bacterial regulatory interaction extraction system | ||
2 | + | ||
3 | +## Prerequisites | ||
4 | +1. The input file must be tokenized and sentence-split | ||
5 | + | ||
6 | + | ||
7 | + | ||
8 | + | ||
9 | +## Run | ||
10 | +### Several files | ||
11 | +Set filenames and paths in run-several-files.sh | ||
12 | + | ||
13 | +## Acknowledgments | ||
14 | +This work was supported by UNAM-PAPIIT IA203420. | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
attributive-sentences/.gitignore
0 → 100644
1 | + |
automatic-extraction-ris-gcs.sh
0 → 100755
This diff is collapsed. Click to expand it.
autoregulation-sentences/deleteme.txt
0 → 100644
File mode changed
deverbal-separator/filter-v03.py
0 → 100644
1 | +# import fileinput | ||
2 | +# import regex as re | ||
3 | +# from regex import finditer | ||
4 | +import sys | ||
5 | +import json | ||
6 | + | ||
7 | +if ( len( sys.argv ) != 3 ): | ||
8 | + # Original Daniel: sys.stderr.write( "E: usage: " +sys.argv[0] + " <input_file> <EFFs_dictionary> \n" ) | ||
9 | + sys.stderr.write("E: usage: " + sys.argv[0] + " <input_file> <normalized_Effects> \n") | ||
10 | + sys.stderr.flush(); | ||
11 | + | ||
12 | +# exit( 2 ); | ||
13 | + | ||
14 | +#LEER ARCHIVO INPUT | ||
15 | +text_file = open( sys.argv[1], "r" ) | ||
16 | +dato = text_file.read() | ||
17 | +text_file.close() | ||
18 | + | ||
19 | +#LEE DICCIONARIO | ||
20 | + | ||
21 | +# Loading normalized effects | ||
22 | +# print('Loading normalized effects...') | ||
23 | +with open(sys.argv[2]) as diccFile: | ||
24 | + hashNormalizedEffects = json.load(diccFile) | ||
25 | +DICC = list(hashNormalizedEffects.keys()) | ||
26 | + | ||
27 | +# Original Daniel: text_file = open( sys.argv[2], "r" ) | ||
28 | +# Original Daniel: DICC = text_file.read().splitlines() | ||
29 | +# Original Daniel: text_file.close() | ||
30 | + | ||
31 | + | ||
32 | +#declara variables | ||
33 | +is_dev = False | ||
34 | +is_vrb = False | ||
35 | + | ||
36 | + | ||
37 | +# DICC | ||
38 | +# 2018-11-30 CMC: We separated noun and only past participle for deverbal processing | ||
39 | +# and all verb forms as verbal | ||
40 | +# VRB: VB verb, base form think | ||
41 | +# VRB: VBZ verb, 3rd person singular present she thinks | ||
42 | +# VRB: VBP verb, non-3rd person singular present I think | ||
43 | +# VRB: VBD verb, past tense they thought | ||
44 | +# DEV: VBN verb, past participle a sunken ship | ||
45 | +# VRB: VBG verb, gerund or present participle thinking is fun | ||
46 | +# extend/VBP | ||
47 | +for i in range(len(DICC)): | ||
48 | + # print(DICC[i]) | ||
49 | + for token in dato.split(): | ||
50 | + word = token[:token.find("/")] | ||
51 | + tag = token[token.find("/")+1:] | ||
52 | + # print("word: {}".format(word)) | ||
53 | + # print("tag: {}".format(tag)) | ||
54 | + if (DICC[i] in word) and (("NN" in tag) | ||
55 | + or ("VBN" == tag) | ||
56 | + ): | ||
57 | + is_dev = True | ||
58 | + # print("deverbal: " + word) | ||
59 | + if (DICC[i] in word) and ("VB" in tag): | ||
60 | + is_vrb = True | ||
61 | + # print("verbal: " + word) | ||
62 | + | ||
63 | +if is_dev and is_vrb: | ||
64 | + sys.exit(11) | ||
65 | +elif is_dev: | ||
66 | + sys.exit(12) | ||
67 | +elif is_vrb: | ||
68 | + sys.exit(13) | ||
69 | +else: | ||
70 | + sys.exit(10) | ||
71 | + |
1 | + |
1 | + |
deverbal-separator/separator-v02.sh
0 → 100755
#!/bin/bash
# Separates sentences into deverbal (.dev) and verbal (.vrb) files.
#
# Usage: ./separator.sh <path_to_corenlp> <input_path> <output_path> <dicc_path> <if_tag> <if_separate>
#   if_tag=TRUE      -> POS-tag the input files with Stanford CoreNLP
#   if_separate=TRUE -> classify each tagged file with filter-v03.py and
#                       copy the original sentence file to dev/ and/or vrb/
#
# Fixed: quoted all variable expansions (paths with spaces previously
# word-split) and switched TRUE checks to [[ ]] so empty arguments
# cannot break the test.

#Validate arguments
if [[ ! ("$#" == 6 ) ]]; then
    echo 'Usage: ./separator.sh <path_to_corenlp> <input_path> <output_path> <dicc_path> <if_tag> <if_separate>'
    exit 1
fi

SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd)
PATH_TO_CORENLP=$1
INPUT_PATH=$2      # folder containing the files to separate
OUTPUT_PATH=$3
DICC_PATH=$4
TAG=$5             # TRUE to POS-tag sentences before separating
SEP=$6             # TRUE to separate deverbal and verbal sentences

if [[ "$TAG" == "TRUE" ]]; then
    # --- POS-tag every input file with Stanford CoreNLP ---

    # Clean results of previous runs.  "find -delete" avoids the
    # "/bin/rm: Argument list too long" error that "rm dir/*" hits
    # on large corpora.
    if [ -n "$(ls -A "$SCRIPT_PATH/tagged/")" ]; then
        find "$SCRIPT_PATH/tagged" -maxdepth 1 -name '*.conll' -delete
    fi
    if [ -n "$(ls -A "$SCRIPT_PATH/tagged-line/")" ]; then
        find "$SCRIPT_PATH/tagged-line" -maxdepth 1 -name '*.spt' -delete
    fi

    for j in "$INPUT_PATH"/*
    do
        java -cp "$PATH_TO_CORENLP/*" -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos -outputFormat conll -file "$j" -outputDirectory "$SCRIPT_PATH/tagged"
    done

    # Flatten each CoNLL file to a single line of word/TAG tokens
    # (CoNLL column 2 = word, column 4 = POS tag).
    for j in "$SCRIPT_PATH"/tagged/*
    do
        filename=$(basename "$j")
        awk 'NF {print $2 "/" $4}' "$j" | paste -d" " -s > "$SCRIPT_PATH/tagged-line/${filename%.*}.spt"
    done
fi # if [[ "$TAG" == "TRUE" ]]

if [[ "$SEP" == "TRUE" ]]; then
    # --- Separate files by the classification exit code of filter-v03.py ---

    if [ -n "$(ls -A "$OUTPUT_PATH/dev")" ]; then
        find "$OUTPUT_PATH/dev" -maxdepth 1 -name '*.dev' -delete
    fi
    if [ -n "$(ls -A "$OUTPUT_PATH/vrb")" ]; then
        find "$OUTPUT_PATH/vrb" -maxdepth 1 -name '*.vrb' -delete
    fi

    for j in "$SCRIPT_PATH"/tagged-line/*
    do
        # CMC 2018-12-11: only the passive verbal form (VBN, past
        # participle) counts as deverbal -- see filter-v03.py.
        python3 "$SCRIPT_PATH/filter-v03.py" "$j" "$DICC_PATH/normalized_Effects.json"
        VAR=$?
        base=$(basename "${j%.*}")

        if [ "$VAR" == 11 ]; then
            # contains both deverbal and verbal mentions
            cp "$INPUT_PATH/$base" "$OUTPUT_PATH/dev/$base.dev"
            cp "$INPUT_PATH/$base" "$OUTPUT_PATH/vrb/$base.vrb"
        elif [ "$VAR" == 12 ]; then
            # contains only deverbal mentions
            cp "$INPUT_PATH/$base" "$OUTPUT_PATH/dev/$base.dev"
        elif [ "$VAR" == 13 ]; then
            # contains only verbal mentions
            cp "$INPUT_PATH/$base" "$OUTPUT_PATH/vrb/$base.vrb"
        elif [ "$VAR" == 10 ]; then
            # neither detected; defaults to the verbal branch
            echo "Non deverbal and verbal"
            cp "$INPUT_PATH/$base" "$OUTPUT_PATH/vrb/$base.vrb"
        fi
    done
fi # if [[ "$SEP" == "TRUE" ]]
deverbal-separator/tagged-line/.gitignore
0 → 100644
1 | + |
deverbal-separator/tagged/.gitignore
0 → 100644
1 | + |
evaluate-ris-gcs-standoff-v04.py
0 → 100644
This diff is collapsed. Click to expand it.
extract-ris-deverbal/EFF_DVB-regex-v03.py
0 → 100644
This diff is collapsed. Click to expand it.
filtered-sentences/.gitignore
0 → 100644
1 | + |
format/regex.py
0 → 100644
#!/usr/bin/env python3
r"""Remove citation-like parenthesized/bracketed spans from sentences.

Usage: python regex.py <input_file> <output_file>

Reads <input_file> line by line, strips spans such as "( 23 )",
"[ 12a ]", "( IV )", "( a )" and "( see Fig . 3 )", and writes the
result to <output_file> with its last 4 characters (the extension)
replaced by ".san".
"""
import re
import sys


def sanitize_line(line):
    r"""Return *line* with citation-like spans removed.

    Each substitution right-strips the line first, mirroring the
    original pipeline; the final step collapses one level of the double
    spaces left behind by the removals.

    NOTE(review): the character class [^\(|^\)] also excludes literal
    "|" and "^" -- almost certainly meant as "not ( and not )", but it
    is kept byte-identical to preserve the established matching behavior.
    """
    line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+[a-z]{1}\s[^\(|^\)]*[\)]', '', line.rstrip())  # remove (_NNNNa_)
    line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+[a-z]{1}\s[^\(|^\)]*[\]]', '', line.rstrip())  # remove [_NNNNa_]
    line = re.sub(r'[\(][^\(|^\)]*\s([0-9]+,?)+\s[^\(|^\)]*[\)]', '', line.rstrip())  # remove (_NN,NN,NN_)
    line = re.sub(r'[\[][^\(|^\)]*\s([0-9]+,?)+\s[^\(|^\)]*[\]]', '', line.rstrip())  # remove [_NN,NN,NN_]
    line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+\s[^\(|^\)]*[\)]', '', line.rstrip())  # remove (_num_)
    line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+\.[0-9]+\s[^\(|^\)]*[\)]', '', line.rstrip())  # remove (_num.num_)
    line = re.sub(r'[\(][^\(|^\)]*\s[0-9]+\-[0-9]+\s[^\(|^\)]*[\)]', '', line.rstrip())  # remove (_num-num_)
    line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+\s[^\(|^\)]*[\]]', '', line.rstrip())  # remove [_num_]
    line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+\.[0-9]+\s[^\(|^\)]*[\]]', '', line.rstrip())  # remove [_num.num_]
    line = re.sub(r'[\[][^\(|^\)]*\s[0-9]+\-[0-9]+\s[^\(|^\)]*[\]]', '', line.rstrip())  # remove [_num-num_]
    line = re.sub(r'[\(]\s[a-zA-Z]{1}\s[\)]', '', line.rstrip())  # remove (_alpha_)
    line = re.sub(r'[\[]\s[a-zA-Z]{1}\s[\]]', '', line.rstrip())  # remove [_alpha_]
    line = re.sub(r'[\(]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\)]', '', line.rstrip())  # remove (_Roman_)
    line = re.sub(r'[\(]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s\-\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\)]', '', line.rstrip())  # remove (_Roman-Roman_)
    line = re.sub(r'[\(]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\)]', '', line.rstrip())  # remove (_roman_)
    line = re.sub(r'[\(]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s\-\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\)]', '', line.rstrip())  # remove (_roman-roman_)
    line = re.sub(r'[\[]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\]]', '', line.rstrip())  # remove [_Roman_]
    line = re.sub(r'[\[]\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s\-\sM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s[\]]', '', line.rstrip())  # remove [_Roman-Roman_]
    line = re.sub(r'[\[]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\]]', '', line.rstrip())  # remove [_roman_]
    line = re.sub(r'[\[]\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s\-\sm{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s[\]]', '', line.rstrip())  # remove [_roman-roman_]
    line = re.sub(r'[\(][^\(|^\)]*\s(fig\s\.|figure|see|i\s\.\se\s\.|e\s\.\sg\s\.|tab\s\.table)\s[^\(|^\)]*[\)]', '', line.rstrip(), flags=re.I)  # remove figure/see/i.e./e.g./table references
    line = re.sub('  ', ' ', line.rstrip())  # collapse double spaces left by removals
    return line


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stderr.write("E: usage: " + sys.argv[0] + " <input_file> <output_file> \n")
        sys.stderr.flush()
        exit(2)
    else:
        print("Ok.")

    # Read the input sentences.
    with open(sys.argv[1], "r") as text_file:
        dato = text_file.read().splitlines()

    # Replace the 4-character extension of argv[2] with ".san".
    file_name = sys.argv[2][:-4] + ".san"

    # Fixed: write through a single handle instead of truncating once
    # and then reopening the file in append mode for every line.
    with open(file_name, "w") as save_file:
        for line in dato:
            save_file.write(sanitize_line(line))
            save_file.write("\n")
format/sanitized_sentences/.gitignore
0 → 100644
1 | + |
format/split_sentences/.gitignore
0 → 100644
1 | + |
get-TRN-Organism-v1.py
0 → 100644
This diff is collapsed. Click to expand it.
get-TRN-v2.py
0 → 100644
This diff is collapsed. Click to expand it.
predicted-ris-gcs/complete-ris/.gitignore
0 → 100644
1 | + |
predicted-ris-gcs/incomplete-ris/.gitignore
0 → 100644
1 | + |
ri-attributive-extraction-v02.py
0 → 100644
This diff is collapsed. Click to expand it.
ri-autoregulation-extraction-v01.py
0 → 100644
This diff is collapsed. Click to expand it.
ri-openie-extraction-v02.py
0 → 100644
This diff is collapsed. Click to expand it.
ri-openie-extraction/.gitignore
0 → 100644
1 | + |
run-several-files.sh
0 → 100755
#!/bin/bash

###### Automatic extraction of TRN from several files ######
# For every preprocessed text file, runs the full RI/GC extraction
# pipeline (automatic-extraction-ris-gcs.sh), redirecting stdout and
# stderr to per-file logs (uno-<name>.txt / dos-<name>.txt).
#
# Fixed: quoted all expansions so file names or paths containing
# spaces do not word-split the pipeline invocation.

BRIES_HOME=/myhome/bries
PMIDS_HOME=/myhome/preprocessed-files
# We don't use REFERENCE_HOME because we don't evaluate. Path /reference-data-set doesn't exist. File no-reference.txt doesn't exist.
REFERENCE_HOME=/myhome/reference-data-set

for f in "$PMIDS_HOME"/original/text/*.*
do
    FILE_NAME=$(basename "$f")
    FILE_NAME="${FILE_NAME%.*}"   # strip the extension
    echo "File: $FILE_NAME"
    ./automatic-extraction-ris-gcs.sh "$PMIDS_HOME/features/$FILE_NAME.tra.word.txt" "$PMIDS_HOME/transformed/$FILE_NAME.tra.txt" "$BRIES_HOME/ri-openie-extraction/$FILE_NAME.txt" "$BRIES_HOME/predicted-ris-gcs" Y Y FILT1 "$REFERENCE_HOME" no-reference.txt "$BRIES_HOME/evaluation-reports" no-evaluation.txt diccionario-SYNONYMS.json "$PMIDS_HOME/original/tsv" 1>"uno-$FILE_NAME.txt" 2>"dos-$FILE_NAME.txt"
done
sentence-filter_v02.py
0 → 100644
This diff is collapsed. Click to expand it.
1 | + |
1 | + |
#!/bin/bash
# Sentence simplification pipeline: run every input file through iSimp,
# then feed the simplified output to the Python simplifier algorithm,
# building an index file of the simplified sentences.
#
# Fixed: quoted all variable expansions (paths with spaces previously
# word-split the iSimp and simplifier invocations).

#Validate arguments
if [[ ! ("$#" == 3 ) ]]; then
    echo 'Usage: ./sentence-simplification-main.sh <input_path> <output_file_path> <isimp_path>'
    exit 1
fi

SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd)
INPUT_PATH=$1               # folder with the sentence files to simplify
OUTPUT_INDEX_FILE_PATH=$2   # index file listing the simplified sentences
ISIMP_PATH=$3               # iSimp installation folder
cd "$SCRIPT_PATH"

# --- Analyze with iSimp ---
echo "Analysing in iSimp..."
# Empty the working folder left over from previous runs.
if [ -n "$(ls -A ./iSimp_sentences/)" ]; then
    rm ./iSimp_sentences/*
fi
for j in "$INPUT_PATH"/*
do
    echo "$j"
    "$ISIMP_PATH/simplify.sh" "$j" "$SCRIPT_PATH/iSimp_sentences/$(basename "$j")"
done
cd "$SCRIPT_PATH"

# --- Create the index of simplified files ---
# Truncate (or create) the index file, even when "noclobber" is set.
>| "$OUTPUT_INDEX_FILE_PATH"

# --- Feed to the simplification algorithm ---
echo "Analysing in Algorithm..."
if [ -n "$(ls -A ./algorithm_sentences/)" ]; then
    rm ./algorithm_sentences/*
fi
for k in "$SCRIPT_PATH"/iSimp_sentences/*
do
    echo "$k"
    python2 "$SCRIPT_PATH/simplifier.py" "$k" "$SCRIPT_PATH/algorithm_sentences/$(basename "$k")" "$OUTPUT_INDEX_FILE_PATH"
done
cd "$SCRIPT_PATH"
sentence-simplification/simplifier.py
0 → 100644
This diff is collapsed. Click to expand it.
trn/empty-file.txt
0 → 100644
1 | +Delete me | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment