Setting up project

Carlos-Francisco Méndez-Cruz
Commit b5518966e67291bf2d6021b6da539e461c1c9185 b5518966 0 parents
Showing 6 changed files with 373 additions and 0 deletions
.idea/vcs.xml
biologicalTermTagging.py
nlp-preprocessing-pipeline.sh
posTaggingStanford.py
preprocessingTermDetection.py
transforming.py
--- a/.idea/vcs.xml 0 → 100644
View file @b551896
+++ b/.idea/vcs.xml 0 → 100644
View file @b551896
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="$PROJECT_DIR$" vcs="Git" />
+   </component>
+ </project>
\ No newline at end of file
--- a/biologicalTermTagging.py 0 → 100644
View file @b551896
+++ b/biologicalTermTagging.py 0 → 100644
View file @b551896
--- a/nlp-preprocessing-pipeline.sh 0 → 100644
View file @b551896
+++ b/nlp-preprocessing-pipeline.sh 0 → 100644
View file @b551896
+ #!/bin/sh
+ # NLP preprocessing pipeline for the corpus under CORPUS_PATH.
+ #
+ # Runs up to five stages in order; each stage reads the directory written by
+ # the previous one and redirects the stage's standard output to a log file in
+ # the current working directory:
+ #   original -> preprocessed -> pos -> lemma -> term -> transformed
+ #
+ # Set a stage switch below to anything other than TRUE to skip that stage.
+ # All path expansions are quoted so that paths containing spaces or glob
+ # characters are passed to the Python scripts as single arguments.
+ echo 'Preprocessing files...'
+ ORIGINAL_CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original
+ CORPUS_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
+ TERM_PATH=/export/space1/users/compu2/bionlp/conditional-random-fields/dictionaries
+ 
+ # Stage switches.
+ PRE=TRUE
+ echo "   Preprocessing: $PRE"
+ POS=TRUE
+ echo "   POS Tagging: $POS"
+ LEMMA=TRUE
+ echo "   Lemmatization: $LEMMA"
+ TERM=TRUE
+ echo "   Terminological tagging: $TERM"
+ TRANS=TRUE
+ echo "   Transformation: $TRANS"
+ 
+ # Stage 1: preprocessing with term detection.
+ if [ "$PRE" = "TRUE" ]; then
+ echo "Preprocessing..."
+ INPUT_PATH="$ORIGINAL_CORPUS_PATH"
+ OUTPUT_PATH="$CORPUS_PATH/preprocessed"
+ python3.4 preprocessingTermDetection.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --termDetection --termPath "$TERM_PATH" --termFiles termFilesLength_LREGULONDB.json > outputPreprocessing_lregulondb.txt
+ # Variant without term detection:
+ # python3.4 preprocessingTermDetection.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" > outputPreprocessing_lregulondb.txt
+ fi
+ 
+ # Stage 2: part-of-speech tagging with the Stanford POS Tagger.
+ if [ "$POS" = "TRUE" ]; then
+ echo "POS Tagging..."
+ INPUT_PATH="$CORPUS_PATH/preprocessed"
+ OUTPUT_PATH="$CORPUS_PATH/pos"
+ python3.4 posTaggingStanford.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --taggerPath /home/cmendezc/STANFORD_POSTAGGER/stanford-postagger-2015-12-09 --biolemmatizer > outputPOST_lregulondb.txt
+ fi
+ 
+ # Stage 3: lemmatization.
+ if [ "$LEMMA" = "TRUE" ]; then
+ echo "Lemmatization..."
+ INPUT_PATH="$CORPUS_PATH/pos"
+ OUTPUT_PATH="$CORPUS_PATH/lemma"
+ python3.4 biolemmatizing.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --biolemmatizerPath /home/cmendezc/BIO_LEMMATIZER > outputLemma_lregulondb.txt
+ fi
+ 
+ # Stage 4: terminological tagging using the dictionaries in TERM_PATH.
+ if [ "$TERM" = "TRUE" ]; then
+ echo "Terminological tagging..."
+ INPUT_PATH="$CORPUS_PATH/lemma"
+ OUTPUT_PATH="$CORPUS_PATH/term"
+ python3.4 biologicalTermTagging.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --termPath "$TERM_PATH" --termFiles termFilesTag_LREGULONDB.json > outputTerm_lregulondb.txt
+ fi
+ 
+ # Stage 5: transformation into one "text|lemma|pos" sentence per line.
+ if [ "$TRANS" = "TRUE" ]; then
+ echo "Transformation..."
+ INPUT_PATH="$CORPUS_PATH/term"
+ OUTPUT_PATH="$CORPUS_PATH/transformed"
+ python3.4 transforming.py --inputPath "$INPUT_PATH" --outputPath "$OUTPUT_PATH" --minWordsInLine 5 > outputTransformation_lregulondb.txt
+ fi
--- a/posTaggingStanford.py 0 → 100644
View file @b551896
+++ b/posTaggingStanford.py 0 → 100644
View file @b551896
+ # -*- coding: UTF-8 -*-
+ 
+ from optparse import OptionParser
+ import os
+ import sys
+ from time import time
+ from subprocess import call
+ 
+ __author__ = 'CMendezC'
+ 
+ # Objective: Part-of-Speech Tagging of several files with Stanford POS Tagger.
+ 
+ # Parameters:
+ #   1) --inputPath Path to read TXT files.
+ #   2) --outputPath Path to place POST files.
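+ #   3) --taggerPath Path to the Stanford POS Tagger directory (the one containing stanford-postagger.jar, lib/ and models/).
+ #   4) --biolemmatizer Reformat the tagged output for BioLemmatizer (one token<TAB>tag pair per line).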
+ 
+ # Output:
+ #   1) POS Tagged files.
+ #   2) If --biolemmatizer with format:
+ #   Rob	NNP
+ #   is	VBZ
+ #   a	DT
+ #   transcriptional	JJ
+ #   dual	JJ
+ #   regulator	NN
+ #   .	.
+ #
+ #   Its	PRP$
+ #   N-terminal	JJ ...
+ 
+ # Execution:
+ # GntR
+ # python posTaggingStanford.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT\ECK120012096_GntR\preprocessed --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT\ECK120012096_GntR\post --taggerPath C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09 --biolemmatizer
+ 
+ # FhlA
+ # python posTaggingStanford.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011394_FhlA\preprocessed --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011394_FhlA\post --taggerPath C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09 --biolemmatizer
+ 
+ # MarA
+ # python posTaggingStanford.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011412_MarA\preprocessed --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011412_MarA\post --taggerPath C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09 --biolemmatizer
+ 
+ # ArgR
+ # python posTaggingStanford.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011670_ArgR\preprocessed --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011670_ArgR\post --taggerPath C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09 --biolemmatizer
+ 
+ # CytR
+ # python posTaggingStanford.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120012407_CytR\preprocessed --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120012407_CytR\post --taggerPath C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09 --biolemmatizer
+ 
+ # Rob
+ # python posTaggingStanford.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011190_Rob\preprocessed --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011190_Rob\post --taggerPath C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09 --biolemmatizer
+ 
+ # EXTRACTING REGULATORY INTERACTIONS
+ # python posTaggingStanford.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\EXTRACTING_REGULATORY_INTERACTIONS\corpus_ecoli\preprocessed --outputPath C:\Users\cmendezc\Documents\GENOMICAS\EXTRACTING_REGULATORY_INTERACTIONS\corpus_ecoli\post  --taggerPath C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09 --biolemmatizer
+ 
+ ###########################################################
+ #                       MAIN PROGRAM                      #
+ ###########################################################
+ 
+ if __name__ == "__main__":
+     # Parameter definition
+     parser = OptionParser()
+     parser.add_option("-i", "--inputPath", dest="inputPath",
+                       help="Path to read TXT files", metavar="PATH")
+     parser.add_option("-o", "--outputPath", dest="outputPath",
+                       help="Path to place POST files", metavar="PATH")
+     parser.add_option("-a", "--taggerPath", dest="taggerPath", default="",
+                       help="Path FreeLing analyzer files", metavar="PATH")
+     parser.add_option("-p", "--biolemmatizer", default=False,
+                       action="store_true", dest="biolemmatizer",
+                       help="Format for biolemmatizer?")
+ 
+     (options, args) = parser.parse_args()
+     if len(args) > 0:
+         parser.error("None parameters indicated.")
+         sys.exit(1)
+ 
+     # Printing parameter values
+     print('-------------------------------- PARAMETERS --------------------------------')
+     print("Path to read input files: " + str(options.inputPath))
+     print("Path to place output files: " + str(options.outputPath))
+     print("Path POS Tagger command: " + str(options.taggerPath))
+     print("Format for biolemmatizer?: " + str(options.biolemmatizer))
+ 
+     filesTagged = 0
+     t0 = time()
+     print("Tagging corpus...")
+     # Walk directory to read files
+     for path, dirs, files in os.walk(options.inputPath):
+         # For each file in dir
+         for file in files:
+             print("   Tagging file..." + str(file))
+             try:
+                 # FREELING: taggerPath = os.path.join(options.taggerPath, "analyzer.ex")
+                 # FREELING: command = taggerPath + " -f " + os.path.join("%FREELINGSHARE%", "config", "en.cfg") + " <" + os.path.join(path, file) + "> " + os.path.join(options.outputPath, file) + ".post.txt"
+ 
+                 # stanford-postagger models\english-left3words-distsim.tagger
+                 # C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TFsummaries_tagged_SGC_aspectRP-DOM\ECK120011190.Rob.sum.txt
+                 # >
+                 # C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\aspectsOfInterest_TrainingSet\testingTaggers\ECK120011190.Rob.sum.txt
+ 
+                 import platform
+                 plat = platform.system()
+                 if plat == 'Linux':
+                     # FOR LINUX
+                     # java -mx300m -cp 'stanford-postagger.jar:lib/*' edu.stanford.nlp.tagger.maxent.MaxentTagger
+                     # -model $1 -textFile $2
+                     command = "java -mx300m -cp " + os.path.join(options.taggerPath, 'stanford-postagger.jar:') + \
+                               os.path.join(options.taggerPath, 'lib/*') + \
+                               ' edu.stanford.nlp.tagger.maxent.MaxentTagger -model ' + \
+                               os.path.join(options.taggerPath, 'models', 'english-left3words-distsim.tagger') + \
+                               ' -textFile ' + os.path.join(options.inputPath, file) + \
+                               ' > ' + os.path.join(options.outputPath, file.replace('pre.txt', 'pos.txt'))
+                 else:
+                     # C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\preprocessingCorpus>java -mx300m
+                     # -cp "C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09\stanford-postagger.jar;
+                     # C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09\lib/*"
+                     # edu.stanford.nlp.tagger.maxent.MaxentTagger -model
+                     # C:\Users\cmendezc\Documents\GENOMICAS\STANFORD_POSTAGGER\stanford-postagger-2015-12-09\models\english-left3words-distsim.tagger
+                     # -textFile C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\aspectClassificationDatasets\preprocessed\ECK120011190.Rob.sum.pre.txt
+                     #taggerPath = os.path.join('java')
+                     command = "java -mx300m -cp " + os.path.join(options.taggerPath, 'stanford-postagger.jar;') + \
+                               os.path.join(options.taggerPath, 'lib/*') + \
+                               ' edu.stanford.nlp.tagger.maxent.MaxentTagger -model ' + \
+                               os.path.join(options.taggerPath, 'models', 'english-left3words-distsim.tagger') + \
+                               ' -textFile ' + os.path.join(options.inputPath, file) + \
+                               ' > ' + os.path.join(options.outputPath, file.replace('pre.txt', 'pos.txt'))  #print(command)
+ 
+                 retcode = call(command, shell=True)
+                 if retcode < 0:
+                     print("   Child was terminated by signal", -retcode, file=sys.stderr)
+                 else:
+                     print("   Child returned", retcode, file=sys.stderr)
+                     filesTagged += 1
+             except OSError as e:
+                 print("   Execution failed:", e, file=sys.stderr)
+ 
+             text = ""
+             if options.biolemmatizer:
+                 with open(os.path.join(options.outputPath, file.replace('pre.txt', 'pos.txt')), "r", encoding="utf-8", errors="replace") as iFile:
+                     text = iFile.read()
+                     # -LRB-_-LRB- PTS_NN -RRB-_-RRB-
+                     # for_IN Mlc_NN inactivation_NN ._.
+                     text = text.replace('-LRB-', '(')
+                     text = text.replace('-RRB-', ')')
+ 
+                     text = text.replace('-LSB-', '[')
+                     text = text.replace('-RSB-', ']')
+ 
+                     text = text.replace('_', '\t')
+                     text = text.replace(' ', '\n')
+                     text = text.replace('.\n', '.\n\n')
+                 with open(os.path.join(options.outputPath, file.replace('pre.txt', 'pos.txt')), "w", encoding="utf-8", errors="replace") as oFile:
+                     oFile.write(text)
+ 
+     # Imprime archivos procesados
+     print()
+     print("Files POS Tagged: " + str(filesTagged))
+     print("Files POS Tagged in: %fs" % (time() - t0))
--- a/preprocessingTermDetection.py 0 → 100644
View file @b551896
+++ b/preprocessingTermDetection.py 0 → 100644
View file @b551896
--- a/transforming.py 0 → 100644
View file @b551896
+++ b/transforming.py 0 → 100644
View file @b551896
+ # -*- coding: UTF-8 -*-
+ import re
+ from optparse import OptionParser
+ import os
+ import sys
+ from time import time
+ 
+ __author__ = 'CMendezC'
+ 
+ # Objective: Transforming BIOLemmatized files:
+ #   1) Transformed files
+ #   2) Text files to extract aspects
+ 
+ # Parameters:
+ #   1) --inputPath Path to read input files.
+ #   2) --outputPath Path to place output files.
+ #   3) --textPath Path to place text files (documented here, but the option parser of this script does not define it).
+ #   4) --minWordsInLine Minimum sentence length in number of words
+ #   5) --classes Comma-separated classes that mark the end of a sentence when a line contains: PMID\tNUMSENT\tSENT\tCLASS
+ 
+ # Output:
+ #   1) transformed files
+ #   2) text files
+ 
+ # Execution:
+ # GntR
+ # python transforming.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120012096_GntR\term --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120012096_GntR\transformed --minWordsInLine 5
+ 
+ # FhlA
+ # python transforming.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011394_FhlA\term --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011394_FhlA\transformed --minWordsInLine 5
+ 
+ # MarA
+ # python transforming.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011412_MarA\term --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011412_MarA\transformed --minWordsInLine 5
+ 
+ # ArgR
+ # python transforming.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011670_ArgR\term --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011670_ArgR\transformed --minWordsInLine 5
+ 
+ # CytR
+ # python transforming.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120012407_CytR\term --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120012407_CytR\transformed --minWordsInLine 5
+ 
+ # Rob
+ # python transforming.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011190_Rob\term --outputPath C:\Users\cmendezc\Documents\GENOMICAS\AUTOMATIC_SUMMARIZATION_TFS\corpus\TF_PMIDs_TXT_ECK120011190_Rob\transformed --minWordsInLine 5
+ 
+ # EXTRACTING REGULATORY INTERACTIONS
+ # python transforming.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\EXTRACTING_REGULATORY_INTERACTIONS\corpus_ecoli\lemma --outputPath C:\Users\cmendezc\Documents\GENOMICAS\EXTRACTING_REGULATORY_INTERACTIONS\corpus_ecoli\transformed --minWordsInLine 5
+ 
+ 
+ def length(listWords):
+     regexWord = re.compile('[a-zA-Z]')
+     words = 0
+     chars = 0
+     for word in listWords:
+         listTemp = word.split('|')
+         if regexWord.search(listTemp[1]) is not None:
+             words += 1
+         chars += len(listTemp[0])
+     return words, chars
+ 
+ ###########################################################
+ #                       MAIN PROGRAM                      #
+ ###########################################################
+ 
+ if __name__ == "__main__":
+     # Parameter definition
+     parser = OptionParser()
+     parser.add_option("-i", "--inputPath", dest="inputPath",
+                       help="Path to read input files", metavar="PATH")
+     parser.add_option("-o", "--outputPath", dest="outputPath",
+                       help="Path to place transformed files", metavar="PATH")
+     parser.add_option("--minWordsInLine", type="int", dest="minWordsInLine", default=3,
+                       help="Minimum length sentence in number of words", metavar="NUM")
+     parser.add_option("--classes", dest="classes",
+                       help="Classes to indicate final of sentence when line contains: PMID-NUMSENT-SENT-CLASS", metavar="CLASS,CLASS")
+ 
+     (options, args) = parser.parse_args()
+ 
+     if len(args) > 0:
+         parser.error("None parameters indicated.")
+         sys.exit(1)
+ 
+     # Printing parameter values
+     print('-------------------------------- PARAMETERS --------------------------------')
+     print("Path to read input files: " + str(options.inputPath))
+     print("Path to place transformed files: " + str(options.outputPath))
+     print("Minimum length sentence in number of words: " + str(options.minWordsInLine))
+     print("Classes to indicate final of sentence: " + str(options.classes))
+ 
+     # We realized that POS tags from Biolemmatizer are very specific, therefore we decided to use Standford tags
+     bioPOST = False
+     filesProcessed = 0
+     # minWordsInLine = 3
+     if not options.classes is None:
+         listClasses = options.classes.split(',')
+     t0 = time()
+     print("Transforming files...")
+     # Walk directory to read files
+     for path, dirs, files in os.walk(options.inputPath):
+         # For each file in dir
+         for file in files:
+             print("   Transforming file..." + str(file))
+             #TrpR	NN	TrpR NN PennPOS
+             # ,	,	, , NUPOS
+             # tryptophan	NN	tryptophan NN PennPOS
+             listLine1 = []
+             listLine2 = []
+             text = ''
+             lemma = ''
+             pos = ''
+             textTransformed = ''
+             textText = ''
+             with open(os.path.join(path, file), "r", encoding="utf-8", errors="replace") as iFile:
+                 # Create output file to write
+                 with open(os.path.join(options.outputPath, file.replace('term.txt', 'tra.txt')), "w", encoding="utf-8") as transformedFile:
+                     for line in iFile:
+                         if line == '\n':
+                             if options.classes is None:
+                                 if length(textTransformed.split())[0] > options.minWordsInLine and length(textTransformed.split())[1] <= 1000:
+                                     transformedFile.write(textTransformed + '\n')
+                                 textTransformed = ''
+                                 textText = ''
+                             else:
+                                 continue
+                         else:
+                             line = line.strip('\n')
+                             #print('Line ' + str(line.encode(encoding='UTF-8', errors='replace')))
+                             listLine1 = line.split('\t')
+                             if len(listLine1) != 3:
+                                 continue
+                             text = listLine1[0]
+                             # Replacing an estrange space character
+                             text = text.replace(' ', '-')
+                             listLine2 = listLine1[2].split(' ')
+                             lemma = listLine2[0]
+                             # Replacing an estrange space character
+                             lemma = lemma.replace(' ', '-')
+                             if bioPOST:
+                                 pos = listLine2[1]
+                                 #print('Line ' + str(line.encode(encoding='UTF-8', errors='replace')))
+                             else:
+                                 pos = listLine1[1]
+                             textText = textText + text + ' '
+                             textTransformed = textTransformed + text + '|' + lemma + '|' + pos + ' '
+                             # RI+GC	NN	RI+GC NN PennPOS
+                             if not options.classes is None:
+                                 if text in listClasses:
+                                     # if length(textTransformed.split()) > options.minWordsInLine:
+                                     if length(textTransformed.split())[0] > options.minWordsInLine and length(textTransformed.split())[1] <= 1000:
+                                         transformedFile.write(textTransformed + '\n')
+                                         # print(textTransformed)
+                                     textTransformed = ''
+                                     textText = ''
+             filesProcessed += 1
+ 
+     # Imprime archivos procesados
+     print()
+     print("Files processed: " + str(filesProcessed))
+     print("In: %fs" % (time() - t0))