getting alphanum file

Kevin Meza Landeros
Commit 61f7cb247507b2b2828ac87d037f90d58749b138 61f7cb24 1 parent 5934b0d0
Showing 1 changed file with 78 additions and 0 deletions
CoreNLP/bin/filtering_v2.py
--- a/CoreNLP/bin/filtering_v2.py 0 → 100644
View file @61f7cb2
+++ b/CoreNLP/bin/filtering_v2.py 0 → 100644
View file @61f7cb2
+ # Library imports
+ import pandas as pd
+ import re
+ import argparse
+ import os
+ 
+ __author__ = 'kevinml'
+ 
+ # Objective:
+ # Take a tab-separated two-column file and write one file that keeps only the rows
+ # whose first column is made up of numbers and letters:
+ # 1.- Alphanum.txt - Rows in which every space-separated word of the first column is alphanumeric.
+ # 
+ # Input parameters 
+ # --inputPath=PATH     Path of the input file.
+ # --outputPath=PATH    Path where the output file is written. 
+ # --iFile              Input file from which Alphanum.txt is produced. 
+ # 
+ # Examples 
+ # python filtering_v2.py --inputPath /home/kevinml/automatic-extraction-growth-conditions/CoreNLP/input --outputPath /home/kevinml/automatic-extraction-growth-conditions/CoreNLP/input --iFile NER_words.txt
+ 
+ #################################################################################### 
+ #                                    FUNCTIONS                                     # 
+ ####################################################################################
+ 
+ def alphanum(word):
+     """Tell whether *word* consists exclusively of ASCII letters and digits.
+ 
+     Returns True when no character outside a-z, A-Z and 0-9 occurs in *word*
+     (the empty string therefore also yields True), and False when at least
+     one such character is present.
+     """
+     has_other_char = re.search("[^a-zA-Z0-9]", word) is not None
+     return not has_other_char
+ 
+ #################################################################################### 
+ #                                   MAIN PROGRAM                                  # 
+ ####################################################################################
+ 
+ if __name__ == '__main__':
+     # Command-line entry point: read a tab-separated file, keep only the rows whose
+     # first column is made of purely alphanumeric words, and write those rows
+     # (first and second column) to Alphanum.txt in the output directory.
+ 
+     # Parameter definition
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--inputPath', help="Ruta donde se encuentra el archivo a procesar. Ej: --inputPath /home/kevinml/transporter-substrate-interactions/", required=True)
+     parser.add_argument('--outputPath', help="Ruta donde se depositaran los archivos resultantes. Ej: --outputPath /home/kevinml/transporter-substrate-interactions/", required=True)
+     parser.add_argument('--iFile', help="Archivo a procesar. Ej: --iFile NER_words.txt", required=True)
+     args = parser.parse_args()
+ 
+     # Echo the parameters that were received
+     print('\n-------------------------------- PARAMETERS --------------------------------\n')
+     print('Input Path: ' + str(args.inputPath))
+     print('File: ' + str(args.iFile))
+     print('Output Path: ' + str(args.outputPath))
+     print('\n-------------------------------- PROCESSING --------------------------------\n')
+ 
+     # Read every cell literally as text. Without dtype=str pandas infers numeric
+     # dtypes for all-digit columns, and without keep_default_na=False tokens such
+     # as "NA" or empty cells are turned into float NaN; both cases break the
+     # string operations below (.split and concatenation).
+     # The first line of the file is still consumed as the header row.
+     table = pd.read_csv(os.path.join(args.inputPath, args.iFile), sep="\t",
+                         dtype=str, keep_default_na=False)
+ 
+     print("######################\n#   PRIMER ARCHIVO   #\n######################")
+ 
+     # Keep the rows in which every space-separated word of the first column is
+     # alphanumeric. Plain tuples are iterated so the two columns are addressed
+     # by position, independently of the header names.
+     kept_rows = []
+     for row in table.itertuples(index=False, name=None):
+         condition, line = row[0], row[1]
+         if all(alphanum(word) for word in condition.split(" ")):
+             kept_rows.append((condition, line))
+ 
+     # Write the surviving rows as "<first column>\t<second column>", one per line.
+     with open(os.path.join(args.outputPath, "Alphanum.txt"), "w") as oFile:
+         oFile.writelines(condition + "\t" + line + "\n" for condition, line in kept_rows)
+ 
+     print("\nArchivo cuyo contenido unicamente son numeros y letras ha sido generado.\nNombre del archivo: Alphanum.txt\n")
+