Carlos-Francisco Méndez-Cruz

Merge remote-tracking branch 'origin/master'

This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 +# -*- coding: UTF-8 -*-
2 +import os
3 +import sys
4 +import argparse
5 +import re
6 +import numpy as np
7 +from datetime import *
8 +__author__ = 'KevinML'
9 +
10 +# Objective: Extract the metadata name and the content of every line that contains <Tags/> inside a file.
11 +
12 +# Parameters:
13 +# 1) --inputPath input path
14 +# 2) --outputPath output path
15 +
16 +# Output:
17 +# 1) exit_file.xml written to --outputPath: tab-separated rows of Archivo, Metadato, Contenido.
18 +
19 +# Execution:
20 +#Example 1
21 +#python3 recorrer_archivos_o.py --inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/
22 +#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/
23 +
24 +#Example 2
25 +#python3 /home/kevinml/automatic-extraction-growth-conditions/scripts/recorrer_archivos_o.py
26 +#--inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/
27 +#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/
28 +
29 +###########################################################
30 +# MAIN PROGRAM #
31 +###########################################################
32 +
# Command-line interface: both options are mandatory paths.
parser = argparse.ArgumentParser(
    description='Obtenecion de metadatos y del contenido de de lineas con <Tags/>',
    epilog='Bien Hecho!',
)

# (option flag, attribute name on the parsed namespace, help text)
for _flag, _dest, _help in (
    ('--inputPath', 'inputPath', 'Ingrese el archivo de entrada.'),
    ('--outputPath', 'outputPath', 'Ingrese el archivo de salida.'),
):
    parser.add_argument(_flag, dest=_dest, metavar='PATH', required=True, help=_help)

# Parsed options, read by the rest of the script as args.inputPath / args.outputPath.
args = parser.parse_args()
41 +
42 +#if len(args) != 2:
43 +# parser.error("Se introdujeron mas o menos de 2 parametros.")
44 +# sys.exit(1)
45 +
46 +# Printing parameter values
# Echo the parameters received so the run can be checked from the console.
print('-------------------------------- PARAMETERS --------------------------------')
for _label, _value in (
    ("Path to read input files: ", args.inputPath),
    ("Path to place output files: ", args.outputPath),
):
    print("{}{}".format(_label, _value))
50 +
51 +# Temporary modification
52 +
# Maps each input file (path relative to --inputPath) to its
# {metadata name: [content, ...]} dictionary.
archivo = {}
# A line is considered only if it contains an opening tag made of ASCII letters.
regexTag = re.compile(r'<[A-Za-z]+>')
exit_file = r"exit_file.xml"


def _read_tagged_metadata(file_path):
    """Collect the metadata/content pairs found in the tagged lines of a file.

    Only lines matching ``regexTag`` are used. Each such line is split once
    on " = ": the left part is the metadata name and the right part is the
    content. Contents are grouped per metadata name in order of appearance.

    Parameters:
        file_path: path of the UTF-8 text file to read.

    Returns:
        dict mapping metadata name -> list of content strings.
    """
    metadatos = {}
    with open(file_path, mode='r', encoding="utf-8") as iFile:
        for line in iFile:
            line = line.strip('\n')
            if not regexTag.search(line):
                continue
            # Split only on the first separator so content that itself
            # contains " = " is kept whole.
            clave, separador, contenido = line.partition(" = ")
            if not separador:
                # Tagged line without "name = content" structure: nothing
                # to record (indexing the split result would fail here).
                continue
            metadatos.setdefault(clave, []).append(contenido)
    return metadatos


# Read every file under the input directory. os.walk yields the directory
# that actually holds each file, so the file must be opened relative to
# `path`, not to the root input path.
for path, dirs, files in os.walk(args.inputPath):
    for f in files:
        ruta = os.path.join(path, f)
        # The relative path equals the bare file name for top-level files and
        # keeps same-named files in different subdirectories apart.
        archivo[os.path.relpath(ruta, args.inputPath)] = _read_tagged_metadata(ruta)

# Write the header and all rows in a single pass, once every file is read.
with open(os.path.join(args.outputPath, exit_file), mode="w", encoding="utf-8") as oFile:
    oFile.write('#Fecha:{}\t\t\n#Archivo\tMetadato\tContenido\n\n'.format(datetime.today()))
    for arch in sorted(archivo):
        # Use the dictionary that belongs to this file, not the one left over
        # from the last file read.
        for k, v in sorted(archivo[arch].items()):
            for x in v:
                oFile.write('{}\t{}\t{}\n'.format(arch, k, x))