Showing
3 changed files
with
80 additions
and
0 deletions
data-sets/file_output/exit_file.txt
0 → 100644
This diff could not be displayed because it is too large.
data-sets/file_output/exit_file.xml
0 → 100644
This diff could not be displayed because it is too large.
scripts/file_output.py
0 → 100644
1 | +# -*- coding: UTF-8 -*- | ||
2 | +import os | ||
3 | +import sys | ||
4 | +import argparse | ||
5 | +import re | ||
6 | +import numpy as np | ||
7 | +from datetime import * | ||
8 | +__author__ = 'KevinML' | ||
9 | + | ||
10 | +# Objective: Obtencion del metadato y del contenido de todas las lineas con <Tags/> dentro de un archivo. | ||
11 | + | ||
12 | +# Parameters: | ||
13 | +# 1) --inputPath input path | ||
14 | +# 2) --outputPath output path | ||
15 | + | ||
16 | +# Output: | ||
17 | +# 1) exit_file.xml written inside --outputPath: tab-separated rows of file, metadata and content | ||
18 | + | ||
19 | +# Execution: | ||
20 | +#Example 1 | ||
21 | +#python3 file_output.py --inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/ | ||
22 | +#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/ | ||
23 | + | ||
24 | +#Example 2 | ||
25 | +#python3 /home/kevinml/automatic-extraction-growth-conditions/scripts/file_output.py | ||
26 | +#--inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/ | ||
27 | +#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/ | ||
28 | + | ||
29 | +########################################################### | ||
30 | +# MAIN PROGRAM # | ||
31 | +########################################################### | ||
32 | + | ||
# Command-line interface. Both options are declared as required, so argparse
# itself rejects an invocation that omits either of them; no manual check of
# the number of parameters is needed.
parser = argparse.ArgumentParser(
    description='Obtenecion de metadatos y del contenido de de lineas con <Tags/>',
    epilog='Bien Hecho!',
)
for opcion, ayuda in (
    ('inputPath', 'Ingrese el archivo de entrada.'),
    ('outputPath', 'Ingrese el archivo de salida.'),
):
    # The option name doubles as the attribute name on the parsed namespace.
    parser.add_argument('--' + opcion, dest=opcion, metavar='PATH',
                        required=True, help=ayuda)

args = parser.parse_args()

# Echo the parameter values so each run records what it was called with.
print('-------------------------------- PARAMETERS --------------------------------')
print("Path to read input files: {}".format(args.inputPath))
print("Path to place output files: {}".format(args.outputPath))
50 | + | ||
51 | +#Modificacion TEMPORAL | ||
52 | + | ||
# Collected results, one entry per input file:
#   archivo[<file path relative to inputPath>] = {metadata: [content, ...]}
archivo = {}
# A line is processed only if it contains at least one opening tag made of
# letters, e.g. "<Tag>".
regexTag = re.compile(r'<[A-Za-z]+>')
exit_file = r"exit_file.xml"

# ---- Pass 1: read every file found under inputPath ----
for path, dirs, files in os.walk(args.inputPath):
    for f in files:
        # Join with the directory currently being walked (not with inputPath),
        # otherwise files located in subdirectories cannot be opened.
        ruta = os.path.join(path, f)
        metadatos = {}
        with open(ruta, mode='r', encoding="utf-8") as iFile:
            for line in iFile:
                line = line.strip('\n')
                if not regexTag.search(line):
                    continue
                # Split on the first " = " only, so content that itself
                # contains " = " is kept whole.
                clave, separador, contenido = line.partition(" = ")
                if not separador:
                    # Tagged line that is not of the form "metadata = content":
                    # there is nothing to record for it.
                    continue
                metadatos.setdefault(clave, []).append(contenido)

        # Key by the path relative to inputPath: identical to the bare file
        # name for files directly inside inputPath, and unambiguous when two
        # subdirectories contain files with the same name.
        archivo[os.path.relpath(ruta, args.inputPath)] = metadatos

# ---- Pass 2: write the report once, after everything has been read ----
# The output is written as UTF-8 to match the encoding used for the input.
with open(os.path.join(args.outputPath, exit_file), mode="w", encoding="utf-8") as oFile:
    oFile.write('#Fecha:{}\t\t\n#Archivo\tMetadato\tContenido\n\n'.format(datetime.today()))
    for arch in sorted(archivo):
        # Use the metadata that belongs to this file (archivo[arch]), not the
        # dictionary left over from the last file parsed.
        for k, v in sorted(archivo[arch].items()):
            for x in v:
                oFile.write('{}\t{}\t{}\n'.format(arch, k, x))
-
Please register or login to post a comment