# file_output.py 2.88 KB
#
# Raw Blame History Permalink

# -*- coding: UTF-8 -*-
import os
import sys
import argparse
import re
import numpy as np
from datetime import *
__author__ = 'KevinML'

# Objective: Extract the metadata key and the content of every line that contains <Tags/> in each input file.

# Parameters:
#   1) --inputPath input path
#   2) --outputPath output path

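# Output:
#   1) exit_file.xml inside the output path: a dated header followed by tab-separated rows (file, metadata key, content).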

# Execution:
#Example 1
#python3 recorrer_archivos_o.py --inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/
#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/

#Example 2
#python3 /home/kevinml/automatic-extraction-growth-conditions/scripts/recorrer_archivos_o.py
#--inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/
#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/

###########################################################
#                       MAIN PROGRAM                      #
###########################################################

# Keyword options shared by both command-line switches.
_path_option = {'metavar': 'PATH', 'required': True}

parser = argparse.ArgumentParser(
    description='Obtenecion de metadatos y del contenido de de lineas con <Tags/>',
    epilog='Bien Hecho!',
)
parser.add_argument(
    '--inputPath',
    dest='inputPath',
    help='Ingrese el archivo de entrada.',
    **_path_option
)
parser.add_argument(
    '--outputPath',
    dest='outputPath',
    help='Ingrese el archivo de salida.',
    **_path_option
)

# Both options are declared required, so argparse itself rejects a call
# that omits either of them; no manual argument-count check is needed.
args = parser.parse_args()

# Echo the parameter values that will be used for this run.
print('-------------------------------- PARAMETERS --------------------------------')
print("Path to read input files: ", args.inputPath, sep="")
print("Path to place output files: ", args.outputPath, sep="")

# TEMPORARY MODIFICATION

# Maps each input file (path relative to the input directory) to its
# {metadata key: [contents, ...]} dictionary.
archivo = {}
# A line is of interest when it contains at least one opening tag such as <Name>.
regexTag = re.compile(r'<[A-Za-z]+>')
# Name of the report written inside the output directory.
exit_file = r"exit_file.xml"


def parse_tagged_lines(lines):
    """Group the contents of tagged ``key = content`` lines by key.

    Parameters:
        lines: iterable of text lines (for example an open text file).

    Returns:
        dict mapping each key (the text before the first " = ") to the list
        of contents (the text after it), in order of appearance. Only lines
        matching ``regexTag`` are considered.
    """
    metadatos = {}
    for line in lines:
        line = line.strip('\n')
        if not regexTag.search(line):
            continue
        # Split on the FIRST separator only, so a content that itself
        # contains " = " is kept whole instead of being truncated.
        key, separator, content = line.partition(" = ")
        if not separator:
            # A tagged line without " = " has no key/content pair to report;
            # skip it instead of failing with IndexError.
            continue
        metadatos.setdefault(key, []).append(content)
    return metadatos


def collect_metadata(input_path):
    """Parse every file found under ``input_path``, including subdirectories.

    Parameters:
        input_path: directory to walk.

    Returns:
        dict mapping each file's path relative to ``input_path`` to the
        result of ``parse_tagged_lines`` for that file. For files located
        directly in ``input_path`` the key is just the file name.
    """
    collected = {}
    for path, _dirs, files in os.walk(input_path):
        for f in files:
            # Join with the directory currently being walked, not with the
            # root: files inside subdirectories live under ``path``.
            full_path = os.path.join(path, f)
            with open(full_path, mode='r', encoding="utf-8") as iFile:
                collected[os.path.relpath(full_path, input_path)] = parse_tagged_lines(iFile)
    return collected


def write_report(output_file, metadata_by_file):
    """Write the dated header and one tab-separated row per content.

    Parameters:
        output_file: path of the report to create (overwritten if present).
        metadata_by_file: dict as returned by ``collect_metadata``.

    Rows are ordered by file, then by metadata key; contents keep their
    order of appearance. Each file's rows come from that file's own
    metadata, and every row is written exactly once.
    """
    # The input is read as UTF-8, so write UTF-8 too rather than relying on
    # the locale default, which may be unable to encode the contents.
    with open(output_file, mode="w", encoding="utf-8") as oFile:
        oFile.write('#Fecha:{}\t\t\n#Archivo\tMetadato\tContenido\n\n'.format(datetime.today()))
        for arch in sorted(metadata_by_file):
            for k, v in sorted(metadata_by_file[arch].items()):
                for x in v:
                    oFile.write('{}\t{}\t{}\n'.format(arch, k, x))


if __name__ == "__main__":
    # Gather everything first, then write the report in a single pass.
    archivo.update(collect_metadata(args.inputPath))
    write_report(os.path.join(args.outputPath, exit_file), archivo)