organizacion nueva

Estefani Gaytan Nunez
Commit 13412450bd4d13e34c9e3aaeecefe759c7a79b42 13412450 1 parent b4a0ecbb
Showing 53 changed files with 0 additions and 364 deletions
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/label-split_training_test_v1.py → CRF/bin/label-split_training_test_v1.py
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/params.py → CRF/bin/params.py
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/training_validation_v3.py → CRF/bin/training_validation_v3.py
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/check/sentences-405-order-rep.txt → CRF/check/sentences-405-order-rep.txt
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets/test-data-set-30.txt → CRF/data-sets/test-data-set-30.txt
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets/training-data-set-70.txt → CRF/data-sets/training-data-set-70.txt
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/models/training-data-set-70.fStopWords_False.fSymbols_False.mod → CRF/models/training-data-set-70.fStopWords_False.fSymbols_False.mod
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/report_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/report_training-data-set-70.fStopWords_False.fSymbols_False.txt
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/y_pred_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/y_pred_training-data-set-70.fStopWords_False.fSymbols_False.txt
GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/y_test_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/y_test_training-data-set-70.fStopWords_False.fSymbols_False.txt
GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/bin/get-raw-sentences.sh → CoreNLP/bin/get-raw-sentences.sh
GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/bin/single_run.sh → CoreNLP/bin/single_run.sh
GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/input/raw-metadata-senteneces.txt → CoreNLP/input/raw-metadata-senteneces.txt
GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/raw-metadata-senteneces.txt.conll → CoreNLP/output/raw-metadata-senteneces.txt.conll
data-sets/file_output/exit_file.txt
data-sets/file_output/exit_file.xml
data-sets/gzip-data/borrame.txt
report-manually-tagged-gcs/GSE11230_family.report.csv → data-sets/report-manually-tagged-gcs/GSE11230_family.report.csv
report-manually-tagged-gcs/GSE19053_family.report.csv → data-sets/report-manually-tagged-gcs/GSE19053_family.report.csv
report-manually-tagged-gcs/GSE26054_family.report.csv → data-sets/report-manually-tagged-gcs/GSE26054_family.report.csv
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/label-split_training_test_v1.py → CRF/bin/label-split_training_test_v1.py
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/label-split_training_test_v1.py → CRF/bin/label-split_training_test_v1.py
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/params.py → CRF/bin/params.py
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/params.py → CRF/bin/params.py
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/training_validation_v3.py → CRF/bin/training_validation_v3.py
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/bin/training_validation_v3.py → CRF/bin/training_validation_v3.py
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/check/sentences-405-order-rep.txt → CRF/check/sentences-405-order-rep.txt
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/check/sentences-405-order-rep.txt → CRF/check/sentences-405-order-rep.txt
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets/test-data-set-30.txt → CRF/data-sets/test-data-set-30.txt
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets/test-data-set-30.txt → CRF/data-sets/test-data-set-30.txt
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets/training-data-set-70.txt → CRF/data-sets/training-data-set-70.txt
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets/training-data-set-70.txt → CRF/data-sets/training-data-set-70.txt
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/models/training-data-set-70.fStopWords_False.fSymbols_False.mod → CRF/models/training-data-set-70.fStopWords_False.fSymbols_False.mod
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/models/training-data-set-70.fStopWords_False.fSymbols_False.mod → CRF/models/training-data-set-70.fStopWords_False.fSymbols_False.mod
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/report_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/report_training-data-set-70.fStopWords_False.fSymbols_False.txt
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/report_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/report_training-data-set-70.fStopWords_False.fSymbols_False.txt
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/y_pred_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/y_pred_training-data-set-70.fStopWords_False.fSymbols_False.txt
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/y_pred_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/y_pred_training-data-set-70.fStopWords_False.fSymbols_False.txt
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/y_test_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/y_test_training-data-set-70.fStopWords_False.fSymbols_False.txt
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/reports/y_test_training-data-set-70.fStopWords_False.fSymbols_False.txt → CRF/reports/y_test_training-data-set-70.fStopWords_False.fSymbols_False.txt
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/bin/get-raw-sentences.sh → CoreNLP/bin/get-raw-sentences.sh
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/bin/get-raw-sentences.sh → CoreNLP/bin/get-raw-sentences.sh
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/bin/single_run.sh → CoreNLP/bin/single_run.sh
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/bin/single_run.sh → CoreNLP/bin/single_run.sh
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/input/raw-metadata-senteneces.txt → CoreNLP/input/raw-metadata-senteneces.txt
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/input/raw-metadata-senteneces.txt → CoreNLP/input/raw-metadata-senteneces.txt
View file @1341245
--- a/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/raw-metadata-senteneces.txt.conll → CoreNLP/output/raw-metadata-senteneces.txt.conll
View file @1341245
+++ b/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/raw-metadata-senteneces.txt.conll → CoreNLP/output/raw-metadata-senteneces.txt.conll
View file @1341245
--- a/data-sets/file_output/exit_file.txt deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/file_output/exit_file.txt deleted 100644 → 0
View file @b4a0ecb
--- a/data-sets/file_output/exit_file.xml deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/file_output/exit_file.xml deleted 100644 → 0
View file @b4a0ecb
--- a/data-sets/gzip-data/borrame.txt deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/gzip-data/borrame.txt deleted 100644 → 0
View file @b4a0ecb
--- a/report-manually-tagged-gcs/GSE11230_family.report.csv → data-sets/report-manually-tagged-gcs/GSE11230_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE11230_family.report.csv → data-sets/report-manually-tagged-gcs/GSE11230_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE19053_family.report.csv → data-sets/report-manually-tagged-gcs/GSE19053_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE19053_family.report.csv → data-sets/report-manually-tagged-gcs/GSE19053_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE26054_family.report.csv → data-sets/report-manually-tagged-gcs/GSE26054_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE26054_family.report.csv → data-sets/report-manually-tagged-gcs/GSE26054_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE26589_family.report.csv → data-sets/report-manually-tagged-gcs/GSE26589_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE26589_family.report.csv → data-sets/report-manually-tagged-gcs/GSE26589_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE41186_family.report.csv → data-sets/report-manually-tagged-gcs/GSE41186_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE41186_family.report.csv → data-sets/report-manually-tagged-gcs/GSE41186_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE41190_family.report.csv → data-sets/report-manually-tagged-gcs/GSE41190_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE41190_family.report.csv → data-sets/report-manually-tagged-gcs/GSE41190_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE41195_family.report.csv → data-sets/report-manually-tagged-gcs/GSE41195_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE41195_family.report.csv → data-sets/report-manually-tagged-gcs/GSE41195_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE4321_family.report.csv → data-sets/report-manually-tagged-gcs/GSE4321_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE4321_family.report.csv → data-sets/report-manually-tagged-gcs/GSE4321_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE54899_family.report.csv → data-sets/report-manually-tagged-gcs/GSE54899_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE54899_family.report.csv → data-sets/report-manually-tagged-gcs/GSE54899_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE54901_family.report.csv → data-sets/report-manually-tagged-gcs/GSE54901_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE54901_family.report.csv → data-sets/report-manually-tagged-gcs/GSE54901_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE55199_family.report.csv → data-sets/report-manually-tagged-gcs/GSE55199_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE55199_family.report.csv → data-sets/report-manually-tagged-gcs/GSE55199_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE55365_family.report.csv → data-sets/report-manually-tagged-gcs/GSE55365_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE55365_family.report.csv → data-sets/report-manually-tagged-gcs/GSE55365_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE55366_family.report.csv → data-sets/report-manually-tagged-gcs/GSE55366_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE55366_family.report.csv → data-sets/report-manually-tagged-gcs/GSE55366_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE60546_family.report.csv → data-sets/report-manually-tagged-gcs/GSE60546_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE60546_family.report.csv → data-sets/report-manually-tagged-gcs/GSE60546_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE65641_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65641_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE65641_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65641_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE65642_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65642_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE65642_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65642_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE65710_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65710_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE65710_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65710_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE65711_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65711_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE65711_family.report.csv → data-sets/report-manually-tagged-gcs/GSE65711_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE66441_family.report.csv → data-sets/report-manually-tagged-gcs/GSE66441_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE66441_family.report.csv → data-sets/report-manually-tagged-gcs/GSE66441_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE66481_family.report.csv → data-sets/report-manually-tagged-gcs/GSE66481_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE66481_family.report.csv → data-sets/report-manually-tagged-gcs/GSE66481_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE74930_family.report.csv → data-sets/report-manually-tagged-gcs/GSE74930_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE74930_family.report.csv → data-sets/report-manually-tagged-gcs/GSE74930_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE74931_family.report.csv → data-sets/report-manually-tagged-gcs/GSE74931_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE74931_family.report.csv → data-sets/report-manually-tagged-gcs/GSE74931_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE74932_family.report.csv → data-sets/report-manually-tagged-gcs/GSE74932_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE74932_family.report.csv → data-sets/report-manually-tagged-gcs/GSE74932_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE88979_family.report.csv → data-sets/report-manually-tagged-gcs/GSE88979_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE88979_family.report.csv → data-sets/report-manually-tagged-gcs/GSE88979_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE88980_family.report.csv → data-sets/report-manually-tagged-gcs/GSE88980_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE88980_family.report.csv → data-sets/report-manually-tagged-gcs/GSE88980_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE93506_family.report.csv → data-sets/report-manually-tagged-gcs/GSE93506_family.report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE93506_family.report.csv → data-sets/report-manually-tagged-gcs/GSE93506_family.report.csv
View file @1341245
--- a/report-manually-tagged-gcs/GSE_family.complete-report.csv → data-sets/report-manually-tagged-gcs/GSE_family.complete-report.csv
View file @1341245
+++ b/report-manually-tagged-gcs/GSE_family.complete-report.csv → data-sets/report-manually-tagged-gcs/GSE_family.complete-report.csv
View file @1341245
--- a/report-manually-tagged-gcs/tagged-series-samples.txt → data-sets/report-manually-tagged-gcs/tagged-series-samples.txt
View file @1341245
+++ b/report-manually-tagged-gcs/tagged-series-samples.txt → data-sets/report-manually-tagged-gcs/tagged-series-samples.txt
View file @1341245
--- a/data-sets/scripts/extract-manually-tagged-gcs.py deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/scripts/extract-manually-tagged-gcs.py deleted 100644 → 0
View file @b4a0ecb
-# -*- coding: UTF-8 -*-
-
-from optparse import OptionParser
-import os
-import sys
-import re
-
-__author__ = 'CMendezC'
-
-# Objective: extract manually tagged growth conditions.
-
-# Parameters:
-#   1) --inputPath input path
-#   2) --outputPath output path
-
-# Ouput:
-#   1) Tab separated file
-
-# Execution:
-# python extract-manually-tagged-gcs.py
-# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data
-# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
-# c:\anaconda3\python extract-manually-tagged-gcs.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
-
-# python extract-manually-tagged-gcs.py
-# --inputPath "C:\Users\cmendezc\Dropbox (UNAM-CCG)\PGC-BC\Proyectos\O9-NLP\Growth Conditions_HT\etiquetado-manual-gcs"
-# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
-# python extract-manually-tagged-gcs.py --inputPath "C:\Users\cmendezc\Dropbox (UNAM-CCG)\PGC-BC\Proyectos\O9-NLP\Growth Conditions_HT\etiquetado-manual-gcs" --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
-
-###########################################################
-#                       MAIN PROGRAM                      #
-###########################################################
-
-if __name__ == "__main__":
-    # Parameter definition
-    parser = OptionParser()
-    parser.add_option("--inputPath", dest="inputPath",
-                      help="Path to read input files", metavar="PATH")
-    parser.add_option("--outputPath", dest="outputPath",
-                      help="Path to place output files", metavar="PATH")
-
-    (options, args) = parser.parse_args()
-    if len(args) > 0:
-        parser.error("None parameter entered.")
-        sys.exit(1)
-
-    # Printing parameter values
-    print('-------------------------------- PARAMETERS --------------------------------')
-    print("Path to read input files: " + str(options.inputPath))
-    print("Path to place output files: " + str(options.outputPath))
-
-    hashGcs = {}
-    regexTagContent = re.compile(r'<(?P<tag>[^>]+)>(?P<content>[^<]+)<')
-    regexSerie = re.compile(r'^\^SERIES = (?P<serie>GSE[0-9]+)$')
-    regexSample = re.compile(r'^\^SAMPLE = (?P<sample>GSM[0-9]+)$')
-    # Tags from esquema-gcs.xsd at 11/09/2018
-    tags = ["Name", "Anti", "Orgn", "Strain", "Substrain", "Gtype", "Gversion", "Med", "Technique", "Supp", "Air", "Temp", "pH", "Press", "OD", "Phase", "Rate", "Vess", "Agit", ]
-    processed_files = 0
-    saved_files = 0
-    complete_report = []
-    # Walk directory to read files
-    for path, dirs, files in os.walk(options.inputPath):
-        for f in files:
-            if f.endswith("_family.xml"):
-                print("Processing...{} {}".format(options.inputPath, f))
-                #with open(os.path.join(options.inputPath, f), "r", encoding="utf-8") as iFile:
-                with open(os.path.join(options.inputPath, f), "r", errors='replace') as iFile:
-                    # numline = 0
-                    for line in iFile:
-                        # numline+=1
-                        # if f.find("GSE41195") > -1:
-                        #     print(numline)
-                        line = line.strip('\n')
-                        result = regexSerie.match(line)
-                        if result:
-                            serie = result.group('serie')
-                            if serie in hashGcs:
-                                print("WARNING! duplicate serie")
-                            else:
-                                hashGcs[serie] = {}
-                            continue
-                        result = regexSample.match(line)
-                        if result:
-                            sample = result.group('sample')
-                            if sample in hashGcs[serie]:
-                                print("WARNING! duplicate sample")
-                            else:
-                                hashGcs[serie][sample] = {}
-                                # hashGcs[serie] = hashSample
-                            #prevSample = sample
-                            continue
-                        result = regexTagContent.finditer(line)
-                        for m in result:
-                            tag = m.group('tag')
-                            content = m.group('content')
-                            content = content.strip()
-                            content = content.replace("&amp;", "&")
-                            content = content.replace("&lt;", "<")
-                            content = content.replace("&gt;", ">")
-                            content = content.replace("&quot;", "\"")
-                            content = content.replace("&apos;", "\'")
-                            #print("\nSerie: {}\tSample: {}\tTag: {}\tContent: {}".format(serie, sample, tag, content.encode(encoding='utf-8', errors='replace')))
-                            if tag in hashGcs[serie][sample]:
-                                if content in hashGcs[serie][sample][tag]:
-                                    #print("Duplicated content: {}".format(content.encode(encoding='utf-8', errors='replace')))
-                                    pass # GC content already in hash
-                                else:
-                                    # print("New content: {}".format(content))
-                                    hashGcs[serie][sample][tag].append(content)
-                                    # print("hashGcs[serie][sample][tag]: {}".format(hashGcs[serie][sample][tag]))
-                            else:
-                                hashGcs[serie][sample][tag] = [content]
-                                #print("New tag: {} and content: {}".format(tag, content.encode(encoding='utf-8', errors='replace')))
-                            # print(hashGcs)
-                    processed_files+=1
-                    #with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w", encoding="utf-8") as oFile:
-                    with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w") as oFile:
-                        output = '"Serie","Sample",'
-                        for tag in tags:
-                            output = output + '"' + tag + '",'
-                        output = output.rstrip(',')
-                        oFile.write(output + "\n")
-                        complete_report.append(output)
-                        for serie, hashSample in hashGcs.items():
-                            print("Serie: {}".format(serie))
-                            for sample, hashTag in sorted(hashSample.items()):
-                                print("\tSample: {}".format(sample))
-                                pTags = []
-                                for tag in tags:
-                                    if tag in hashTag:
-                                        pTags.append(', '.join(hashTag[tag]))
-                                    else:
-                                        pTags.append('')
-
-                                output = '"{}","{}",'.format(serie, sample)
-                                for tag in pTags:
-                                    output = output + '"' + tag + '",'
-                                output = output.rstrip(',')
-                                oFile.write(output + "\n")
-                                complete_report.append(output)
-                                # oFile.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(sample, serie, Technique, Orgn, Strain, Substrain, Gversion, Gtype, Phase, Air, Med, Temp, Supp))
-                                # for tag, listContent in sorted(hashTag.items()):
-                                #     print("\t\tTag: {}".format(tag))
-                                #     for content in sorted(listContent):
-                                #         print("\t\t\tContent: {}".format(content.encode(encoding='utf-8', errors='replace')))
-                                #         # oFile.write("Serie: {}\tSample: {}\tTag: {}\tContent: {}".format(serie, sample, tag, content.encode(encoding='utf-8', errors='replace')))
-                                #         oFile.write("Serie: {}\tSample: {}\tTag: {}\tContent: {}\n".format(serie, sample, tag, content))
-                        saved_files+=1
-
-    with open(os.path.join(options.outputPath, "GSE_family.complete-report.csv"), "w") as oFile:
-        for line in complete_report:
-            oFile.write(line + "\n")
-
-    print("Processed files: {}".format(processed_files))
-    print("Saved files: {}".format(saved_files))
-
-
--- a/data-sets/scripts/file_output.py deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/scripts/file_output.py deleted 100644 → 0
View file @b4a0ecb
-# -*- coding: UTF-8 -*-
-import os
-import sys
-import argparse
-import re
-import numpy as np 
-from datetime import *
-__author__ = 'KevinML'
-
-# Objective: Obtenecion del metadato y del contenido de todas las lineas con <Tags/> detro de un erchivo.
-
-# Parameters:
-#   1) --inputPath input path
-#   2) --outputPath output path
-
-# Ouput:
-#   1) 
-
-# Execution:
-#Example 1
-#python3 recorrer_archivos_o.py --inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/ 
-#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/
-
-#Example 2
-#python3 /home/kevinml/automatic-extraction-growth-conditions/scripts/recorrer_archivos_o.py
-#--inputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/ 
-#--outputPath /home/kevinml/automatic-extraction-growth-conditions/data-sets/output-kevin/
-
-###########################################################
-#                       MAIN PROGRAM                      #
-###########################################################
-
-parser = argparse.ArgumentParser(description='Obtenecion de metadatos y del contenido de de lineas con <Tags/>',
-								epilog= 'Bien Hecho!')
-parser.add_argument('--inputPath', dest='inputPath', metavar='PATH', required = True,
-                    help='Ingrese el archivo de entrada.')
-parser.add_argument('--outputPath', dest='outputPath', metavar='PATH', required = True,
-                    help='Ingrese el archivo de salida.')
-
-args = parser.parse_args()
-
-#if len(args) != 2:
-#	parser.error("Se introdujeron mas o menos de 2 parametros.")
-#	sys.exit(1)
-
-# Printing parameter values
-print('-------------------------------- PARAMETERS --------------------------------')
-print("Path to read input files: " + str(args.inputPath))
-print("Path to place output files: " + str(args.outputPath))
-
-#ModificCIO TEMPORAL
-
-archivo = {}
-regexTag = re.compile(r'<[A-Za-z]+>')
-exit_file = r"exit_file.xml"
-
-with open(os.path.join(args.outputPath, exit_file), mode = "w") as oFile:
-  oFile.write('#Fecha:{}\t\t\n#Archivo\tMetadato\tContenido\n\n'.format(datetime.today()))
-
-for path, dirs, files in os.walk(args.inputPath):
-    for f in files:
-      metadatos = {}
-      with open(os.path.join(args.inputPath, f), mode ='r', encoding ="utf-8") as iFile:
-        for line in iFile:
-          line = line.strip('\n')
-          if regexTag.search(line):
-            renglon = line.split(" = ")
-            if renglon[0] in metadatos:
-              metadatos[renglon[0]].append(renglon[1])
-            else:
-              metadatos[renglon[0]] = [renglon[1]]
-
-        archivo[f] = metadatos
-
-      with open(os.path.join(args.outputPath, exit_file), mode = "a") as oFile:
-        #oFile.write('Archivo\t' + 'Metadato\t' + 'Contenido')
-        for arch in sorted(archivo):
-          for k,v in sorted(metadatos.items()):
-            for x in v:
-              oFile.write('{}\t{}\t{}\n'.format(arch, k, x))
--- a/data-sets/scripts/gzip-2-soft.py deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/scripts/gzip-2-soft.py deleted 100644 → 0
View file @b4a0ecb
-# -*- coding: UTF-8 -*-
-
-from optparse import OptionParser
-import os
-import sys
-import gzip
-import shutil
-
-__author__ = 'CMendezC'
-
-# Objective: uncompress gzip soft file to text soft file
-
-# Parameters:
-#   1) --inputPath input path
-#   2) --outputPath output path
-
-# Ouput:
-#   1) Text soft file
-
-# Execution:
-# python gzip-2-soft.py
-# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\gzip-data
-# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data
-# python gzip-2-soft.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\gzip-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data
-
-###########################################################
-#                       MAIN PROGRAM                      #
-###########################################################
-
-if __name__ == "__main__":
-    # Parameter definition
-    parser = OptionParser()
-    parser.add_option("--inputPath", dest="inputPath",
-                      help="Path to read input files", metavar="PATH")
-    parser.add_option("--outputPath", dest="outputPath",
-                      help="Path to place output files", metavar="PATH")
-
-    (options, args) = parser.parse_args()
-    if len(args) > 0:
-        parser.error("None parameter entered.")
-        sys.exit(1)
-
-    # Printing parameter values
-    print('-------------------------------- PARAMETERS --------------------------------')
-    print("Path to read input files: " + str(options.inputPath))
-    print("Path to place output files: " + str(options.outputPath))
-
-    # Walk directory to read files
-    for path, dirs, files in os.walk(options.inputPath):
-        for f in files:
-            if f.endswith(".gz"):
-                print("Processing...{}/{}".format(options.inputPath, f))
-                try:
-                    with gzip.open(os.path.join(options.inputPath, f), 'rb') as f_in:
-                        with open(os.path.join(options.outputPath, f.replace('.soft.gz', '.txt')), 'wb') as f_out:
-                            shutil.copyfileobj(f_in, f_out)
-                except:
-                    pass
--- a/data-sets/scripts/output.txt deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/scripts/output.txt deleted 100644 → 0
View file @b4a0ecb
--- a/data-sets/scripts/soft-2-xml.py deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/scripts/soft-2-xml.py deleted 100644 → 0
View file @b4a0ecb
# -*- coding: UTF-8 -*-
"""Convert soft files to XML files.

Every ``*_family.soft`` file found under ``--inputPath`` is copied to
``--outputPath`` as ``*_family.xml``: an XML declaration and an opening
``<gse>`` element are written first, ``&`` and ``<`` are substituted by
``&amp;`` and ``&lt;`` in every line, and a closing ``</gse>`` is appended.

Execution:
    python soft-2-xml.py --inputPath PATH --outputPath PATH
"""

from optparse import OptionParser
import os
import sys

__author__ = 'CMendezC'

XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<gse xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\nxsi:noNamespaceSchemaLocation=\"esquema-gcs.xsd\">\n\n"
XML_FOOTER = "\n</gse>\n"


def main(argv=None):
    """Run the conversion; *argv* defaults to the command-line arguments."""
    # Parameter definition
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place output files", metavar="PATH")

    (options, args) = parser.parse_args(argv)
    if len(args) > 0:
        # parser.error() prints the message and exits by itself.
        parser.error("None parameter entered.")
    if not options.inputPath or not options.outputPath:
        parser.error("Both --inputPath and --outputPath are required.")

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(options.inputPath))
    print("Path to place output files: " + str(options.outputPath))

    # Walk directory to read files
    processedFiles = 0
    for path, dirs, files in os.walk(options.inputPath):
        for f in files:
            if not f.endswith("_family.soft"):
                continue
            print("Processing...{}/{}".format(path, f))
            # Join with the directory os.walk is visiting so that files in
            # sub-directories are found.
            source = os.path.join(path, f)
            target = os.path.join(options.outputPath, f.replace(".soft", ".xml"))
            with open(source, "r", encoding="utf-8", errors="replace") as iFile:
                with open(target, "w", encoding="utf-8") as oFile:
                    oFile.write(XML_HEADER)
                    for line in iFile:
                        # "&" must be substituted first; otherwise the "&"
                        # of a freshly written "&lt;" would be escaped again.
                        line = line.replace("&", "&amp;")
                        line = line.replace("<", "&lt;")
                        oFile.write(line)
                    oFile.write(XML_FOOTER)
            processedFiles += 1
    print("Processed files: {}".format(processedFiles))


###########################################################
#                       MAIN PROGRAM                      #
###########################################################

if __name__ == "__main__":
    sys.exit(main())
--- a/data-sets/soft-data-additional/borrame.txt deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/soft-data-additional/borrame.txt deleted 100644 → 0
View file @b4a0ecb
--- a/data-sets/soft-data/borrame.txt.txt deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/soft-data/borrame.txt.txt deleted 100644 → 0
View file @b4a0ecb
--- a/data-sets/xml-data/borrame.txt deleted 100644 → 0
View file @b4a0ecb
+++ b/data-sets/xml-data/borrame.txt deleted 100644 → 0
View file @b4a0ecb