# get-TRN-v2.py

# -*- coding: UTF-8 -*-
import operator
from optparse import OptionParser
import os
import sys
import json
import re
import pandas as pd

__author__ = 'CMendezC'


# Objective: generate TRN
# CFMC 2022-03-11: Added:
#   1) Section of the output sentences
#   2)

# Parameters:
#   1) --predictedPath Path for predicted interactions
#   2) --outputPath Output path
#   3) --outputFile Prefix of the files used for saving the TRN
#   4) --diccPath Dictionary path
#   5) --diccSynon File with synonyms of TFs
#   6) --tsvPath    Path to tsv file with section, id sentence, sentence. Extracted from jsonpdf
#   7) --jsonpdfPath    Path to read jsonpdf file to extract section name

# Output:
#   1) Tsv file detail with:
# TF	TypeRegulated	Regulated	Effect	PMID    IdSentence  TypeSentence    Sentence
#   Original_idsentence  Original_sentence  SectionNum SectionName  OrganismMentions    OrganismScore    ConfirmationLevel

#   2) Tsv file summary with:
# TF	TypeRegulated	Regulated	Effect	SentCount	Ver/Dev	Att	Auto	Score   RI (True/False)

# Execution:
# Version 2 TRN Salmonella
# python3.4 get-TRN-v2.py
# --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris
# --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021/bries-bacterial-regulatory-interaction-extraction-system/trn
# --outputFile STMTRN_v2
# --diccPath /home/cmendezc/terminologicalResources
# --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json
# --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/original-toy/tsv
# --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/jsonpdf
# python3.4 get-TRN-v2.py --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021/bries-bacterial-regulatory-interaction-extraction-system/trn --outputFile STMTRN_v2 --diccPath /home/cmendezc/terminologicalResources --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/original-toy/tsv --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/jsonpdf

# articulos_sal_4
# python3.4 get-TRN-v2.py
# --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-4/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris
# --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-4/bries-bacterial-regulatory-interaction-extraction-system/trn
# --outputFile STMTRN_articulos_sal_4
# --diccPath /home/cmendezc/terminologicalResources
# --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json
# --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_4/original/tsv
# --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_4/jsonpdf
# python3.4 get-TRN-v2.py --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-4/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-4/bries-bacterial-regulatory-interaction-extraction-system/trn --outputFile STMTRN_articulos_sal_4 --diccPath /home/cmendezc/terminologicalResources --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_4/original/tsv --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_4/jsonpdf

# articulos_sal_1
# python3.4 get-TRN-v2.py
# --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-1/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris
# --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-1/bries-bacterial-regulatory-interaction-extraction-system/trn
# --outputFile STMTRN_articulos_sal_1
# --diccPath /home/cmendezc/terminologicalResources
# --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json
# --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_1/original/tsv
# --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_1/jsonpdf
# python3.4 get-TRN-v2.py --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-1/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-1/bries-bacterial-regulatory-interaction-extraction-system/trn --outputFile STMTRN_articulos_sal_1 --diccPath /home/cmendezc/terminologicalResources --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_1/original/tsv --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_1/jsonpdf

# all = articulos_sal_1 + articulos_sal_2 + articulos_sal_3 + articulos_sal_4
# python3.4 get-TRN-v2.py
# --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-all/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris
# --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-all/bries-bacterial-regulatory-interaction-extraction-system/trn
# --outputFile STMTRN_all
# --diccPath /home/cmendezc/terminologicalResources
# --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json
# --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_all/original/tsv
# --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_all/jsonpdf
# python3.4 get-TRN-v2.py --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-all/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN-2021-all/bries-bacterial-regulatory-interaction-extraction-system/trn --outputFile STMTRN_all --diccPath /home/cmendezc/terminologicalResources --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json --tsvPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_all/original/tsv --jsonpdfPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/data-sets-STM/preprocessed-STMTRN-2021/articulos_sal_all/jsonpdf

####
# python3.4 get-TRN-v1.py
# --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris
# --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN/bries-bacterial-regulatory-interaction-extraction-system/trn
# --outputFile STMTRN
# --diccPath /home/cmendezc/terminologicalResources
# --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json
# python3.4 get-TRN-v1.py --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STMTRN/bries-bacterial-regulatory-interaction-extraction-system/trn --outputFile STMTRN --diccPath /home/cmendezc/terminologicalResources --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json

# With dataset automatic-extraction-STM-RIs-dataset
# python3.4 get-TRN-v1.py
# --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STM-RIs-dataset/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris
# --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STM-RIs-dataset/bries-bacterial-regulatory-interaction-extraction-system/trn
# --outputFile STM-RIs-dataset
# --diccPath /home/cmendezc/terminologicalResources
# --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json
# python3.4 get-TRN-v1.py --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STM-RIs-dataset/bries-bacterial-regulatory-interaction-extraction-system/predicted-ris-gcs/complete-ris --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-STM-RIs-dataset/bries-bacterial-regulatory-interaction-extraction-system/trn --outputFile STM-RIs-dataset --diccPath /home/cmendezc/terminologicalResources --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json

###########################################################
#                       MAIN PROGRAM                      #
###########################################################

def updateHashPredicted(pr, hashP, pm, sF, ef):
    """Register sentence file ``sF`` of article ``pm`` as evidence for ``pr``.

    ``hashP`` is updated in place and nothing is returned. Each entry has the
    shape ``{"pmids": {pm: [sF, ...]}, "orieff": ef}``.

    Parameters:
        pr: key of the predicted interaction.
        hashP: dictionary of predicted interactions to update.
        pm: identifier of the article the sentence comes from.
        sF: name of the sentence file.
        ef: original effect; stored only when ``pr`` is seen for the first
            time, later calls leave the stored value untouched.
    """
    if pr in hashP:
        # Known interaction: append the sentence to the list of its article,
        # creating that list the first time the article shows up.
        hashP[pr]["pmids"].setdefault(pm, []).append(sF)
        return
    # First time this interaction is seen.
    hashP[pr] = {"pmids": {pm: [sF]}, "orieff": ef}

def get_standard_name(regSynon):
    """Return the standard name for ``regSynon`` using the global ``hashSynon``.

    An exact match in ``hashSynon`` wins. Otherwise the first synonym (in the
    dictionary's iteration order) that is a prefix of ``regSynon`` has its
    first occurrence replaced by the standard name. When nothing matches, the
    name is returned unchanged.
    """
    if regSynon in hashSynon:
        return hashSynon[regSynon]
    for synonym, standard in hashSynon.items():
        if regSynon.startswith(synonym):
            return regSynon.replace(synonym, standard, 1)
    return regSynon

if __name__ == "__main__":
    # Script entry point. Reads every predicted interaction found under
    # --predictedPath (one .a1/.a2/.txt triplet per sentence), looks up the
    # section and the original sentence of each one (jsonpdf + tsv files) and
    # writes two files: <outputFile>.summary.tsv and <outputFile>.detail.tsv.

    # Parameter definition
    parser = OptionParser()
    parser.add_option("--predictedPath", dest="predictedPath",
                      help="Path predicted ris gcs", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Output path", metavar="PATH")
    parser.add_option("--outputFile", dest="outputFile",
                      help="Preffix file for saving results", metavar="FILE")
    parser.add_option("--diccPath", dest="diccPath",
                      help="Path to dictionary", metavar="PATH")
    parser.add_option("--diccSynon", dest="diccSynon",
                      help="File with synonyms", metavar="FILE")
    parser.add_option("--tsvPath", dest="tsvPath",
                      help="Path to tsv file with section, id sentence, sentence. Extracted from jsonpdf.", metavar="PATH")
    parser.add_option("--jsonpdfPath", dest="jsonpdfPath",
                        help="Path to read jsonpdf file to extract section name", metavar="PATH")

    (options, args) = parser.parse_args()
    if args:
        # Positional arguments are not accepted. parser.error() prints the
        # message and terminates the program, so no explicit exit is needed.
        parser.error("None parameter entered.")
    # These options are joined into file paths below; stop early with a clear
    # message instead of failing later inside os.path.join().
    for required in ("predictedPath", "outputPath", "outputFile", "tsvPath", "jsonpdfPath"):
        if getattr(options, required) is None:
            parser.error("Missing required parameter --{}".format(required))

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path predicted ris gcs: " + str(options.predictedPath))
    print("Output path: " + str(options.outputPath))
    print("Preffix file for saving results: " + str(options.outputFile))
    print("Path to dictionary: " + str(options.diccPath))
    print("File with synonyms: " + str(options.diccSynon))
    print("Path to tsv file with section, id sentence, sentence (Extracted from jsonpdf): " + str(options.tsvPath))
    print("Path to read jsonpdf file to extract section name: " + str(options.jsonpdfPath))

    # hashSynon is read as a global by get_standard_name(); it stays empty
    # when no dictionary is given or when --diccSynon is "no-synonyms".
    use_synonyms = False
    hashSynon = {}
    if options.diccPath is not None and options.diccSynon != "no-synonyms":
        print("***** Using synonyms *****")
        use_synonyms = True
        print('Loading dictionary of synonyms...')
        with open(os.path.join(options.diccPath, options.diccSynon)) as diccSynon:
            hashSynon = json.load(diccSynon)
        print('Loading dictionary of synonyms {}... done!'.format(len(hashSynon)))

    # All dictionaries below are keyed by "TF\tTypeRegulated\tRegulated\tEffect".
    hashPredictedRIs = {}           # key -> list of detail rows (one per sentence)
    hashPredictedRIsCount = {}      # key -> total number of sentences
    hashPredictedRIsCountVer = {}
    hashPredictedRIsCountDev = {}
    hashPredictedRIsCountAtt = {}
    hashPredictedRIsCountAuto = {}
    # Counter to increase for each type of sentence.
    hashCountersByType = {
        "ver/dev": hashPredictedRIsCountVer,
        "dev": hashPredictedRIsCountDev,
        "att": hashPredictedRIsCountAtt,
        "auto": hashPredictedRIsCountAuto,
    }

    # Collect the base name (without ".a1") of every annotation file.
    # NOTE(review): os.walk() also visits subdirectories, but files are later
    # opened directly under --predictedPath; confirm the directory is flat.
    hashFiles = {}
    for path, dirs, files in os.walk(options.predictedPath):
        for file in files:
            if file.endswith(".a1"):
                filename = file[:-3]
                hashFiles[filename] = hashFiles.get(filename, 0) + 1
    print("Files: {}".format(len(hashFiles)))

    # Regular expressions are compiled once, outside the loops.
    regex_att_auto = re.compile(r"(\.att\.|\.auto\.)[0-9]*$")
    regex_fix_regulator = re.compile(r'(Regulated|Binds|Bind|deverbal_effect|Regulate)')
    regex_fix_repressor = re.compile(r'(Repressing|Represses)')
    # Clean TF names by expressions seen in TRN output file
    regex_clean_tf = re.compile(r"(/absence|controlle|activation|‐regulate|‐mediate|mediate|-regulate|regulate|ˉ|-like|-mutant|-type|-independent|-dependent|dependent|-dependant|-binding|-and|-family|-bound|-deficient|-indepen-dent|-inducing|-green|-overproducing|-or|-depletion|-repressible|-dual|-box)")
    # False TF names (checked at the start of the cleaned TF name)
    regex_false_tf = re.compile(r"(cyclic|RHONDA|Crawford|Hulett|Rhodobacter|Danino|Huang|Neisseria|Huang|HUGHES1|Robbe-Saule|Danchin|Roberts|Furer|Hunter|Furue|Humphreys|Nacional)")

    processedFiles = 0
    id_ri = 1
    for file in sorted(hashFiles.keys()):
        print("File: {}".format(file))

        # --- 1. Get pmid, sentence number and sentence type from the file name
        type_sent = "ver/dev"
        if file.find("dataSet_OnlyRI_sentences") > -1:
            pmid = "000000"
            numsent = None
            if file.find("dataSet_OnlyRI_sentences.") > -1:
                if file.find(".att.") > -1:
                    numsent = file[file.find("att.") + 4:]
                    type_sent = "att"
                # Fixed: the original tested pmid (always "000000") here, so
                # ".auto." files were never recognized.
                if file.find(".auto.") > -1:
                    numsent = file[file.find("auto.") + 5:]
                    type_sent = "auto"
            else:
                numsent = file[file.find("_", file.find("_", file.find("_") + 1) + 1) + 1:file.find("-")]
            if numsent is None:
                # Avoid reusing the sentence number of the previous file.
                print("ERROR: Sentence number not found in file name: {}".format(file))
                continue
            numsent = numsent.replace(".al", "")
            print("dataSet_OnlyRI_sentences numsent: {}".format(numsent))
            print("dataSet_OnlyRI_sentences pmid: {}".format(pmid))
        else:
            pmid = file[:file.find("_")]
            numsent = file[file.find("_")+1:file.find("-")]
            numsent = numsent.replace(".al", "")
            if pmid.find(".att.") > -1:
                # CFMC 2022-03-11: Fix error in pmid
                # CFMC 2022-03-11 Original: pmid = pmid.replace(".att.", "")
                pmid = regex_att_auto.sub("", pmid)
                numsent = file[file.find("att.")+4:]
                type_sent = "att"
            if pmid.find(".auto.") > -1:
                # CFMC 2022-03-11: Fix error in pmid
                # CFMC 2022-03-11 Original: pmid = pmid.replace(".auto.", "")
                pmid = regex_att_auto.sub("", pmid)
                numsent = file[file.find("auto.") + 5:]
                type_sent = "auto"

        # --- 2. Read the entities (.a1): id <TAB> type ... <TAB> text
        hashEntitiesGenes = {}
        hashEntitiesTUs = {}
        hashEntitiesTFs = {}
        hashEntitiesEffects = {}
        hashEntitiesByType = {"GENE": hashEntitiesGenes, "TU": hashEntitiesTUs, "TF": hashEntitiesTFs}
        with open(os.path.join(options.predictedPath, file + ".a1"), mode="r") as a1File:
            for line in a1File:
                line = line.strip('\n')
                if not line.strip():
                    # Blank lines carry no annotation.
                    continue
                listLine1 = line.split('\t')
                listLine2 = listLine1[1].split(' ')
                entity = listLine2[0]
                entity_type = listLine2[0]
                idEntity = listLine1[0]
                if entity.startswith("EFFECT"):
                    # The effect is the text after the first "." of the type.
                    entity = entity[entity.find(".") + 1:]
                    # NOTE(review): pmid never contains "_dev" as it is built
                    # above, so this branch looks unreachable; kept as in the
                    # original because enabling it would move counts from
                    # Ver/Dev to Dev. Confirm the intended behavior.
                    if pmid.find("_dev") > -1:
                        type_sent = "dev"
                        entity = entity.replace("_dev", "")
                    if idEntity not in hashEntitiesEffects:
                        # We fixed some wrong effects in TRN, but we must fix this also in another script where error is produced
                        if regex_fix_regulator.match(entity):
                            print("WARNING EFFECT: {}".format(entity))
                            entity = regex_fix_regulator.sub("regulator", entity)
                            print("WARNING EFFECT after: {}".format(entity))
                        if regex_fix_repressor.match(entity):
                            print("WARNING EFFECT: {}".format(entity))
                            entity = regex_fix_repressor.sub("repressor", entity)
                            print("WARNING EFFECT after: {}".format(entity))
                        hashEntitiesEffects[idEntity] = entity
                elif entity_type in hashEntitiesByType:
                    # Keep the first text seen for each entity id.
                    hashEntitiesByType[entity_type].setdefault(idEntity, listLine1[2])

        # --- 3. Read the sentence (.txt), keeping the part before "|" of each token
        with open(os.path.join(options.predictedPath, file + ".txt"), mode="r") as txtFile:
            sentence = ' '.join(token.split('|')[0] for token in txtFile.read().split())

        # --- 4. Section names of the article (jsonpdf)
        # CFMC 2022-03-11: We included section of sentences (num, name) and original idsentence and original sentence
        hash_sections = {}
        print('Loading jsonpdf file...')
        with open(os.path.join(options.jsonpdfPath, pmid + ".jsonpdf"), "r", encoding="utf-8", errors="replace") as jsonpdfFile:
            text_file = jsonpdfFile.read()
        # Hand-made repairs for two articles whose jsonpdf is not valid JSON.
        if file.startswith("26781240"):
            text_file = text_file.replace(" \\ ", " \\\\ ")
        elif file.startswith("26249345"):
            text_file = text_file.replace('}], ', '}],"sections": {}')
        try:
            hash_jsonpdf = json.loads(text_file)
        except ValueError as e:
            # Fixed: the original went on with an undefined (or the previous
            # file's) hash_jsonpdf. Now the section is reported as "Unknown".
            print(e)
            print("   Loading jsonpdf file failed: {}".format(file))
        else:
            print('   Loading jsponpdf file... done!')
            hash_sections = hash_jsonpdf["sections"]

        # --- 5. Original sentence and section number (tsv)
        print('Loading tsv file...')
        file_tsv = pmid + ".pre.fil.tsv"
        tsv_file = pd.read_table(os.path.join(options.tsvPath, file_tsv))
        # Only rows with status 1 are kept; they are renumbered from 0 so the
        # sentence number can be used as row label.
        tsv_file_new = tsv_file[tsv_file['status'] == 1].reset_index(drop=True)
        print('   Loading tsv file... done!')
        # For L&C do not increment 1
        numsent_int = int(numsent)
        original_sentence = tsv_file_new.at[numsent_int, 'sentence']
        section_num = tsv_file_new.at[numsent_int, 'section']
        original_idsentence = tsv_file_new.at[numsent_int, 'idsentence']
        section_name = hash_sections.get(str(section_num), "Unknown")

        # Detail row of this sentence. Fixed: the original format string had 9
        # placeholders for 11 values, so the last three columns of the header
        # (Organisms, OrganismScore, ConfirmationLevel) were never written.
        detail_row = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
            pmid, numsent_int, type_sent, sentence, original_idsentence,
            original_sentence, section_num, section_name, "", 0, "")

        # --- 6. Read the relations (.a2)
        with open(os.path.join(options.predictedPath, file + ".a2"), mode="r") as a2File:
            for line in a2File:
                # R1	Interaction.T3 Target:T2 Agent:T1 Condition: T4
                line = line.strip('\n')
                if not line.strip():
                    continue
                listLine1 = line.split('\t')
                listLine2 = listLine1[1].split(' ')
                regulator = listLine2[2]
                regulator = regulator[regulator.find(":") + 1:]
                regulated = listLine2[1]
                regulated = regulated[regulated.find(":") + 1:]
                effect = listLine2[0]
                effect = effect[effect.find(".") + 1:]

                tf = hashEntitiesTFs[regulator]
                if tf.endswith("ed"):
                    # Cut at the first "-"; when there is none, find() returns
                    # -1 and only the last character is removed (the cleaning
                    # expression below contains the resulting stems).
                    tf = tf[:tf.find("-")]
                tf = regex_clean_tf.sub("", tf)
                # Clean false TF names
                # NOTE(review): break also discards the remaining relations of
                # this file; confirm whether continue was intended.
                if regex_false_tf.match(tf):
                    break
                tf = get_standard_name(tf)

                if regulated in hashEntitiesGenes:
                    llave = "{}\t{}\t{}\t{}".format(tf, "gene", hashEntitiesGenes[regulated],
                                                    hashEntitiesEffects[effect])
                elif regulated in hashEntitiesTUs:
                    llave = "{}\t{}\t{}\t{}".format(tf, "TU", hashEntitiesTUs[regulated],
                                                    hashEntitiesEffects[effect])
                else:
                    print("ERROR: Regulated did not found!")
                    # Fixed: the original continued with the key of a previous
                    # relation (or an undefined one) and counted it again.
                    continue
                # Clean false cases
                # NOTE(review): same remark about break as above.
                if llave.startswith("Hu"):
                    break

                if llave not in hashPredictedRIs:
                    hashPredictedRIs[llave] = []
                    hashPredictedRIsCount[llave] = 0
                    for counter in hashCountersByType.values():
                        counter[llave] = 0
                hashPredictedRIs[llave].append(detail_row)
                hashPredictedRIsCount[llave] += 1
                if type_sent in hashCountersByType:
                    hashCountersByType[type_sent][llave] += 1

                id_ri += 1
        processedFiles += 1

    print("Processed files: {}".format(processedFiles))
    # Summary: one row per interaction. The Dev counter is not written; Score
    # and RI are written as the constants "1" and "True".
    with open(os.path.join(options.outputPath, options.outputFile + ".summary.tsv"), mode="w") as oFile:
        oFile.write("TF\tTypeRegulated\tRegulated\tEffect\tSentCount\tVer/Dev\tAtt\tAuto\tScore\tRI\n")
        for k in hashPredictedRIs:
            oFile.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(k, hashPredictedRIsCount[k], hashPredictedRIsCountVer[k],
                                                              hashPredictedRIsCountAtt[k], hashPredictedRIsCountAuto[k], "1", "True"))
    # Detail: one row per interaction and supporting sentence.
    with open(os.path.join(options.outputPath, options.outputFile + ".detail.tsv"), mode="w") as oFile:
        oFile.write("TF\tTypeRegulated\tRegulated\tEffect\tPMID\tNumSentence\tTypeSentence\tSentence\tOriginalIdSentence\tOriginalSentence\tSectionNum\tSectionName\tOrganisms\tOrganismScore\tConfirmationLevel\n")
        for k, v in hashPredictedRIs.items():
            for s in v:
                oFile.write("{}\t{}\n".format(k, s))